Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ venv
# Claude Code
CLAUDE.md
.claude/
# Python bytecode
__pycache__/
*.pyc
# Internal planning notes
docs/plans/
10 changes: 10 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts = -v --tb=short --strict-markers
filterwarnings =
error
# The repo's scripts predate py3.10; do not fail the run on DeprecationWarning.
ignore::DeprecationWarning
6 changes: 6 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test-only dependencies. Install with:
# pip install -r requirements-dev.txt
#
# Kept separate from requirements.txt so the runtime image for mkdocs/feedgen
# does not pull in pytest.
pytest>=7.0
101 changes: 80 additions & 21 deletions scripts/Generate_CheatSheets_TOC.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,88 @@
same location as the script in order to be moved later by the caller script.
"""
import os
import sys
from typing import Iterable, List

# Define templates
# Markdown bullet-link template: the first %s is the link text, the second
# %s is the file name, which is linked relative to the cheatsheets/ folder.
cs_md_link_template = "* [%s](cheatsheets/%s)"

# Scan all CS files
cheatsheets = [f.name for f in os.scandir("../cheatsheets") if f.is_file()]
cheatsheets.sort()

# Index pages always get a fixed entry at the top of the summary, in this
# order, so they are skipped in the per-file listing further down.
index_pages = [
    ("Index Alphabetical", "Index.md"),
    ("Index ASVS", "IndexASVS.md"),
    # Was labelled "Index ASVS" too, which made two entries indistinguishable.
    ("Index MASVS", "IndexMASVS.md"),
    ("Index Proactive Controls", "IndexProactiveControls.md"),
]
# File names that must not be listed as cheat sheets.
not_cheatsheets = {page for _, page in index_pages} | {"TOC.md"}

# Generate the summary file
with open("TOC.md", "w") as index_file:
    index_file.write("# Summary\n\n")
    index_file.write("### Cheatsheets\n\n")
    for title, page in index_pages:
        index_file.write(cs_md_link_template % (title, page))
        index_file.write("\n")
    for cheatsheet in cheatsheets:
        if cheatsheet not in not_cheatsheets:
            cs_name = cheatsheet.replace("_", " ").replace(".md", "").replace("Cheat Sheet", "")
            index_file.write(cs_md_link_template % (cs_name, cheatsheet))
            # Terminate every entry so each link is its own list item.
            index_file.write("\n")
# Files that are not actual cheat sheets and must be excluded from the TOC
# even if they happen to live in the cheatsheets/ directory.
# Membership is tested with the exact file name (see should_skip), so the
# entries are case-sensitive and must include the .md extension.
_EXCLUDED_FROM_TOC = frozenset({
    "Index.md",
    "IndexASVS.md",
    "IndexMASVS.md",
    "IndexProactiveControls.md",
    "TOC.md",
})


def to_display_name(filename: str) -> str:
    """Convert a cheatsheet filename to its human-readable display name.

    Underscores become spaces, then a trailing ``.md`` extension and a
    trailing "Cheat Sheet" are removed. The result is whitespace-stripped
    so trailing/leading spaces do not leak into the rendered link text.

    Only genuine suffixes are removed: ``.md`` or "Cheat Sheet" occurring
    in the middle of a name is left untouched, so a name such as
    ``Notes.mdx`` is not mangled.

    Args:
        filename: Bare file name (no directory part) of a cheat sheet.

    Returns:
        The display name used as link text in the TOC.

    Examples:
        >>> to_display_name("Authentication_Cheat_Sheet.md")
        'Authentication'
        >>> to_display_name("XSS_Prevention_Cheat_Sheet.md")
        'XSS Prevention'
    """
    name = filename.replace("_", " ")
    # Order matters: the extension has to go first so that "Cheat Sheet"
    # becomes the tail of the string.
    for suffix in (".md", "Cheat Sheet"):
        if name.endswith(suffix):
            # rstrip so a separator left before the suffix does not hide
            # the next suffix from endswith().
            name = name[:-len(suffix)].rstrip()
    return name.strip()


def should_skip(filename: str) -> bool:
    """Tell whether ``filename`` must be left out of the generated TOC.

    The decision is an exact-match lookup of the file name in
    ``_EXCLUDED_FROM_TOC``.
    """
    is_excluded = filename in _EXCLUDED_FROM_TOC
    return is_excluded


def build_toc_lines(cheatsheets: Iterable[str]) -> List[str]:
    """Return the list of fixed pre-defined index links for the TOC.

    These four links are always emitted in this order, regardless of the
    contents of the cheatsheets/ directory.

    Args:
        cheatsheets: Accepted for interface compatibility; the current
            implementation does not read it.

    Returns:
        Four markdown list items (without trailing newlines) linking to the
        alphabetical, ASVS, MASVS and Proactive Controls index pages.
    """
    fixed_links = (
        ("Index Alphabetical", "Index.md"),
        ("Index ASVS", "IndexASVS.md"),
        # Previously labelled "Index ASVS" as well, which rendered two
        # identical link texts pointing at different pages.
        ("Index MASVS", "IndexMASVS.md"),
        ("Index Proactive Controls", "IndexProactiveControls.md"),
    )
    return [cs_md_link_template % link for link in fixed_links]


def main(cheatsheets_dir: str = "../cheatsheets", output_file: str = "TOC.md") -> int:
    """Generate the summary markdown page.

    Scans ``cheatsheets_dir`` for files, sorts them alphabetically, and
    writes a SUMMARY-style markdown file at ``output_file``: a heading,
    the fixed index links, then one link per cheat sheet that is not
    excluded by ``should_skip``.

    Args:
        cheatsheets_dir: Directory whose regular files are listed.
        output_file: Path of the markdown file to (over)write.

    Returns:
        0 on success.
    """
    # The context manager releases the directory handle deterministically.
    with os.scandir(cheatsheets_dir) as entries:
        cheatsheets = sorted(entry.name for entry in entries if entry.is_file())

    lines = ["# Summary\n\n", "### Cheatsheets\n\n"]
    lines.extend(link + "\n" for link in build_toc_lines(cheatsheets))
    lines.extend(
        cs_md_link_template % (to_display_name(name), name) + "\n"
        for name in cheatsheets
        if not should_skip(name)
    )

    # Explicit encoding keeps the output independent of the platform locale.
    with open(output_file, "w", encoding="utf-8") as index_file:
        index_file.writelines(lines)

    # Reported once, and only after the file has been fully written.
    print("Summary markdown page generated.")
    return 0


if __name__ == "__main__":
sys.exit(main())
111 changes: 85 additions & 26 deletions scripts/Generate_Technologies_JSON.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,95 @@

Dependencies: pip install requests
"""
import sys
import requests
import json
import sys
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import requests

# Define templates
# Public URL of a rendered cheat sheet page; %s is filled with the cheat
# sheet name with spaces replaced by underscores.
CS_BASE_URL = "https://cheatsheetseries.owasp.org/cheatsheets/%s.html"
# Raw markdown source of the index page (master branch) that is fetched and
# parsed; used as the default URL of fetch_index_text.
INDEX_URL = (
    "https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md"
)


def parse_index_line(line: str) -> Optional[Tuple[str, List[str]]]:
    """Parse a single line from ``Index.md``.

    Index lines that reference technology icons have the shape::

        [Cheatsheet Name](cheatsheets/Filename.md) ![Tech](assets/Index_Tech.svg) ...

    The cheat sheet name is the text of the first ``[...]`` on the line and
    each technology is the alt text of a ``![...]`` image. Images are
    located by the ``![`` marker rather than a bare ``!``, so an
    exclamation mark inside the name does not corrupt the result, and any
    prefix before the first ``[`` (such as a list marker) is ignored.

    Args:
        line: One raw line of the index, with or without its line ending.

    Returns:
        A tuple of the cheatsheet display name and the list of
        uppercased technology names, or ``None`` if the line has no
        technology icon references or no bracketed name can be found.
    """
    if "(assets/Index_" not in line:
        return None
    work = line.strip()
    name_start = work.find("[")
    name_end = work.find("]", name_start + 1)
    if name_start == -1 or name_end == -1:
        # Malformed line: skip it instead of aborting the whole run.
        return None
    cs_name = work[name_start + 1:name_end]
    tech_names = []
    for chunk in work.split("![")[1:]:
        alt_text, closing, _ = chunk.partition("]")
        if closing:  # ignore an image marker that is never closed
            tech_names.append(alt_text.upper())
    return cs_name, tech_names

# Grab the index MD source from the GitHub repository.
# requests waits indefinitely unless a timeout is given, so set one to keep
# an unresponsive server from hanging the script.
response = requests.get(
    "https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md",
    timeout=30,
)
if response.status_code != 200:
    print("Cannot load the INDEX content: HTTP %s received!" %
          response.status_code)
    sys.exit(1)

# Map every upper-cased technology name to the cheat sheets tagged with it,
# keeping the order in which technologies first appear in the index.
data = OrderedDict()
for line in response.text.split("\n"):
    # Only lines carrying technology icons are of interest
    if "(assets/Index_" not in line:
        continue
    work = line.strip()
    # Extract the name of the CS
    cs_name = work[1:work.index("]")]
    # Extract technologies and map the CS to them
    for technology in work.split("!")[1:]:
        technology_name = technology[1:technology.index("]")].upper()
        data.setdefault(technology_name, []).append(
            {"CS_NAME": cs_name, "CS_URL": CS_BASE_URL % cs_name.replace(" ", "_")})

def build_technologies_dict(
    index_text: str,
) -> "OrderedDict[str, List[Dict[str, str]]]":
    """Group cheat sheets by technology from the text of ``Index.md``.

    Every line is passed to ``parse_index_line``; lines it rejects are
    ignored. Each remaining cheat sheet is recorded under each of its
    technologies as a ``{"CS_NAME": ..., "CS_URL": ...}`` entry, where the
    URL is ``CS_BASE_URL`` filled with the name, spaces turned into
    underscores.

    Keys are kept in the order in which the technologies are first seen.
    """
    by_technology: "OrderedDict[str, List[Dict[str, str]]]" = OrderedDict()
    for raw_line in index_text.split("\n"):
        parsed = parse_index_line(raw_line)
        if parsed is not None:
            sheet_name, technologies = parsed
            sheet_url = CS_BASE_URL % sheet_name.replace(" ", "_")
            for technology in technologies:
                if technology not in by_technology:
                    by_technology[technology] = []
                # A fresh dict per technology so entries are never shared.
                by_technology[technology].append(
                    {"CS_NAME": sheet_name, "CS_URL": sheet_url}
                )
    return by_technology


def fetch_index_text(url: str = INDEX_URL, timeout: float = 30.0) -> Tuple[int, str]:
    """Fetch the ``Index.md`` content from the given URL.

    Args:
        url: Address to download; defaults to ``INDEX_URL``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang
            the script on an unresponsive host.

    Returns:
        A ``(status_code, body)`` tuple. Callers are expected to check
        the status code and emit a user-facing error if it is not 200.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failures or when the timeout
            elapses.
    """
    response = requests.get(url, timeout=timeout)
    return response.status_code, response.text


def main() -> int:
    """Fetch the index and print the technologies JSON to stdout.

    The JSON is emitted with sorted keys and an indent of 1.

    Returns:
        0 on success and 1 if the upstream index cannot be fetched
        (any HTTP status other than 200). The exit code is returned rather
        than passed to ``sys.exit`` here, so the caller decides whether to
        terminate the interpreter.
    """
    status, text = fetch_index_text()
    if status != 200:
        print(
            "Cannot load the INDEX content: HTTP %s received!" % status
        )
        return 1
    data = build_technologies_dict(text)
    print(json.dumps(data, sort_keys=True, indent=1))
    return 0


if __name__ == "__main__":
sys.exit(main())
Loading