diff --git a/.gitignore b/.gitignore index 931a744d8a..0780e104c9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,8 @@ venv # Claude Code CLAUDE.md .claude/ +# Python bytecode +__pycache__/ +*.pyc +# Internal planning notes +docs/plans/ diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..869558c790 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,10 @@ +[pytest] +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* +addopts = -v --tb=short --strict-markers +filterwarnings = + error + # The repo's scripts predate py3.10; tolerate DeprecationWarning instead of failing on it. + ignore::DeprecationWarning diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000000..f2873c2c16 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +# Test-only dependencies. Install with: +# pip install -r requirements-dev.txt +# +# Kept separate from requirements.txt so the runtime image for mkdocs/feedgen +# does not pull in pytest. +pytest>=7.0 diff --git a/scripts/Generate_CheatSheets_TOC.py b/scripts/Generate_CheatSheets_TOC.py index ed3f5633e4..c7714c8b34 100644 --- a/scripts/Generate_CheatSheets_TOC.py +++ b/scripts/Generate_CheatSheets_TOC.py @@ -8,29 +8,88 @@ same location that the script in order to be moved later by the caller script. 
""" import os +import sys +from typing import Iterable, List # Define templates cs_md_link_template = "* [%s](cheatsheets/%s)" -# Scan all CS files -cheatsheets = [f.name for f in os.scandir("../cheatsheets") if f.is_file()] -cheatsheets.sort() - -# Generate the summary file -with open("TOC.md", "w") as index_file: - index_file.write("# Summary\n\n") - index_file.write("### Cheatsheets\n\n") - index_file.write(cs_md_link_template % ("Index Alphabetical", "Index.md")) - index_file.write("\n") - index_file.write(cs_md_link_template % ("Index ASVS", "IndexASVS.md")) - index_file.write("\n") - index_file.write(cs_md_link_template % ("Index ASVS", "IndexMASVS.md")) - index_file.write("\n") - index_file.write(cs_md_link_template % ("Index Proactive Controls", "IndexProactiveControls.md")) - index_file.write("\n") - for cheatsheet in cheatsheets: - if cheatsheet != "Index.md" and cheatsheet != "IndexASVS.md" and cheatsheet != "IndexMASVS.md" and cheatsheet != "IndexProactiveControls.md" and cheatsheet != "TOC.md": - cs_name = cheatsheet.replace("_"," ").replace(".md", "").replace("Cheat Sheet", "") - index_file.write(cs_md_link_template % (cs_name, cheatsheet)) +# Files that are not actual cheat sheets and must be excluded from the TOC +# even if they happen to live in the cheatsheets/ directory. +_EXCLUDED_FROM_TOC = frozenset({ + "Index.md", + "IndexASVS.md", + "IndexMASVS.md", + "IndexProactiveControls.md", + "TOC.md", +}) + + +def to_display_name(filename: str) -> str: + """Convert a cheatsheet filename to its human-readable display name. + + Underscores become spaces, the .md suffix is dropped, and the + "Cheat Sheet" suffix (if present) is stripped. The result is + whitespace-stripped so trailing/leading spaces do not leak into + the rendered link text. 
# Markdown bullet template for one TOC entry: (link text, file name).
cs_md_link_template = "* [%s](cheatsheets/%s)"

# Files that are not actual cheat sheets and must be excluded from the TOC
# even if they happen to live in the cheatsheets/ directory.
_EXCLUDED_FROM_TOC = frozenset({
    "Index.md",
    "IndexASVS.md",
    "IndexMASVS.md",
    "IndexProactiveControls.md",
    "TOC.md",
})

# (link text, file name) of the index pages that always open the TOC,
# in the order they are emitted.
_PREDEFINED_INDEX_LINKS = (
    ("Index Alphabetical", "Index.md"),
    ("Index ASVS", "IndexASVS.md"),
    # The MASVS index used to be labelled "Index ASVS" as well, which made
    # two different links indistinguishable in the rendered page.
    ("Index MASVS", "IndexMASVS.md"),
    ("Index Proactive Controls", "IndexProactiveControls.md"),
)


def to_display_name(filename: str) -> str:
    """Convert a cheatsheet filename to its human-readable display name.

    Underscores become spaces, then every occurrence of ``.md`` and of
    ``Cheat Sheet`` is removed (not only a trailing one). The result is
    stripped of leading/trailing whitespace so no stray space leaks into
    the rendered link text.

    Examples:
        >>> to_display_name("Authentication_Cheat_Sheet.md")
        'Authentication'
        >>> to_display_name("XSS_Prevention_Cheat_Sheet.md")
        'XSS Prevention'
    """
    spaced = filename.replace("_", " ")
    without_extension = spaced.replace(".md", "")
    return without_extension.replace("Cheat Sheet", "").strip()


def should_skip(filename: str) -> bool:
    """Return True for files that should not appear in the generated TOC."""
    return filename in _EXCLUDED_FROM_TOC


def build_toc_lines(cheatsheets: Iterable[str]) -> List[str]:
    """Return the fixed, pre-defined index links that open the TOC.

    The four links are always returned in the same order. ``cheatsheets``
    is accepted for interface compatibility but is not consulted: the
    per-cheatsheet links are produced by :func:`main`.
    """
    return [
        cs_md_link_template % (label, target)
        for label, target in _PREDEFINED_INDEX_LINKS
    ]


def main(cheatsheets_dir: str = "../cheatsheets", output_file: str = "TOC.md") -> int:
    """Generate the summary markdown page.

    Scans ``cheatsheets_dir`` for regular files, sorts their names, and
    writes a SUMMARY-style markdown file to ``output_file``: the two
    headings, the pre-defined index links, then one link per file that
    :func:`should_skip` does not exclude.

    Returns:
        0 on success. Errors from ``os.scandir`` / ``open`` propagate.
    """
    # The context manager closes the directory handle deterministically.
    with os.scandir(cheatsheets_dir) as entries:
        cheatsheets = sorted(entry.name for entry in entries if entry.is_file())

    lines = ["# Summary\n\n", "### Cheatsheets\n\n"]
    lines.extend(link + "\n" for link in build_toc_lines(cheatsheets))
    lines.extend(
        cs_md_link_template % (to_display_name(name), name) + "\n"
        for name in cheatsheets
        if not should_skip(name)
    )

    # Explicit UTF-8 so the output does not depend on the platform's
    # default encoding (the sibling index script already does this).
    with open(output_file, "w", encoding="utf-8") as index_file:
        index_file.writelines(lines)
    print("Summary markdown page generated.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Define templates
CS_BASE_URL = "https://cheatsheetseries.owasp.org/cheatsheets/%s.html"
INDEX_URL = (
    "https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md"
)


def parse_index_line(line: str) -> Optional[Tuple[str, List[str]]]:
    """Parse a single line from ``Index.md``.

    Index lines that reference technology icons have the shape::

        [Cheatsheet Name](cheatsheets/Filename.md) ![Tech](assets/Index_Tech.svg) ...

    Args:
        line: One raw line of the index; surrounding whitespace is ignored.

    Returns:
        ``(cheatsheet_name, [TECHNOLOGY, ...])`` with technology names
        uppercased and in line order, or ``None`` when the line does not
        contain ``(assets/Index_``.

    Raises:
        ValueError: If an icon line contains no ``]`` at all, so the
            cheatsheet name cannot be extracted.
    """
    if "(assets/Index_" not in line:
        return None
    work = line.strip()
    # The cheatsheet name is the text of the leading "[...]" link.
    cs_name = work[1:work.index("]")]
    tech_names = []
    for chunk in work.split("!")[1:]:
        # Only "![Name]..." fragments are icons. A "!" elsewhere on the
        # line (e.g. inside the cheatsheet name) must not yield a bogus
        # empty technology or a ValueError.
        if chunk.startswith("[") and "]" in chunk:
            tech_names.append(chunk[1:chunk.index("]")].upper())
    return cs_name, tech_names


def build_technologies_dict(
    index_text: str,
) -> "OrderedDict[str, List[Dict[str, str]]]":
    """Build the technology -> [cheatsheet] mapping from ``Index.md`` text.

    Each value is a list of ``{"CS_NAME": ..., "CS_URL": ...}`` dicts, where
    the URL is ``CS_BASE_URL`` filled with the cheatsheet name after spaces
    are replaced by underscores. Keys appear in the order technologies are
    first encountered in the text.
    """
    data: "OrderedDict[str, List[Dict[str, str]]]" = OrderedDict()
    for line in index_text.split("\n"):
        parsed = parse_index_line(line)
        if parsed is None:
            continue
        cs_name, tech_names = parsed
        entry_url = CS_BASE_URL % cs_name.replace(" ", "_")
        for tech in tech_names:
            # A fresh dict per technology keeps the lists independent.
            data.setdefault(tech, []).append(
                {"CS_NAME": cs_name, "CS_URL": entry_url}
            )
    return data


def fetch_index_text(url: str = INDEX_URL, timeout: float = 30.0) -> Tuple[int, str]:
    """Fetch the ``Index.md`` content from the given URL.

    Args:
        url: Address of the raw index file.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A ``(status_code, body)`` tuple. Callers are expected to check
        the status code and emit a user-facing error if it is not 200.

    Raises:
        requests.RequestException: On connection failure or timeout; this
            function does not catch it.
    """
    response = requests.get(url, timeout=timeout)
    return response.status_code, response.text


def main() -> int:
    """Fetch the index and print the technologies JSON to stdout.

    Returns:
        0 on success, 1 if the upstream index answers with a non-200
        status (the error message is printed to stdout in that case).
    """
    status, text = fetch_index_text()
    if status != 200:
        print("Cannot load the INDEX content: HTTP %s received!" % status)
        return 1
    technologies = build_technologies_dict(text)
    # sort_keys=True means the printed JSON is ordered alphabetically.
    print(json.dumps(technologies, sort_keys=True, indent=1))
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Fence language tags that get an icon in the index, in reporting order.
_LANGUAGE_MARKERS = [
    "javascript", "java", "csharp", "c", "cpp", "html", "xml", "python",
    "ruby", "php", "json", "sql", "bash", "shell", "coldfusion", "perl",
    "vbnet",
]


def extract_languages_snippet_provided(
    cheatsheet: str,
    cheatsheets_dir: str = "../cheatsheets",
) -> List[str]:
    """Detect the languages of code snippets in the given cheatsheet.

    The whole file is lowercased and has its spaces removed; a language
    is reported when the resulting text contains an opening fence
    (three backticks) immediately followed by the marker and a newline.

    Args:
        cheatsheet: Filename of the cheatsheet within ``cheatsheets_dir``.
        cheatsheets_dir: Directory containing the cheatsheet file.

    Returns:
        The matching markers with their first letter capitalized, in the
        order of ``_LANGUAGE_MARKERS`` (not in order of appearance in
        the file).
    """
    source_path = os.path.join(cheatsheets_dir, cheatsheet)
    with open(source_path, encoding="utf8") as cs_file:
        normalized = cs_file.read().lower().replace(" ", "")
    return [
        marker.capitalize()
        for marker in _LANGUAGE_MARKERS
        if "```" + marker + "\n" in normalized
    ]


def group_by_letter(cheatsheets: Iterable[str]) -> "OrderedDict[str, List[str]]":
    """Group cheatsheet filenames by their first letter (uppercased).

    Returns an :class:`OrderedDict` whose keys are sorted and whose
    values keep the filenames in the order they were supplied.
    """
    grouped: Dict[str, List[str]] = {}
    for name in cheatsheets:
        grouped.setdefault(name[0].upper(), []).append(name)
    return OrderedDict(sorted(grouped.items()))


def clean_trailing_whitespace(file_path: str) -> None:
    """Strip trailing whitespace from each line in the file (in place).

    Every line is written back terminated by a single ``"\\n"``, so a
    final line without a newline gains one.
    """
    with open(file_path, "r", encoding="utf-8") as source:
        cleaned_lines = [line.rstrip() + "\n" for line in source]
    with open(file_path, "w", encoding="utf-8") as target:
        target.writelines(cleaned_lines)


# Define templates
# NOTE(review): main() also uses header_template and top_menu_template,
# which are defined outside the visible source; confirm they stay
# defined alongside the templates below.
cs_md_link_template = "[%s](cheatsheets/%s)"
language_md_link_template = "![%s](assets/Index_%s.svg)"
cs_count_template = "**%s** cheat sheets available."
cs_index_title_template = "# Index Alphabetical\n\n"


def main(
    cheatsheets_dir: str = "../cheatsheets",
    output_file: str = "../Index.md",
) -> int:
    """Regenerate the alphabetical index from the cheatsheets directory.

    Scans ``cheatsheets_dir`` for regular files, groups them by first
    letter, detects code-snippet languages, writes the index to
    ``output_file`` and finally strips trailing whitespace from it.

    Returns:
        0 on success. I/O errors propagate.
    """
    # os.scandir yields entries in arbitrary order; sorting makes the
    # order of cheat sheets inside each letter section reproducible.
    with os.scandir(cheatsheets_dir) as entries:
        cheatsheets = sorted(entry.name for entry in entries if entry.is_file())
    index = group_by_letter(cheatsheets)

    with open(output_file, "w", encoding="utf-8") as index_file:
        index_file.write(cs_index_title_template)
        index_file.write(cs_count_template % len(cheatsheets))
        index_file.write(
            "\n\n*Icons beside the cheat sheet name indicate in which "
            "language(s) code snippet(s) are provided.*"
        )
        index_file.write("\n\n")
        # Generate the top menu
        for letter in index:
            index_file.write(top_menu_template % (letter, letter.lower()))
            index_file.write(" ")
        index_file.write("\n\n")
        # Generate letter sections
        last_section = len(index) - 1
        for section_no, (letter, group) in enumerate(index.items()):
            index_file.write(header_template % letter)
            last_entry = len(group) - 1
            for entry_no, cs_file in enumerate(group):
                cs_name = cs_file.replace("_", " ").replace(".md", "").strip()
                index_file.write(cs_md_link_template % (cs_name, cs_file))
                languages = extract_languages_snippet_provided(
                    cs_file, cheatsheets_dir=cheatsheets_dir
                )
                if languages:
                    index_file.write(" ")
                for language in languages:
                    index_file.write(
                        language_md_link_template % (language, language)
                    )
                    index_file.write(" ")
                index_file.write("\n")
                # Blank line between entries, but not after the last one.
                if entry_no != last_entry:
                    index_file.write("\n")
            # Blank line between sections, but not after the last one.
            if section_no != last_section:
                index_file.write("\n")

    clean_trailing_whitespace(output_file)
    print("Index updated.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@pytest.fixture
def cheatsheets_dir(tmp_path: Path) -> Path:
    """Create and return an empty ``cheatsheets`` directory under ``tmp_path``.

    Tests populate it with sample files before calling the script under
    test.
    """
    directory = tmp_path / "cheatsheets"
    directory.mkdir()
    return directory


@pytest.fixture
def write_cheatsheet(cheatsheets_dir: Path):
    """Return a helper that writes one file into ``cheatsheets_dir``.

    The helper takes a file name and optional text content (default
    empty), writes it as UTF-8 and returns the resulting path.

    Usage:
        write_cheatsheet("Foo.md", "# Foo\\n\\n```python\\nprint('x')\\n```")
    """
    def _write(name: str, content: str = "") -> Path:
        target = cheatsheets_dir / name
        target.write_text(content, encoding="utf-8")
        return target

    return _write
# (filename, expected display name) pairs for to_display_name().
_DISPLAY_NAME_CASES = [
    ("Authentication_Cheat_Sheet.md", "Authentication"),
    ("XSS_Prevention_Cheat_Sheet.md", "XSS Prevention"),
    ("Docker_Security.md", "Docker Security"),
    ("REST_Security_Cheat_Sheet.md", "REST Security"),
    ("OAuth2_Cheat_Sheet.md", "OAuth2"),
    ("C-Based_Cheat_Sheet.md", "C-Based"),
    # No "Cheat Sheet" suffix
    ("GraphQL.md", "GraphQL"),
    # No underscores, no suffix
    ("README.md", "README"),
]

# Files the TOC generator must never list as cheat sheets.
_NON_CHEATSHEET_FILES = [
    "Index.md",
    "IndexASVS.md",
    "IndexMASVS.md",
    "IndexProactiveControls.md",
    "TOC.md",
]

_REAL_CHEATSHEETS = [
    "Authentication_Cheat_Sheet.md",
    "XSS_Prevention_Cheat_Sheet.md",
    "Docker_Security.md",
]


def _generate_toc(cheatsheets_dir: Path, tmp_path: Path):
    """Run ``toc.main`` on absolute temp paths; return (exit code, TOC text).

    Absolute paths keep the tests independent of the caller's cwd (the
    script's defaults are the relative "../cheatsheets" and "TOC.md").
    """
    toc_path = tmp_path / "TOC.md"
    exit_code = toc.main(
        cheatsheets_dir=str(cheatsheets_dir),
        output_file=str(toc_path),
    )
    return exit_code, toc_path.read_text(encoding="utf-8")


class TestToDisplayName:
    @pytest.mark.parametrize(("filename", "expected"), _DISPLAY_NAME_CASES)
    def test_converts_filename_to_human_readable(self, filename, expected):
        assert toc.to_display_name(filename) == expected

    def test_strips_trailing_whitespace_in_display_name(self):
        # Removing "Cheat Sheet" leaves "Foo " behind; the trailing space
        # must not survive into the rendered link.
        display = toc.to_display_name("Foo_Cheat_Sheet.md")
        assert display == "Foo"

    def test_empty_string_returns_empty(self):
        assert toc.to_display_name("") == ""


class TestShouldSkip:
    @pytest.mark.parametrize("filename", _NON_CHEATSHEET_FILES)
    def test_index_and_toc_files_are_skipped(self, filename):
        assert toc.should_skip(filename) is True

    @pytest.mark.parametrize("filename", _REAL_CHEATSHEETS)
    def test_real_cheatsheets_are_not_skipped(self, filename):
        assert toc.should_skip(filename) is False


class TestBuildTocLines:
    def test_returns_four_predefined_index_links(self):
        assert len(toc.build_toc_lines([])) == 4

    def test_predefined_links_appear_in_known_order(self):
        first, second, third, fourth = toc.build_toc_lines([])
        assert "Index.md" in first
        assert "IndexASVS.md" in second
        assert "IndexMASVS.md" in third
        assert "IndexProactiveControls.md" in fourth


class TestMain:
    def test_creates_toc_file_with_summary_header(
        self, cheatsheets_dir: Path, tmp_path: Path
    ):
        exit_code, rendered = _generate_toc(cheatsheets_dir, tmp_path)
        assert exit_code == 0
        assert rendered.startswith("# Summary\n\n")
        assert "### Cheatsheets" in rendered

    def test_lists_index_files_only_via_predefined_links(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        # Index files that live in cheatsheets/ must appear exactly once,
        # through the hardcoded links, and never again as cheat sheets.
        for name, body in (
            ("Index.md", "# Index"),
            ("IndexASVS.md", "# ASVS Index"),
            ("IndexMASVS.md", "# MASVS Index"),
            ("IndexProactiveControls.md", "# PC Index"),
            ("Authentication_Cheat_Sheet.md", "# Auth"),
        ):
            write_cheatsheet(name, body)

        _, rendered = _generate_toc(cheatsheets_dir, tmp_path)

        # The pre-defined index links are present
        assert "Index Alphabetical" in rendered
        assert "IndexASVS.md" in rendered
        # The real cheatsheet is listed under its display name
        assert (
            "[Authentication](cheatsheets/Authentication_Cheat_Sheet.md)"
            in rendered
        )
        # One occurrence each: the pre-defined link only.
        for index_file in (
            "Index.md",
            "IndexASVS.md",
            "IndexMASVS.md",
            "IndexProactiveControls.md",
        ):
            assert rendered.count("cheatsheets/%s)" % index_file) == 1
        # The raw filename is never used as link text.
        assert "[Authentication_Cheat_Sheet]" not in rendered

    def test_sorts_cheatsheets_alphabetically(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        for name in ("Z_Cheat_Sheet.md", "A_Cheat_Sheet.md", "M_Cheat_Sheet.md"):
            write_cheatsheet(name)

        _, rendered = _generate_toc(cheatsheets_dir, tmp_path)

        positions = [rendered.index(tag) for tag in ("[A]", "[M]", "[Z]")]
        assert positions[0] < positions[1] < positions[2]

    def test_uses_display_name_with_spaces(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        write_cheatsheet("Clickjacking_Defense_Cheat_Sheet.md")

        _, rendered = _generate_toc(cheatsheets_dir, tmp_path)

        expected_link = (
            "[Clickjacking Defense](cheatsheets/Clickjacking_Defense_Cheat_Sheet.md)"
        )
        assert expected_link in rendered

    def test_empty_cheatsheets_dir_still_writes_predefined_links(
        self, cheatsheets_dir: Path, tmp_path: Path
    ):
        _, rendered = _generate_toc(cheatsheets_dir, tmp_path)
        for label in ("Index Alphabetical", "Index ASVS", "Index Proactive Controls"):
            assert label in rendered
# Minimal Index.md: one cheat sheet with two icons, one with a single icon.
SAMPLE_INDEX = """\
# Index Alphabetical

**2** cheat sheets available.

[A](Index.md#a) [B](Index.md#b)

## A

[Authentication](cheatsheets/Authentication_Cheat_Sheet.md) ![Javascript](assets/Index_Javascript.svg) ![Java](assets/Index_Java.svg)

## B

[Business Logic](cheatsheets/Business_Logic_Cheat_Sheet.md) ![Java](assets/Index_Java.svg)
"""


def _canned_get(status_code: int, text: str):
    """Patch ``requests.get`` as seen by the script with a fixed response."""
    response = mock.Mock(status_code=status_code, text=text)
    return mock.patch.object(tech.requests, "get", return_value=response)


class TestParseIndexLine:
    def test_returns_none_for_line_without_technology_icon(self):
        assert tech.parse_index_line("[Foo](cheatsheets/Foo.md)") is None

    def test_returns_none_for_blank_line(self):
        for blank in ("", " "):
            assert tech.parse_index_line(blank) is None

    def test_parses_single_technology(self):
        parsed = tech.parse_index_line(
            "[XSS](cheatsheets/XSS.md) ![Javascript](assets/Index_Javascript.svg)"
        )
        name, technologies = parsed
        assert name == "XSS"
        assert technologies == ["JAVASCRIPT"]

    def test_parses_multiple_technologies_in_order(self):
        line = " ".join(
            [
                "[Auth](cheatsheets/Auth.md)",
                "![Javascript](assets/Index_Javascript.svg)",
                "![Java](assets/Index_Java.svg)",
                "![Python](assets/Index_Python.svg)",
            ]
        )
        name, technologies = tech.parse_index_line(line)
        assert name == "Auth"
        assert technologies == ["JAVASCRIPT", "JAVA", "PYTHON"]

    def test_strips_leading_and_trailing_whitespace(self):
        name, technologies = tech.parse_index_line(
            " [Foo](cheatsheets/Foo.md) ![Java](assets/Index_Java.svg) "
        )
        assert name == "Foo"
        assert technologies == ["JAVA"]


class TestBuildTechnologiesDict:
    def test_empty_text_returns_empty_dict(self):
        assert list(tech.build_technologies_dict("").keys()) == []

    def test_groups_cheatsheets_under_their_technologies(self):
        mapping = tech.build_technologies_dict(SAMPLE_INDEX)
        # Authentication carries both icons; Business Logic only Java.
        assert "JAVASCRIPT" in mapping
        assert "JAVA" in mapping
        javascript_entries = mapping["JAVASCRIPT"]
        assert len(javascript_entries) == 1
        assert javascript_entries[0]["CS_NAME"] == "Authentication"
        java_entries = mapping["JAVA"]
        assert len(java_entries) == 2
        assert {entry["CS_NAME"] for entry in java_entries} == {
            "Authentication",
            "Business Logic",
        }

    def test_uses_owasp_cheatsheets_url_for_cs_url(self):
        mapping = tech.build_technologies_dict(SAMPLE_INDEX)
        expected_url = (
            "https://cheatsheetseries.owasp.org/cheatsheets/Authentication.html"
        )
        assert mapping["JAVASCRIPT"][0]["CS_URL"] == expected_url

    def test_ignores_lines_without_technology_icons(self):
        text = "\n".join(
            [
                "# Title",
                "",
                "Some intro text.",
                "",
                "[Foo](cheatsheets/Foo.md) ![Java](assets/Index_Java.svg)",
                "",
            ]
        )
        mapping = tech.build_technologies_dict(text)
        # Title and intro contribute nothing; only the icon line counts.
        assert list(mapping.keys()) == ["JAVA"]
        assert mapping["JAVA"][0]["CS_NAME"] == "Foo"

    def test_preserves_insertion_order_of_technologies(self):
        # Keys follow first appearance in the document, not the alphabet.
        mapping = tech.build_technologies_dict(SAMPLE_INDEX)
        assert list(mapping.keys()) == ["JAVASCRIPT", "JAVA"]


class TestFetchIndexText:
    def test_returns_status_code_and_body(self):
        with _canned_get(200, "# Index\n"):
            result = tech.fetch_index_text()
        assert result[0] == 200
        assert result[1] == "# Index\n"

    def test_uses_default_index_url(self):
        with _canned_get(200, "") as get_mock:
            tech.fetch_index_text()
        requested_url = get_mock.call_args.args[0]
        assert requested_url == tech.INDEX_URL


class TestMain:
    def test_prints_json_and_exits_zero_on_success(self, capsys):
        with _canned_get(200, SAMPLE_INDEX):
            exit_code = tech.main()
        assert exit_code == 0
        # stdout must hold a single valid JSON document.
        document = json.loads(capsys.readouterr().out)
        assert "JAVASCRIPT" in document
        assert "JAVA" in document

    def test_exits_one_on_non_200_status(self, capsys):
        with _canned_get(404, ""):
            exit_code = tech.main()
        assert exit_code == 1
        stdout = capsys.readouterr().out
        assert "HTTP 404" in stdout
        # Nothing follows the error message, i.e. no JSON was printed.
        assert stdout.strip().endswith("received!")

    def test_exits_one_on_connection_error(self, capsys):
        # Despite the name, no exit code is produced here: main() does
        # not catch network failures, so the exception from requests.get
        # propagates. The test only pins that it is not swallowed.
        failing_get = mock.patch.object(
            tech.requests,
            "get",
            side_effect=tech.requests.RequestException("boom"),
        )
        with failing_get:
            with pytest.raises(tech.requests.RequestException):
                tech.main()
def _languages_of(name: str, directory: Path):
    """Run language detection for ``name`` inside ``directory``."""
    return idx.extract_languages_snippet_provided(
        name, cheatsheets_dir=str(directory)
    )


def _render_index(cheatsheets_dir: Path, tmp_path: Path):
    """Run ``idx.main`` on temp paths; return (exit code, index text)."""
    index_path = tmp_path / "Index.md"
    exit_code = idx.main(
        cheatsheets_dir=str(cheatsheets_dir),
        output_file=str(index_path),
    )
    return exit_code, index_path.read_text(encoding="utf-8")


class TestExtractLanguagesSnippetProvided:
    def test_detects_javascript(self, cheatsheets_dir, write_cheatsheet):
        write_cheatsheet(
            "XSS_Cheat_Sheet.md",
            "# XSS\n\n```javascript\nalert(1);\n```\n",
        )
        assert _languages_of("XSS_Cheat_Sheet.md", cheatsheets_dir) == [
            "Javascript"
        ]

    def test_detects_multiple_languages_in_order_of_marker_list(
        self, cheatsheets_dir, write_cheatsheet
    ):
        # The file has sql before python, but results follow the marker
        # list, where python precedes sql.
        write_cheatsheet(
            "Multi.md",
            "```sql\nSELECT 1;\n```\n```python\nprint(1)\n```\n",
        )
        detected = _languages_of("Multi.md", cheatsheets_dir)
        assert detected == ["Python", "Sql"]

    def test_returns_empty_list_for_file_without_code_blocks(
        self, cheatsheets_dir, write_cheatsheet
    ):
        write_cheatsheet("Plain.md", "# Just headings\n\nNo code here.\n")
        assert _languages_of("Plain.md", cheatsheets_dir) == []

    def test_ignores_unrecognized_languages(
        self, cheatsheets_dir, write_cheatsheet
    ):
        # ``rust`` is not in the marker list — must not be detected.
        write_cheatsheet(
            "Unrecognized.md",
            "```rust\nfn main() {}\n```\n```python\nprint(1)\n```\n",
        )
        detected = _languages_of("Unrecognized.md", cheatsheets_dir)
        assert "Rust" not in detected
        assert "Python" in detected

    def test_detection_is_case_and_space_insensitive(
        self, cheatsheets_dir, write_cheatsheet
    ):
        # Content is lowercased and spaces are removed before matching,
        # so both fence tags below normalize to "javascript".
        write_cheatsheet("A.md", "```JavaScript\nx\n```\n")
        write_cheatsheet("B.md", "```Java Script\nx\n```\n")
        for name in ("A.md", "B.md"):
            assert _languages_of(name, cheatsheets_dir) == ["Javascript"]


class TestGroupByLetter:
    def test_groups_by_uppercased_first_letter(self):
        grouped = idx.group_by_letter(["alpha.md", "beta.md", "Alpha2.md"])
        assert "A" in grouped
        assert "B" in grouped
        assert grouped["A"] == ["alpha.md", "Alpha2.md"]
        assert grouped["B"] == ["beta.md"]

    def test_returns_ordered_dict_sorted_by_letter(self):
        grouped = idx.group_by_letter(["zebra.md", "apple.md", "mango.md"])
        assert isinstance(grouped, OrderedDict)
        assert list(grouped.keys()) == ["A", "M", "Z"]

    def test_preserves_input_order_within_a_letter_group(self):
        names = ["b2.md", "b1.md", "b3.md"]
        assert idx.group_by_letter(names)["B"] == names

    def test_empty_input_returns_empty_ordered_dict(self):
        assert list(idx.group_by_letter([]).keys()) == []


class TestCleanTrailingWhitespace:
    def test_strips_trailing_whitespace_from_each_line(self, tmp_path):
        target = tmp_path / "with_trailing.md"
        target.write_text("line 1 \nline 2\t\nline 3\n", encoding="utf-8")
        idx.clean_trailing_whitespace(str(target))
        # Trailing blanks are removed; every line keeps its newline.
        assert target.read_text(encoding="utf-8") == "line 1\nline 2\nline 3\n"

    def test_handles_file_with_no_trailing_whitespace(self, tmp_path):
        target = tmp_path / "clean.md"
        target.write_text("a\nb\nc\n", encoding="utf-8")
        idx.clean_trailing_whitespace(str(target))
        assert target.read_text(encoding="utf-8") == "a\nb\nc\n"


class TestMain:
    def test_creates_index_file_with_title_and_count(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        for name in (
            "Authentication_Cheat_Sheet.md",
            "XSS_Prevention_Cheat_Sheet.md",
            "Docker_Security.md",
        ):
            write_cheatsheet(name)

        exit_code, rendered = _render_index(cheatsheets_dir, tmp_path)

        assert exit_code == 0
        assert rendered.startswith("# Index Alphabetical\n\n")
        assert "**3** cheat sheets available." in rendered

    def test_groups_cheatsheets_by_letter_section(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        for name in (
            "Authentication_Cheat_Sheet.md",
            "XSS_Prevention_Cheat_Sheet.md",
            "Docker_Security.md",
        ):
            write_cheatsheet(name)

        _, rendered = _render_index(cheatsheets_dir, tmp_path)

        for heading in ("## A\n", "## D\n", "## X\n"):
            assert heading in rendered

    def test_includes_language_icons_when_code_blocks_present(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        write_cheatsheet(
            "XSS_Prevention_Cheat_Sheet.md",
            "# XSS\n\n```javascript\nalert(1);\n```\n",
        )

        _, rendered = _render_index(cheatsheets_dir, tmp_path)

        assert "![Javascript](assets/Index_Javascript.svg)" in rendered

    def test_omits_language_icons_when_no_code_blocks(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        write_cheatsheet("Plain_Cheat_Sheet.md", "# Plain\n\nNo code.\n")

        _, rendered = _render_index(cheatsheets_dir, tmp_path)

        assert "assets/Index_" not in rendered

    def test_output_has_no_trailing_whitespace(
        self, cheatsheets_dir: Path, tmp_path: Path, write_cheatsheet
    ):
        # main() must still finish with the whitespace clean-up pass the
        # original script performed.
        write_cheatsheet("Foo.md", "# Foo\n")

        _, rendered = _render_index(cheatsheets_dir, tmp_path)

        for line in rendered.splitlines():
            assert line == line.rstrip(), f"line has trailing whitespace: {line!r}"