From 6725358dba1032000c7332b28109357761ab69b4 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Sun, 10 May 2026 09:30:36 +0100 Subject: [PATCH 1/6] docs: add CLAUDE.md with architecture and dev commands Pointer file for Claude Code: high-level prompt-building hot path, settings/Dynaconf flow, git-provider abstraction, and the unit-test command. Defers to AGENTS.md for the full repo guidelines. --- CLAUDE.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..711057ba3f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,53 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +See `AGENTS.md` for the full repository guidelines (dos/don'ts, coding style, safety, security). The notes below are the high-leverage subset for navigating PR-Agent quickly. + +## Common commands + +Run from the repo root with the virtualenv activated: + +- Single unit test: `PYTHONPATH=. ./.venv/bin/pytest tests/unittest/test_fix_json_escape_char.py -q` +- Full unit suite: `PYTHONPATH=. ./.venv/bin/pytest tests/unittest -v` +- Pytest auto-discovery is configured in `pyproject.toml` (`asyncio_mode = "auto"`, `testpaths = ["tests"]`); always set `PYTHONPATH=.` to avoid import errors. +- Local CLI run: `python -m pr_agent.cli --pr_url review` +- Lint: project uses Ruff with `line-length = 120` (config in `pyproject.toml`); pre-commit hooks live in `.pre-commit-config.yaml`. +- Docker test target (mirror of CI): `docker build -f docker/Dockerfile --target test .` +- E2E (`tests/e2e_tests/`) and health (`tests/health_test/`) suites require provider tokens (`TOKEN_GITHUB`, `TOKEN_GITLAB`, `BITBUCKET_USERNAME`/`PASSWORD`) and are slow — only run when configured. + +Python ≥ 3.12 is required (see `pyproject.toml`). + +## Architecture + +PR-Agent is a CLI/server that runs AI-powered tools (`/review`, `/describe`, `/improve`, `/ask`, etc.) against a pull request on GitHub, GitLab, Bitbucket, Azure DevOps, Gitea, Gerrit, or local. The dispatch flow is `pr_agent/agent/pr_agent.py` → `command2class` map → tool class in `pr_agent/tools/`. Each tool is responsible for fetching the PR via a git provider, building a Jinja2 prompt, calling the model, and publishing the result. + +### Prompt building (the hot path) + +Every tool follows the same shape: in `__init__` it constructs a `self.vars` dict, then passes it together with system/user prompt strings to a `TokenHandler`. At run time the prompts are rendered with `jinja2.Environment(undefined=StrictUndefined)` against `self.vars`. Adding new context to a prompt means: extend `self.vars` in the tool, then add a `{%- if my_var %}` block in the matching prompt TOML. Because templates use `StrictUndefined`, every variable referenced in the template must be present in `vars` (use `{%- if … %}` guards, never optional Jinja lookups). + +System/user prompt strings live as TOML in `pr_agent/settings/`, loaded via Dynaconf as part of `global_settings` in `pr_agent/config_loader.py`. The mapping between tool and prompt file follows naming conventions: `pr_reviewer.py` ↔ `pr_reviewer_prompts.toml`, `pr_description.py` ↔ `pr_description_prompts.toml`, `pr_code_suggestions.py` ↔ `code_suggestions/pr_code_suggestions_prompts.toml` (and the `_not_decoupled` variant). New prompt files must also be registered in the `settings_files=[...]` list in `config_loader.py` to be loaded into `global_settings`. + +### Settings and runtime config + +`get_settings()` from `pr_agent/config_loader.py` is the single accessor for configuration. It returns either a request-scoped Dynaconf object stored in `starlette_context` (server flows) or the module-level `global_settings`. Defaults live in `pr_agent/settings/configuration.toml`; per-repo overrides come from the repo's `.pr_agent.toml`, merged in `pr_agent/git_providers/utils.py::apply_repo_settings` (called once per request before tool dispatch). When introducing a new config section, add it to `configuration.toml` with comments — that file is the authoritative listing of options, and `apply_repo_settings` does a per-section merge so partial overrides work. + +Sensitive values (API keys, tokens) come from environment variables or `.secrets.toml` (gitignored); `apply_secrets_manager_config()` optionally pulls from AWS Secrets Manager. + +### Git providers + +`pr_agent/git_providers/` contains one provider per platform (GitHub, GitLab, Bitbucket variants, Azure DevOps, Gitea, Gerrit, Codecommit, local). They share the `GitProvider` interface in `git_providers/git_provider.py` (capabilities probed via `is_supported("feature")`) and are selected via `config.git_provider`. Tools should never branch on `isinstance(provider, GithubProvider)` for behavior — query `is_supported(...)` instead, since providers may stub or override features. Some prompt features (e.g. semantic file types in `/describe`) are gated on `gfm_markdown` support. + +### Servers and entrypoints + +`pr_agent/servers/` hosts the webhook entrypoints (`github_app.py`, `gitlab_webhook.py`, `bitbucket_app.py`, etc.) that translate webhooks into `PRAgent.handle_request(pr_url, command)` calls. The CLI entry point is `pr_agent/cli.py` (registered as the `pr-agent` console script). + +### Tests + +Unit tests in `tests/unittest/` are the right place for helpers in `pr_agent/algo/`, prompt-building logic, and provider adapters; mirror the file naming pattern (`test_.py`). Use `parametrize` where the surrounding files do. The health test (`tests/health_test/main.py`) exercises `/describe`, `/review`, `/improve` against real PRs and is the canary for prompt regressions — update its expected artifacts when prompts change meaningfully. + +## Conventions to keep in mind + +- Prompt and configuration TOMLs are single sources of truth. When changing behavior, update the prompts and the config defaults together; don't fork values across files. +- Conventional Commit messages; feature branches as `feature/` or `fix/`. +- Don't reformat or reorder unrelated lines in prompt/config files — diffs in those files are reviewed closely and small noise is rejected. From 8479dae6210e82907634bf1258209f45a903a27a Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Sun, 10 May 2026 09:30:49 +0100 Subject: [PATCH 2/6] feat(skills): support agent skills (SKILL.md) in review/improve/describe Discover SKILL.md files from configured filesystem paths, parse YAML frontmatter, and inject the combined name/description/body block into the review, improve, and describe prompts as a separate skills_context variable (rendered above extra_instructions so user-supplied instructions still take precedence). Activation is description-based: every discovered skill is included with its "Use when..." description, and the model decides which guidance to apply. A token budget (skills.max_skills_tokens, default 4000) caps the injected block, dropping skills from the end if exceeded. Disabled by default; enable via [skills] in configuration. No new dependencies (uses stdlib yaml). Refs The-PR-Agent/pr-agent#2384. --- pr_agent/algo/skills_loader.py | 194 ++++++++++++++++++ .../pr_code_suggestions_prompts.toml | 9 + ...ode_suggestions_prompts_not_decoupled.toml | 9 + pr_agent/settings/configuration.toml | 8 + pr_agent/settings/pr_description_prompts.toml | 8 + pr_agent/settings/pr_reviewer_prompts.toml | 9 + pr_agent/tools/pr_code_suggestions.py | 2 + pr_agent/tools/pr_description.py | 2 + pr_agent/tools/pr_reviewer.py | 2 + tests/unittest/test_skills_loader.py | 182 ++++++++++++++++ 10 files changed, 425 insertions(+) create mode 100644 pr_agent/algo/skills_loader.py create mode 100644 tests/unittest/test_skills_loader.py diff --git a/pr_agent/algo/skills_loader.py b/pr_agent/algo/skills_loader.py new file mode 100644 index 0000000000..c17e44a50b --- /dev/null +++ b/pr_agent/algo/skills_loader.py @@ -0,0 +1,194 @@ +""" +Agent skills loader. + +Discovers ``SKILL.md`` files from configured filesystem paths, parses their YAML +frontmatter, and formats them as prompt context for review/improve/describe tools. + +A skill is a directory containing a ``SKILL.md`` file with the structure: + + --- + name: terraform-standards + description: Use when reviewing Terraform code... + --- + + # Terraform Review Guidance + ... + +Activation is description-based: every discovered skill is included with its +name, description, and body. The model decides which guidance applies based on +the descriptions. +""" +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import List, Optional + +import yaml + +from pr_agent.config_loader import get_settings +from pr_agent.log import get_logger + +# Approximate characters-per-token used to keep the skills block under budget. +_CHARS_PER_TOKEN = 4 +_FRONTMATTER_DELIMITER = "---" + + +@dataclass(frozen=True) +class Skill: + name: str + description: str + body: str + path: str + + +def _parse_skill_file(file_path: str) -> Optional[Skill]: + """Parse a single SKILL.md file. Returns None and logs a warning on malformed input.""" + try: + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + except OSError as e: + get_logger().warning(f"Skill file unreadable: {file_path} ({e})") + return None + + lines = content.splitlines() + if not lines or lines[0].strip() != _FRONTMATTER_DELIMITER: + get_logger().warning(f"Skill file missing opening frontmatter delimiter: {file_path}") + return None + end_idx = None + for i in range(1, len(lines)): + if lines[i].strip() == _FRONTMATTER_DELIMITER: + end_idx = i + break + if end_idx is None: + get_logger().warning(f"Skill file missing closing frontmatter delimiter: {file_path}") + return None + + frontmatter_text = "\n".join(lines[1:end_idx]) + body = "\n".join(lines[end_idx + 1 :]).strip() + + try: + meta = yaml.safe_load(frontmatter_text) or {} + except yaml.YAMLError as e: + get_logger().warning(f"Skill frontmatter is not valid YAML: {file_path} ({e})") + return None + + if not isinstance(meta, dict): + get_logger().warning(f"Skill frontmatter must be a mapping: {file_path}") + return None + + name = meta.get("name") + description = meta.get("description") + if not isinstance(name, str) or not name.strip(): + get_logger().warning(f"Skill missing required 'name' field: {file_path}") + return None + if not isinstance(description, str) or not description.strip(): + get_logger().warning(f"Skill missing required 'description' field: {file_path}") + return None + + return Skill(name=name.strip(), description=description.strip(), body=body, path=file_path) + + +def discover_skills(paths: List[str]) -> List[Skill]: + """Scan the given filesystem paths for ``*/SKILL.md`` files. + + Each entry in ``paths`` may be either a directory containing skill + subdirectories (recursive search) or a path to a SKILL.md file directly. + Missing paths are skipped with a warning. + """ + skills: List[Skill] = [] + seen: set = set() + + for raw_path in paths or []: + if not isinstance(raw_path, str) or not raw_path.strip(): + continue + path = os.path.expanduser(raw_path.strip()) + if not os.path.exists(path): + get_logger().warning(f"Skills path does not exist: {path}") + continue + + if os.path.isfile(path): + candidates = [path] if os.path.basename(path) == "SKILL.md" else [] + else: + candidates = [] + for root, _dirs, files in os.walk(path): + if "SKILL.md" in files: + candidates.append(os.path.join(root, "SKILL.md")) + + for candidate in candidates: + real = os.path.realpath(candidate) + if real in seen: + continue + seen.add(real) + skill = _parse_skill_file(candidate) + if skill is not None: + skills.append(skill) + + skills.sort(key=lambda s: s.name) + return skills + + +def _format_skill(skill: Skill) -> str: + return ( + f"### Skill: {skill.name}\n" + f"When to use: {skill.description}\n\n" + f"{skill.body}".rstrip() + ) + + +def format_skills_context(skills: List[Skill], max_tokens: int) -> str: + """Format skills into a prompt-ready string under a token budget. + + Skills are emitted in order; once the running character count would exceed + the budget (estimated as ``max_tokens * 4`` characters), remaining skills are + dropped. Returns an empty string if no skills fit. + """ + if not skills: + return "" + if max_tokens is None or max_tokens <= 0: + return "" + + char_budget = max_tokens * _CHARS_PER_TOKEN + pieces: List[str] = [] + used = 0 + separator = "\n\n---\n\n" + for skill in skills: + formatted = _format_skill(skill) + addition = (separator if pieces else "") + formatted + if used + len(addition) > char_budget: + if not pieces: + # First skill alone exceeds the budget; truncate its body so we + # still provide partial context rather than dropping everything. + truncated = formatted[: max(0, char_budget - len("\n\n[truncated]"))] + pieces.append(truncated + "\n\n[truncated]") + used = len(pieces[0]) + else: + get_logger().info( + f"Skills context budget reached; dropping {len(skills) - len(pieces)} skill(s)" + ) + break + pieces.append(formatted) + used += len(addition) + + return separator.join(pieces).strip() + + +def get_skills_context() -> str: + """Convenience helper: read settings, discover, and format. Returns '' + when skills are disabled or no paths yield content. Never raises.""" + try: + settings = get_settings() + skills_cfg = settings.get("skills", None) + if not skills_cfg: + return "" + if not skills_cfg.get("enabled", False): + return "" + paths = skills_cfg.get("paths", []) or [] + max_tokens = int(skills_cfg.get("max_skills_tokens", 4000) or 0) + skills = discover_skills(list(paths)) + if not skills: + return "" + return format_skills_context(skills, max_tokens) + except Exception as e: + get_logger().warning(f"Failed to build skills context: {e}") + return "" diff --git a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml index 36b4d0dcf6..e0383cf6f2 100644 --- a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml @@ -73,6 +73,15 @@ Specific guidelines for generating code suggestions: - Be aware that your input consists only of partial code segments (PR diff code), not the complete codebase. Therefore, avoid making suggestions that might duplicate existing functionality, and refrain from questioning code elements (such as variable declarations or import statements) that may be defined elsewhere in the codebase. - When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..." +{%- if skills_context %} + + +Organizational standards and review skills (apply the ones relevant to this PR): +====== +{{ skills_context }} +====== +{%- endif %} + {%- if extra_instructions %} diff --git a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml index 6178ee23c0..5a9552a26c 100644 --- a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml +++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml @@ -62,6 +62,15 @@ Specific guidelines for generating code suggestions: - Note that you will only see partial code segments that were changed (diff hunks in a PR code), and not the entire codebase. Avoid suggestions that might duplicate existing functionality of the outer codebase. In addition, the absence of a definition, declaration, import, or initialization for any entity in the PR code is NEVER a basis for a suggestion. - Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown. +{%- if skills_context %} + + +Organizational standards and review skills (apply the ones relevant to this PR): +====== +{{ skills_context }} +====== +{%- endif %} + {%- if extra_instructions %} diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index c2533b54ec..b23c8c14b6 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -352,6 +352,14 @@ uri = "./lancedb" # url = "https://YOUR-QDRANT-URL" # api_key = "..." +[skills] +# Agent skills (SKILL.md) support: discovers SKILL.md files from the configured +# filesystem paths and injects their content into review/improve/describe prompts. +# See https://github.com/The-PR-Agent/pr-agent/issues/2384 +enabled = false +paths = [] # list of directories to scan recursively for "*/SKILL.md" +max_skills_tokens = 4000 # token budget for the combined skills_context block + [best_practices] content = "" organization_name = "" diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 2627401ea9..50ec9e1ced 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -8,6 +8,14 @@ Your task is to provide a full description for the PR content: type, description - When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). - When needed, use '- ' as bullets +{%- if skills_context %} + +Organizational standards and review skills (apply the ones relevant to this PR): +===== +{{ skills_context }} +===== +{% endif %} + {%- if extra_instructions %} Extra instructions from the user: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index bbe6c6d04c..ffdb9381af 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -60,6 +60,15 @@ Constructing comments: - Keep each issue description concise. Write so the reader grasps the point immediately without close reading. - Use a matter-of-fact, helpful tone. Avoid accusatory language, excessive praise, or filler phrases like 'Great job', 'Thanks for'. +{%- if skills_context %} + + +Organizational standards and review skills (apply the ones relevant to this PR): +====== +{{ skills_context }} +====== +{% endif %} + {%- if extra_instructions %} diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index bbdf58e46d..5e1ef04d7a 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -17,6 +17,7 @@ from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files, get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models) +from pr_agent.algo.skills_loader import get_skills_context from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags, show_relevant_configurations, get_max_tokens, clip_tokens, get_model) @@ -66,6 +67,7 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None, "diff_no_line_numbers": "", # empty diff for initial calculation "num_code_suggestions": num_code_suggestions, "extra_instructions": get_settings().pr_code_suggestions.extra_instructions, + "skills_context": get_skills_context(), "commit_messages_str": self.git_provider.get_commit_messages(), "relevant_best_practices": "", "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 26ea5d190a..2214e95e11 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -14,6 +14,7 @@ get_pr_diff, get_pr_diff_multiple_patchs, retry_with_fallback_models) +from pr_agent.algo.skills_loader import get_skills_context from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import (ModelType, PRDescriptionHeader, clip_tokens, get_max_tokens, get_user_labels, load_yaml, @@ -67,6 +68,7 @@ def __init__(self, pr_url: str, args: list = None, "language": self.main_pr_language, "diff": "", # empty diff for initial calculation "extra_instructions": get_settings().pr_description.extra_instructions, + "skills_context": get_skills_context(), "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index c4917f3597..2995b7691d 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -12,6 +12,7 @@ from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files, get_pr_diff, retry_with_fallback_models) +from pr_agent.algo.skills_loader import get_skills_context from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import (ModelType, PRReviewHeader, convert_to_markdown_v2, github_action_output, @@ -92,6 +93,7 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, 'question_str': question_str, 'answer_str': answer_str, "extra_instructions": get_settings().pr_reviewer.extra_instructions, + "skills_context": get_skills_context(), "commit_messages_str": self.git_provider.get_commit_messages(), "custom_labels": "", "enable_custom_labels": get_settings().config.enable_custom_labels, diff --git a/tests/unittest/test_skills_loader.py b/tests/unittest/test_skills_loader.py new file mode 100644 index 0000000000..56dad58e44 --- /dev/null +++ b/tests/unittest/test_skills_loader.py @@ -0,0 +1,182 @@ +"""Unit tests for the agent skills loader.""" +import os +import textwrap +from pathlib import Path + +from pr_agent.algo.skills_loader import (Skill, _parse_skill_file, + discover_skills, + format_skills_context, + get_skills_context) + + +def _write_skill(directory: Path, name: str, body: str = "Body content."): + skill_dir = directory / name + skill_dir.mkdir(parents=True, exist_ok=True) + skill_file = skill_dir / "SKILL.md" + skill_file.write_text(textwrap.dedent(f"""\ + --- + name: {name} + description: Use when reviewing {name} code. + --- + + {body} + """)) + return skill_file + + +class TestParseSkillFile: + def test_parses_valid_frontmatter_and_body(self, tmp_path): + skill_file = _write_skill(tmp_path, "terraform-standards", + body="# Terraform Review\n- check tags") + skill = _parse_skill_file(str(skill_file)) + assert skill is not None + assert skill.name == "terraform-standards" + assert skill.description == "Use when reviewing terraform-standards code." + assert "Terraform Review" in skill.body + assert "- check tags" in skill.body + + def test_missing_opening_delimiter_returns_none(self, tmp_path): + f = tmp_path / "SKILL.md" + f.write_text("no frontmatter here\nname: x\n") + assert _parse_skill_file(str(f)) is None + + def test_missing_closing_delimiter_returns_none(self, tmp_path): + f = tmp_path / "SKILL.md" + f.write_text("---\nname: x\ndescription: y\nstill in frontmatter\n") + assert _parse_skill_file(str(f)) is None + + def test_invalid_yaml_returns_none(self, tmp_path): + f = tmp_path / "SKILL.md" + f.write_text("---\nname: [unclosed\n---\nbody\n") + assert _parse_skill_file(str(f)) is None + + def test_missing_required_fields_returns_none(self, tmp_path): + f = tmp_path / "SKILL.md" + f.write_text("---\nname: only-name\n---\nbody\n") + assert _parse_skill_file(str(f)) is None + + f2 = tmp_path / "SKILL2.md" + f2.write_text("---\ndescription: only desc\n---\nbody\n") + assert _parse_skill_file(str(f2)) is None + + def test_body_with_inner_dashes_preserved(self, tmp_path): + f = tmp_path / "SKILL.md" + f.write_text(textwrap.dedent("""\ + --- + name: with-dashes + description: Use when X. + --- + + # Heading + --- + section after rule + """)) + skill = _parse_skill_file(str(f)) + assert skill is not None + assert "section after rule" in skill.body + assert "---" in skill.body + + +class TestDiscoverSkills: + def test_finds_nested_skill_md_files(self, tmp_path): + _write_skill(tmp_path / "a", "alpha") + _write_skill(tmp_path / "b" / "nested", "bravo") + # Directory without SKILL.md should be ignored + (tmp_path / "c").mkdir() + (tmp_path / "c" / "README.md").write_text("not a skill") + + skills = discover_skills([str(tmp_path)]) + names = {s.name for s in skills} + assert names == {"alpha", "bravo"} + + def test_skips_missing_paths_without_raising(self, tmp_path): + skills = discover_skills([str(tmp_path / "does-not-exist")]) + assert skills == [] + + def test_accepts_direct_path_to_skill_file(self, tmp_path): + skill_file = _write_skill(tmp_path, "direct") + skills = discover_skills([str(skill_file)]) + assert len(skills) == 1 + assert skills[0].name == "direct" + + def test_deduplicates_overlapping_paths(self, tmp_path): + _write_skill(tmp_path / "x", "xray") + skills = discover_skills([str(tmp_path), str(tmp_path / "x")]) + assert len(skills) == 1 + + def test_skips_malformed_files_but_returns_others(self, tmp_path): + _write_skill(tmp_path / "good", "good") + bad_dir = tmp_path / "bad" + bad_dir.mkdir() + (bad_dir / "SKILL.md").write_text("no frontmatter\n") + skills = discover_skills([str(tmp_path)]) + names = [s.name for s in skills] + assert names == ["good"] + + def test_ignores_empty_and_non_string_path_entries(self, tmp_path): + _write_skill(tmp_path, "only") + skills = discover_skills([str(tmp_path), "", None]) # type: ignore[list-item] + assert len(skills) == 1 + + +class TestFormatSkillsContext: + def _mk(self, name: str, body: str = "guidance body") -> Skill: + return Skill(name=name, description=f"Use when {name}", body=body, path=f"/{name}/SKILL.md") + + def test_returns_empty_when_no_skills(self): + assert format_skills_context([], 4000) == "" + + def test_returns_empty_when_budget_zero(self): + assert format_skills_context([self._mk("a")], 0) == "" + + def test_includes_name_description_and_body(self): + out = format_skills_context([self._mk("alpha", body="step one\nstep two")], 4000) + assert "Skill: alpha" in out + assert "When to use: Use when alpha" in out + assert "step one" in out + assert "step two" in out + + def test_drops_skills_beyond_budget(self): + big_body = "x" * 1000 + skills = [self._mk(f"s{i}", body=big_body) for i in range(5)] + # Budget of 250 tokens ~ 1000 chars, fits roughly one skill. + out = format_skills_context(skills, max_tokens=250) + assert "Skill: s0" in out + # At least one of the later skills must be dropped. + assert "Skill: s4" not in out + + def test_truncates_when_first_skill_exceeds_budget(self): + huge = self._mk("huge", body="y" * 5000) + out = format_skills_context([huge], max_tokens=50) # 200 char budget + assert "[truncated]" in out + assert len(out) <= 250 # budget + truncation marker + + def test_separator_between_multiple_skills(self): + out = format_skills_context( + [self._mk("a", body="A"), self._mk("b", body="B")], max_tokens=4000 + ) + assert out.count("---") >= 1 + assert out.index("Skill: a") < out.index("Skill: b") + + +class TestGetSkillsContext: + def test_disabled_returns_empty(self, tmp_path, monkeypatch): + from pr_agent.config_loader import get_settings + get_settings().set("skills", {"enabled": False, "paths": [str(tmp_path)], + "max_skills_tokens": 4000}) + assert get_skills_context() == "" + + def test_enabled_with_no_paths_returns_empty(self, monkeypatch): + from pr_agent.config_loader import get_settings + get_settings().set("skills", {"enabled": True, "paths": [], + "max_skills_tokens": 4000}) + assert get_skills_context() == "" + + def test_enabled_with_skills_returns_formatted(self, tmp_path): + _write_skill(tmp_path, "demo", body="check the thing") + from pr_agent.config_loader import get_settings + get_settings().set("skills", {"enabled": True, "paths": [str(tmp_path)], + "max_skills_tokens": 4000}) + out = get_skills_context() + assert "Skill: demo" in out + assert "check the thing" in out From a801454a5fb410a1c5efac54c85fb0d1316de379 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Sun, 10 May 2026 10:01:51 +0100 Subject: [PATCH 3/6] feat(skills): inline sibling .md resources; tighten loader Apply post-review fixes to the agent-skills loader: * Inline non-SKILL.md resources. Every *.md file in the skill directory tree (including references/ subdirectories) is now gathered and appended to the skill body in the prompt. This is the closest analogue to the standard's progressive-disclosure model under PR-Agent's single-shot prompt architecture, where the model has no opportunity to Read files mid-turn. scripts/ and assets/ subdirectories remain excluded -- the implementation supports text-only skills, and skills that depend on script execution will not work here. Documented in the loader docstring and configuration.toml. * Treat nested SKILL.md files as independent skills: their subtree is not absorbed into the outer skill's resources. * Switch get_skills_context to attribute-style settings access for consistency with the rest of the codebase, narrow the catch to only the int() coercion (programmer errors now surface), and expand $ENV_VAR alongside ~ in configured paths. * Bump default max_skills_tokens from 4000 to 8000 (a typical multi-skill setup needed lifting; still user-overridable per repo). * Clean up format_skills_context truncation (drop dead variable, slice cleanly against the budget). * Add tests for: Jinja2 syntax in skill bodies (confirms substitution is single-pass and {% raw %} wrapping is unnecessary), env-var and ~ path expansion, sibling .md and references/ resource gathering, scripts/ + assets/ exclusion, nested SKILL.md isolation, resource rendering, and resource-aware budget enforcement. --- pr_agent/algo/skills_loader.py | 167 ++++++++++++++++++++------- pr_agent/settings/configuration.toml | 9 +- tests/unittest/test_skills_loader.py | 138 +++++++++++++++++++++- 3 files changed, 269 insertions(+), 45 deletions(-) diff --git a/pr_agent/algo/skills_loader.py b/pr_agent/algo/skills_loader.py index c17e44a50b..10e9f38b5f 100644 --- a/pr_agent/algo/skills_loader.py +++ b/pr_agent/algo/skills_loader.py @@ -17,12 +17,29 @@ Activation is description-based: every discovered skill is included with its name, description, and body. The model decides which guidance applies based on the descriptions. + +Resources alongside SKILL.md +---------------------------- +The agent-skills standard supports bundled files for progressive disclosure: +``references/`` (markdown context loaded on demand), ``scripts/`` (executables +the agent can invoke), and ``assets/`` (templates / images / data). PR-Agent +runs single-shot model calls and has no tool-use loop, so progressive disclosure +is not implementable here. Instead, this loader inlines every text resource +directly into the prompt: + +* All ``*.md`` files in the skill directory tree (including ``references/``) + are gathered and appended after the SKILL.md body. +* ``scripts/`` and ``assets/`` subdirectories are skipped: scripts are + executables we cannot safely run from a one-shot prompt, and assets are + typically binary. Skills that depend on script execution will not work. + +In short, this implementation supports **text-only** agent skills. """ from __future__ import annotations import os -from dataclasses import dataclass -from typing import List, Optional +from dataclasses import dataclass, field +from typing import List, Optional, Tuple import yaml @@ -32,6 +49,17 @@ # Approximate characters-per-token used to keep the skills block under budget. _CHARS_PER_TOKEN = 4 _FRONTMATTER_DELIMITER = "---" +_DEFAULT_MAX_SKILLS_TOKENS = 8000 +# Subdirectories whose contents are intentionally excluded from inlining, +# matching the agent-skills standard's executable/binary conventions. +_EXCLUDED_RESOURCE_DIRS = frozenset({"scripts", "assets"}) + + +@dataclass(frozen=True) +class SkillResource: + """A non-SKILL.md text file bundled with a skill (e.g. references/guide.md).""" + relative_path: str + content: str @dataclass(frozen=True) @@ -40,6 +68,43 @@ class Skill: description: str body: str path: str + resources: Tuple[SkillResource, ...] = field(default_factory=tuple) + + +def _gather_resources(skill_md_path: str) -> Tuple[SkillResource, ...]: + """Walk the skill's directory tree and collect sibling ``*.md`` files. + + SKILL.md itself is excluded. Subdirectories named ``scripts`` or ``assets`` + are skipped wholesale. If a nested directory contains its own SKILL.md it + is treated as a separate skill and not descended into. + """ + skill_dir = os.path.dirname(skill_md_path) + resources: List[SkillResource] = [] + + for root, dirs, files in os.walk(skill_dir): + # Prune executable / binary subtrees per the agent-skills convention. + dirs[:] = [d for d in dirs if d not in _EXCLUDED_RESOURCE_DIRS] + # A nested skill directory is independent; do not absorb its files. + if root != skill_dir and "SKILL.md" in files: + dirs[:] = [] + continue + for filename in sorted(files): + if not filename.endswith(".md"): + continue + if root == skill_dir and filename == "SKILL.md": + continue + full = os.path.join(root, filename) + try: + with open(full, "r", encoding="utf-8") as fh: + content = fh.read() + except OSError as e: + get_logger().warning(f"Skill resource unreadable: {full} ({e})") + continue + rel = os.path.relpath(full, skill_dir) + resources.append(SkillResource(relative_path=rel, content=content)) + + resources.sort(key=lambda r: r.relative_path) + return tuple(resources) def _parse_skill_file(file_path: str) -> Optional[Skill]: @@ -86,7 +151,13 @@ def _parse_skill_file(file_path: str) -> Optional[Skill]: get_logger().warning(f"Skill missing required 'description' field: {file_path}") return None - return Skill(name=name.strip(), description=description.strip(), body=body, path=file_path) + return Skill( + name=name.strip(), + description=description.strip(), + body=body, + path=file_path, + resources=_gather_resources(file_path), + ) def discover_skills(paths: List[str]) -> List[Skill]: @@ -94,7 +165,8 @@ def discover_skills(paths: List[str]) -> List[Skill]: Each entry in ``paths`` may be either a directory containing skill subdirectories (recursive search) or a path to a SKILL.md file directly. - Missing paths are skipped with a warning. + Environment variables and ``~`` are expanded. Missing paths are skipped + with a warning. """ skills: List[Skill] = [] seen: set = set() @@ -102,16 +174,16 @@ def discover_skills(paths: List[str]) -> List[Skill]: for raw_path in paths or []: if not isinstance(raw_path, str) or not raw_path.strip(): continue - path = os.path.expanduser(raw_path.strip()) - if not os.path.exists(path): - get_logger().warning(f"Skills path does not exist: {path}") + expanded = os.path.expanduser(os.path.expandvars(raw_path.strip())) + if not os.path.exists(expanded): + get_logger().warning(f"Skills path does not exist: {expanded}") continue - if os.path.isfile(path): - candidates = [path] if os.path.basename(path) == "SKILL.md" else [] + if os.path.isfile(expanded): + candidates = [expanded] if os.path.basename(expanded) == "SKILL.md" else [] else: candidates = [] - for root, _dirs, files in os.walk(path): + for root, _dirs, files in os.walk(expanded): if "SKILL.md" in files: candidates.append(os.path.join(root, "SKILL.md")) @@ -129,19 +201,27 @@ def discover_skills(paths: List[str]) -> List[Skill]: def _format_skill(skill: Skill) -> str: - return ( - f"### Skill: {skill.name}\n" - f"When to use: {skill.description}\n\n" - f"{skill.body}".rstrip() - ) + """Render a skill (and its inlined resources) as a prompt-ready string.""" + parts = [ + f"### Skill: {skill.name}", + f"When to use: {skill.description}", + "", + skill.body.rstrip(), + ] + for resource in skill.resources: + parts.append("") + parts.append(f"#### {resource.relative_path}") + parts.append(resource.content.rstrip()) + return "\n".join(parts).rstrip() def format_skills_context(skills: List[Skill], max_tokens: int) -> str: """Format skills into a prompt-ready string under a token budget. Skills are emitted in order; once the running character count would exceed - the budget (estimated as ``max_tokens * 4`` characters), remaining skills are - dropped. Returns an empty string if no skills fit. + the budget (estimated as ``max_tokens * 4`` characters), remaining skills + are dropped. If even the first skill exceeds the budget, its formatted text + is truncated and a marker appended. Returns an empty string when nothing fits. """ if not skills: return "" @@ -149,46 +229,49 @@ def format_skills_context(skills: List[Skill], max_tokens: int) -> str: return "" char_budget = max_tokens * _CHARS_PER_TOKEN + truncate_marker = "\n\n[truncated]" + separator = "\n\n---\n\n" pieces: List[str] = [] used = 0 - separator = "\n\n---\n\n" for skill in skills: formatted = _format_skill(skill) - addition = (separator if pieces else "") + formatted - if used + len(addition) > char_budget: + addition_len = (len(separator) if pieces else 0) + len(formatted) + if used + addition_len > char_budget: if not pieces: - # First skill alone exceeds the budget; truncate its body so we - # still provide partial context rather than dropping everything. - truncated = formatted[: max(0, char_budget - len("\n\n[truncated]"))] - pieces.append(truncated + "\n\n[truncated]") - used = len(pieces[0]) + available = max(0, char_budget - len(truncate_marker)) + pieces.append(formatted[:available] + truncate_marker) else: get_logger().info( f"Skills context budget reached; dropping {len(skills) - len(pieces)} skill(s)" ) break pieces.append(formatted) - used += len(addition) + used += addition_len return separator.join(pieces).strip() def get_skills_context() -> str: - """Convenience helper: read settings, discover, and format. Returns '' - when skills are disabled or no paths yield content. Never raises.""" + """Read settings, discover skills, and format them for prompt injection. + + Returns ``''`` when skills are disabled, no paths are configured, or no + skills are found. The only swallowed error is a non-numeric override of + ``skills.max_skills_tokens``; everything else surfaces normally so genuine + bugs are not masked. + """ + settings = get_settings() + if not settings.skills.enabled: + return "" + paths = list(settings.skills.paths or []) + raw_max = settings.skills.max_skills_tokens try: - settings = get_settings() - skills_cfg = settings.get("skills", None) - if not skills_cfg: - return "" - if not skills_cfg.get("enabled", False): - return "" - paths = skills_cfg.get("paths", []) or [] - max_tokens = int(skills_cfg.get("max_skills_tokens", 4000) or 0) - skills = discover_skills(list(paths)) - if not skills: - return "" - return format_skills_context(skills, max_tokens) - except Exception as e: - get_logger().warning(f"Failed to build skills context: {e}") + max_tokens = int(raw_max) + except (TypeError, ValueError): + get_logger().warning( + f"Invalid skills.max_skills_tokens={raw_max!r}; falling back to {_DEFAULT_MAX_SKILLS_TOKENS}" + ) + max_tokens = _DEFAULT_MAX_SKILLS_TOKENS + skills = discover_skills(paths) + if not skills: return "" + return format_skills_context(skills, max_tokens) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index b23c8c14b6..7924383264 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -355,10 +355,15 @@ uri = "./lancedb" [skills] # Agent skills (SKILL.md) support: discovers SKILL.md files from the configured # filesystem paths and injects their content into review/improve/describe prompts. +# Sibling *.md files in the skill directory tree (e.g. references/guide.md) are +# inlined alongside SKILL.md. PR-Agent supports text-only skills: scripts/ and +# assets/ subdirectories are skipped because PR-Agent uses a single-shot model +# call (no tool-use loop) and cannot execute scripts or load binary assets on +# demand. Skills that depend on script execution will not work here. # See https://github.com/The-PR-Agent/pr-agent/issues/2384 enabled = false -paths = [] # list of directories to scan recursively for "*/SKILL.md" -max_skills_tokens = 4000 # token budget for the combined skills_context block +paths = [] # directories to scan recursively for "*/SKILL.md"; supports ~ and $VAR +max_skills_tokens = 8000 # token budget for the combined skills_context block [best_practices] content = "" diff --git a/tests/unittest/test_skills_loader.py b/tests/unittest/test_skills_loader.py index 56dad58e44..e846ee249a 100644 --- a/tests/unittest/test_skills_loader.py +++ b/tests/unittest/test_skills_loader.py @@ -3,7 +3,10 @@ import textwrap from pathlib import Path -from pr_agent.algo.skills_loader import (Skill, _parse_skill_file, +from jinja2 import Environment, StrictUndefined + +from pr_agent.algo.skills_loader import (Skill, SkillResource, + _parse_skill_file, discover_skills, format_skills_context, get_skills_context) @@ -180,3 +183,136 @@ def test_enabled_with_skills_returns_formatted(self, tmp_path): out = get_skills_context() assert "Skill: demo" in out assert "check the thing" in out + + def test_invalid_max_tokens_falls_back_to_default(self, tmp_path): + _write_skill(tmp_path, "demo", body="check the thing") + from pr_agent.config_loader import get_settings + get_settings().set("skills", {"enabled": True, "paths": [str(tmp_path)], + "max_skills_tokens": "not-a-number"}) + # Should not raise; should still produce skills_context using the default budget. + out = get_skills_context() + assert "Skill: demo" in out + + +class TestJinjaSafety: + """Skills bodies often contain {{ }} or {% %} (Helm/Ansible/Terraform). + + Confirm that Jinja2 substitution is single-pass: the rendered template + contains the literal characters from the substituted variable, not a + re-evaluation of them. + """ + + def test_jinja_syntax_in_skill_body_renders_as_literal(self, tmp_path): + body = "Use {{ unknown_var }} and {% if foo %}bar{% endif %} here." + _write_skill(tmp_path, "helm", body=body) + skills = discover_skills([str(tmp_path)]) + out = format_skills_context(skills, max_tokens=4000) + + # Mirror the prompt-template injection site: a guarded {{ skills_context }}. + template = "before\n{%- if skills_context %}{{ skills_context }}{% endif %}\nafter" + env = Environment(undefined=StrictUndefined) + rendered = env.from_string(template).render(skills_context=out) + + assert "{{ unknown_var }}" in rendered + assert "{% if foo %}" in rendered + + +class TestPathExpansion: + def test_env_var_in_path_is_expanded(self, tmp_path, monkeypatch): + _write_skill(tmp_path, "envtest") + monkeypatch.setenv("SKILLS_TEST_DIR", str(tmp_path)) + skills = discover_skills(["$SKILLS_TEST_DIR"]) + assert [s.name for s in skills] == ["envtest"] + + def test_tilde_in_path_is_expanded(self, tmp_path, monkeypatch): + _write_skill(tmp_path, "homestest") + monkeypatch.setenv("HOME", str(tmp_path)) + skills = discover_skills(["~"]) + assert [s.name for s in skills] == ["homestest"] + + +class TestResourceGathering: + def test_sibling_md_file_is_inlined_as_resource(self, tmp_path): + _write_skill(tmp_path, "withrefs", body="main body") + skill_dir = tmp_path / "withrefs" + (skill_dir / "examples.md").write_text("# Examples\n- one\n- two\n") + + skills = discover_skills([str(tmp_path)]) + assert len(skills) == 1 + names = [r.relative_path for r in skills[0].resources] + assert names == ["examples.md"] + assert "- one" in skills[0].resources[0].content + + def test_references_subdirectory_is_inlined(self, tmp_path): + _write_skill(tmp_path, "withdir") + refs = tmp_path / "withdir" / "references" + refs.mkdir() + (refs / "guide.md").write_text("guide content") + (refs / "deep" / "nested").mkdir(parents=True) + (refs / "deep" / "nested" / "more.md").write_text("deeper content") + + skills = discover_skills([str(tmp_path)]) + rels = sorted(r.relative_path for r in skills[0].resources) + # Use os.sep-agnostic comparison + rels_normalised = [r.replace(os.sep, "/") for r in rels] + assert rels_normalised == ["references/deep/nested/more.md", "references/guide.md"] + + def test_scripts_and_assets_directories_are_excluded(self, tmp_path): + _write_skill(tmp_path, "secure") + skill_dir = tmp_path / "secure" + (skill_dir / "scripts").mkdir() + (skill_dir / "scripts" / "run.py").write_text("print('hi')") + (skill_dir / "scripts" / "notes.md").write_text("script notes (should be excluded)") + (skill_dir / "assets").mkdir() + (skill_dir / "assets" / "data.md").write_text("asset data (should be excluded)") + (skill_dir / "assets" / "img.svg").write_text("") + + skills = discover_skills([str(tmp_path)]) + rels = [r.relative_path for r in skills[0].resources] + assert rels == [] + + def test_nested_skill_directory_is_treated_independently(self, tmp_path): + _write_skill(tmp_path, "outer", body="outer body") + # Nested skill inside the outer skill's directory. + inner_dir = tmp_path / "outer" / "inner" + inner_dir.mkdir() + (inner_dir / "SKILL.md").write_text(textwrap.dedent("""\ + --- + name: inner + description: Use when inner. + --- + + inner body + """)) + # An additional .md right next to the inner SKILL.md should belong to inner only. + (inner_dir / "extra.md").write_text("extra inner content") + + skills = discover_skills([str(tmp_path)]) + by_name = {s.name: s for s in skills} + assert set(by_name) == {"inner", "outer"} + # outer must not absorb the inner skill's files + outer_rels = [r.relative_path for r in by_name["outer"].resources] + assert outer_rels == [] + # inner picks up only its own sibling + inner_rels = [r.relative_path for r in by_name["inner"].resources] + assert inner_rels == ["extra.md"] + + def test_format_skills_context_includes_resource_content(self, tmp_path): + _write_skill(tmp_path, "doc") + (tmp_path / "doc" / "checklist.md").write_text("- item one\n- item two") + skills = discover_skills([str(tmp_path)]) + out = format_skills_context(skills, max_tokens=4000) + assert "#### checklist.md" in out + assert "- item one" in out + + def test_huge_resource_is_dropped_when_skill_already_consumed_budget(self, tmp_path): + # Two skills; the second has a huge resource. Budget fits skill 1 plus + # SKILL.md of skill 2 only — so skill 2 is dropped entirely (not partially). + _write_skill(tmp_path, "first", body="first body") + _write_skill(tmp_path, "second", body="second body") + (tmp_path / "second" / "huge.md").write_text("z" * 50_000) + + skills = discover_skills([str(tmp_path)]) + out = format_skills_context(skills, max_tokens=200) # 800-char budget + assert "Skill: first" in out + assert "Skill: second" not in out From 49bc3d247f2b13d7b39b01e833482a01f869e045 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Mon, 11 May 2026 07:46:31 +0100 Subject: [PATCH 4/6] refactor(skills): use real token counting, cache per request, tighten * Cache get_skills_context() via starlette_context so the three tools (review/improve/describe) share one discovery + parse + format per request, mirroring the apply_repo_settings cache pattern. * Replace the 4-chars-per-token heuristic with TokenEncoder and clip_tokens (already in pr_agent.algo) so the budget reflects actual model tokens. * Log when remaining skills are dropped after the first-skill truncation path (operational visibility). * Remove unused Skill.path field; pass file_path directly to _gather_resources at construction. * Drop dead inner sorted(files) in _gather_resources -- the final resources.sort() is authoritative. * Fix asymmetric Jinja whitespace strip in pr_reviewer_prompts.toml and pr_description_prompts.toml ({% endif %} -> {%- endif %}). * Trim WHAT-narrating comments per code-review pass. --- pr_agent/algo/skills_loader.py | 76 +++++++++++++------ pr_agent/settings/pr_description_prompts.toml | 2 +- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- tests/unittest/test_skills_loader.py | 19 ++--- 4 files changed, 60 insertions(+), 39 deletions(-) diff --git a/pr_agent/algo/skills_loader.py b/pr_agent/algo/skills_loader.py index 10e9f38b5f..eab9a54bd7 100644 --- a/pr_agent/algo/skills_loader.py +++ b/pr_agent/algo/skills_loader.py @@ -42,17 +42,19 @@ from typing import List, Optional, Tuple import yaml +from starlette_context import context +from pr_agent.algo.token_handler import TokenEncoder +from pr_agent.algo.utils import clip_tokens from pr_agent.config_loader import get_settings from pr_agent.log import get_logger -# Approximate characters-per-token used to keep the skills block under budget. -_CHARS_PER_TOKEN = 4 _FRONTMATTER_DELIMITER = "---" _DEFAULT_MAX_SKILLS_TOKENS = 8000 # Subdirectories whose contents are intentionally excluded from inlining, # matching the agent-skills standard's executable/binary conventions. _EXCLUDED_RESOURCE_DIRS = frozenset({"scripts", "assets"}) +_CONTEXT_CACHE_KEY = "skills_context" @dataclass(frozen=True) @@ -67,10 +69,13 @@ class Skill: name: str description: str body: str - path: str resources: Tuple[SkillResource, ...] = field(default_factory=tuple) +def _count_tokens(text: str) -> int: + return len(TokenEncoder.get_token_encoder().encode(text)) + + def _gather_resources(skill_md_path: str) -> Tuple[SkillResource, ...]: """Walk the skill's directory tree and collect sibling ``*.md`` files. @@ -82,13 +87,11 @@ def _gather_resources(skill_md_path: str) -> Tuple[SkillResource, ...]: resources: List[SkillResource] = [] for root, dirs, files in os.walk(skill_dir): - # Prune executable / binary subtrees per the agent-skills convention. dirs[:] = [d for d in dirs if d not in _EXCLUDED_RESOURCE_DIRS] - # A nested skill directory is independent; do not absorb its files. if root != skill_dir and "SKILL.md" in files: dirs[:] = [] continue - for filename in sorted(files): + for filename in files: if not filename.endswith(".md"): continue if root == skill_dir and filename == "SKILL.md": @@ -155,7 +158,6 @@ def _parse_skill_file(file_path: str) -> Optional[Skill]: name=name.strip(), description=description.strip(), body=body, - path=file_path, resources=_gather_resources(file_path), ) @@ -218,49 +220,75 @@ def _format_skill(skill: Skill) -> str: def format_skills_context(skills: List[Skill], max_tokens: int) -> str: """Format skills into a prompt-ready string under a token budget. - Skills are emitted in order; once the running character count would exceed - the budget (estimated as ``max_tokens * 4`` characters), remaining skills - are dropped. If even the first skill exceeds the budget, its formatted text - is truncated and a marker appended. Returns an empty string when nothing fits. + Skills are emitted in order; once the running token count would exceed the + budget, remaining skills are dropped. If the first skill alone exceeds the + budget, its formatted text is clipped via ``clip_tokens`` and a marker is + appended. Returns an empty string when nothing fits. """ if not skills: return "" if max_tokens is None or max_tokens <= 0: return "" - char_budget = max_tokens * _CHARS_PER_TOKEN truncate_marker = "\n\n[truncated]" separator = "\n\n---\n\n" + sep_tokens = _count_tokens(separator) + marker_tokens = _count_tokens(truncate_marker) pieces: List[str] = [] used = 0 for skill in skills: formatted = _format_skill(skill) - addition_len = (len(separator) if pieces else 0) + len(formatted) - if used + addition_len > char_budget: + tokens = _count_tokens(formatted) + addition = (sep_tokens if pieces else 0) + tokens + if used + addition > max_tokens: if not pieces: - available = max(0, char_budget - len(truncate_marker)) - pieces.append(formatted[:available] + truncate_marker) + budget = max(1, max_tokens - marker_tokens) + truncated = clip_tokens(formatted, budget, add_three_dots=False) + pieces.append(truncated + truncate_marker) + if len(skills) > 1: + get_logger().info( + f"First skill exceeded budget; truncated and dropped {len(skills) - 1} skill(s)" + ) else: get_logger().info( f"Skills context budget reached; dropping {len(skills) - len(pieces)} skill(s)" ) break pieces.append(formatted) - used += addition_len + used += addition return separator.join(pieces).strip() +def _get_cached_context() -> Optional[str]: + try: + return context.get(_CONTEXT_CACHE_KEY, None) + except Exception: + return None + + +def _set_cached_context(value: str) -> None: + try: + context[_CONTEXT_CACHE_KEY] = value + except Exception: + pass + + def get_skills_context() -> str: """Read settings, discover skills, and format them for prompt injection. - Returns ``''`` when skills are disabled, no paths are configured, or no - skills are found. The only swallowed error is a non-numeric override of - ``skills.max_skills_tokens``; everything else surfaces normally so genuine - bugs are not masked. + Memoised per request via ``starlette_context`` so the three tools that + inject ``skills_context`` (review, improve, describe) share a single + discovery + parse + format. Returns ``''`` when skills are disabled, no + paths are configured, or no skills are found. """ + cached = _get_cached_context() + if cached is not None: + return cached + settings = get_settings() if not settings.skills.enabled: + _set_cached_context("") return "" paths = list(settings.skills.paths or []) raw_max = settings.skills.max_skills_tokens @@ -272,6 +300,6 @@ def get_skills_context() -> str: ) max_tokens = _DEFAULT_MAX_SKILLS_TOKENS skills = discover_skills(paths) - if not skills: - return "" - return format_skills_context(skills, max_tokens) + out = format_skills_context(skills, max_tokens) if skills else "" + _set_cached_context(out) + return out diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 50ec9e1ced..a6d921d922 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -14,7 +14,7 @@ Organizational standards and review skills (apply the ones relevant to this PR): ===== {{ skills_context }} ===== -{% endif %} +{%- endif %} {%- if extra_instructions %} diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index ffdb9381af..89eff5513e 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -67,7 +67,7 @@ Organizational standards and review skills (apply the ones relevant to this PR): ====== {{ skills_context }} ====== -{% endif %} +{%- endif %} {%- if extra_instructions %} diff --git a/tests/unittest/test_skills_loader.py b/tests/unittest/test_skills_loader.py index e846ee249a..69ca3249f5 100644 --- a/tests/unittest/test_skills_loader.py +++ b/tests/unittest/test_skills_loader.py @@ -5,8 +5,7 @@ from jinja2 import Environment, StrictUndefined -from pr_agent.algo.skills_loader import (Skill, SkillResource, - _parse_skill_file, +from pr_agent.algo.skills_loader import (Skill, _parse_skill_file, discover_skills, format_skills_context, get_skills_context) @@ -84,7 +83,6 @@ class TestDiscoverSkills: def test_finds_nested_skill_md_files(self, tmp_path): _write_skill(tmp_path / "a", "alpha") _write_skill(tmp_path / "b" / "nested", "bravo") - # Directory without SKILL.md should be ignored (tmp_path / "c").mkdir() (tmp_path / "c" / "README.md").write_text("not a skill") @@ -124,7 +122,7 @@ def test_ignores_empty_and_non_string_path_entries(self, tmp_path): class TestFormatSkillsContext: def _mk(self, name: str, body: str = "guidance body") -> Skill: - return Skill(name=name, description=f"Use when {name}", body=body, path=f"/{name}/SKILL.md") + return Skill(name=name, description=f"Use when {name}", body=body) def test_returns_empty_when_no_skills(self): assert format_skills_context([], 4000) == "" @@ -140,19 +138,15 @@ def test_includes_name_description_and_body(self): assert "step two" in out def test_drops_skills_beyond_budget(self): - big_body = "x" * 1000 - skills = [self._mk(f"s{i}", body=big_body) for i in range(5)] - # Budget of 250 tokens ~ 1000 chars, fits roughly one skill. - out = format_skills_context(skills, max_tokens=250) + skills = [self._mk(f"s{i}", body="x " * 500) for i in range(5)] + out = format_skills_context(skills, max_tokens=300) assert "Skill: s0" in out - # At least one of the later skills must be dropped. assert "Skill: s4" not in out def test_truncates_when_first_skill_exceeds_budget(self): - huge = self._mk("huge", body="y" * 5000) - out = format_skills_context([huge], max_tokens=50) # 200 char budget + huge = self._mk("huge", body="y " * 5000) + out = format_skills_context([huge], max_tokens=50) assert "[truncated]" in out - assert len(out) <= 250 # budget + truncation marker def test_separator_between_multiple_skills(self): out = format_skills_context( @@ -284,7 +278,6 @@ def test_nested_skill_directory_is_treated_independently(self, tmp_path): inner body """)) - # An additional .md right next to the inner SKILL.md should belong to inner only. (inner_dir / "extra.md").write_text("extra inner content") skills = discover_skills([str(tmp_path)]) From 10f35914cd8b529ac98fe478b3362097d76f2bd0 Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Mon, 11 May 2026 07:49:56 +0100 Subject: [PATCH 5/6] fix(skills): block repo-supplied [skills] config; cap resource file size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Qodo review findings on PR #2385: * Security: apply_repo_settings now refuses to apply the [skills] section from a repo's .pr_agent.toml. skills.paths walks the host filesystem and inlines file contents into the LLM prompt; without this gate, a malicious repo could set paths = ["/etc"] or ["~/.ssh"] in its repo settings and exfiltrate sensitive host files. The skills section is now host-level configuration only (global settings, env vars, CLI). * Reliability: per-resource file size cap (256 KB). _gather_resources now stats each candidate before opening; oversized files are skipped with a warning log rather than read into memory. Defends against pathological skill directories or accidental inclusion of generated docs that would spike memory during tool init. * Tests for both behaviours. The third Qodo finding — that get_skills_context injects every skill unconditionally rather than selecting a relevant subset based on PR context — is the documented progressive-disclosure limitation called out in the PR description's Limitations section and detailed in the out-of-band design note. A two-pass selector is the architecturally correct follow-up and is intentionally out of scope here. --- pr_agent/algo/skills_loader.py | 14 ++++++++++ pr_agent/git_providers/utils.py | 12 ++++++++ tests/unittest/test_skills_loader.py | 41 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/pr_agent/algo/skills_loader.py b/pr_agent/algo/skills_loader.py index eab9a54bd7..2a80e93a18 100644 --- a/pr_agent/algo/skills_loader.py +++ b/pr_agent/algo/skills_loader.py @@ -55,6 +55,10 @@ # matching the agent-skills standard's executable/binary conventions. _EXCLUDED_RESOURCE_DIRS = frozenset({"scripts", "assets"}) _CONTEXT_CACHE_KEY = "skills_context" +# Per-resource-file size cap. Defence-in-depth against pathological skill +# directories (large markdown dumps, accidental inclusion of generated docs, +# or a misconfigured paths entry pointing at a directory with huge files). +_MAX_RESOURCE_FILE_BYTES = 256 * 1024 @dataclass(frozen=True) @@ -97,6 +101,16 @@ def _gather_resources(skill_md_path: str) -> Tuple[SkillResource, ...]: if root == skill_dir and filename == "SKILL.md": continue full = os.path.join(root, filename) + try: + size = os.path.getsize(full) + except OSError as e: + get_logger().warning(f"Skill resource unreadable: {full} ({e})") + continue + if size > _MAX_RESOURCE_FILE_BYTES: + get_logger().warning( + f"Skill resource skipped (exceeds {_MAX_RESOURCE_FILE_BYTES} bytes): {full} ({size} bytes)" + ) + continue try: with open(full, "r", encoding="utf-8") as fh: content = fh.read() diff --git a/pr_agent/git_providers/utils.py b/pr_agent/git_providers/utils.py index 1e64b9578d..4036203683 100644 --- a/pr_agent/git_providers/utils.py +++ b/pr_agent/git_providers/utils.py @@ -10,6 +10,12 @@ from pr_agent.git_providers import get_git_provider_with_context from pr_agent.log import get_logger +# Sections whose values come from host-level configuration only. Repo-supplied +# .pr_agent.toml MUST NOT be able to set these, because they expose +# filesystem-touching capabilities (e.g. skills.paths) that a malicious repo +# could otherwise abuse to read sensitive host files into the LLM prompt. +_HOST_ONLY_SETTINGS_SECTIONS = frozenset({"skills"}) + def apply_repo_settings(pr_url): os.environ["AUTO_CAST_FOR_DYNACONF"] = "false" @@ -64,6 +70,12 @@ def apply_repo_settings(pr_url): # Skip excluded items, such as forbidden to load env. get_logger().debug(f"Skipping a section: {section} which is not allowed") continue + if section.lower() in _HOST_ONLY_SETTINGS_SECTIONS: + get_logger().warning( + f"Refusing to apply host-only section [{section}] from repo settings; " + f"this section can only be configured at the host level" + ) + continue section_dict = copy.deepcopy(get_settings().as_dict().get(section, {})) for key, value in contents.items(): section_dict[key] = value diff --git a/tests/unittest/test_skills_loader.py b/tests/unittest/test_skills_loader.py index 69ca3249f5..0c4db1f536 100644 --- a/tests/unittest/test_skills_loader.py +++ b/tests/unittest/test_skills_loader.py @@ -290,6 +290,36 @@ def test_nested_skill_directory_is_treated_independently(self, tmp_path): inner_rels = [r.relative_path for r in by_name["inner"].resources] assert inner_rels == ["extra.md"] + def test_repo_settings_cannot_override_host_only_skills_section(self, monkeypatch): + """A malicious repo's .pr_agent.toml must not be able to enable skills + or set skills.paths — that would allow host-file exfiltration to the LLM. + """ + from pr_agent.config_loader import get_settings + from pr_agent.git_providers import utils as gp_utils + + get_settings().unset("skills") + get_settings().set("skills", {"enabled": False, "paths": [], + "max_skills_tokens": 8000}) + get_settings().config.use_repo_settings_file = True + + repo_toml = b'[skills]\nenabled = true\npaths = ["/etc/pwned"]\n' + + class FakeGitProvider: + def __init__(self, *a, **kw): + pass + + def get_repo_settings(self): + return repo_toml + + monkeypatch.setattr(gp_utils, "get_git_provider_with_context", + lambda _url: FakeGitProvider()) + gp_utils.apply_repo_settings("https://example.com/owner/repo/pull/1") + + assert get_settings().skills.enabled is False, \ + "Repo settings must not be able to enable skills" + assert "/etc/pwned" not in list(get_settings().skills.paths), \ + "Repo settings must not be able to inject paths" + def test_format_skills_context_includes_resource_content(self, tmp_path): _write_skill(tmp_path, "doc") (tmp_path / "doc" / "checklist.md").write_text("- item one\n- item two") @@ -298,6 +328,17 @@ def test_format_skills_context_includes_resource_content(self, tmp_path): assert "#### checklist.md" in out assert "- item one" in out + def test_oversized_resource_file_is_skipped(self, tmp_path, caplog): + _write_skill(tmp_path, "huge-res") + huge = tmp_path / "huge-res" / "huge.md" + huge.write_text("a" * (300 * 1024)) # 300 KB, above the 256 KB cap + (tmp_path / "huge-res" / "fine.md").write_text("small content") + + skills = discover_skills([str(tmp_path)]) + rels = [r.relative_path for r in skills[0].resources] + assert "fine.md" in rels + assert "huge.md" not in rels + def test_huge_resource_is_dropped_when_skill_already_consumed_budget(self, tmp_path): # Two skills; the second has a huge resource. Budget fits skill 1 plus # SKILL.md of skill 2 only — so skill 2 is dropped entirely (not partially). From 33f6e27ce4210205a9203dd6f15129d2bd609c0d Mon Sep 17 00:00:00 2001 From: Ismael Martinez Ramos Date: Mon, 11 May 2026 09:45:03 +0100 Subject: [PATCH 6/6] fix(skills): tolerate non-UTF-8 files instead of crashing UnicodeDecodeError is not a subclass of OSError, so a single non-UTF-8 SKILL.md or sibling resource would propagate out of get_skills_context() and break /review, /improve, /describe whenever skills were enabled. Both reads now catch (OSError, UnicodeDecodeError); the offending file is logged and skipped. Tests cover both SKILL.md and resource paths. Addresses Qodo finding #3 on PR #2385. --- pr_agent/algo/skills_loader.py | 4 ++-- tests/unittest/test_skills_loader.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/skills_loader.py b/pr_agent/algo/skills_loader.py index 2a80e93a18..3aee220526 100644 --- a/pr_agent/algo/skills_loader.py +++ b/pr_agent/algo/skills_loader.py @@ -114,7 +114,7 @@ def _gather_resources(skill_md_path: str) -> Tuple[SkillResource, ...]: try: with open(full, "r", encoding="utf-8") as fh: content = fh.read() - except OSError as e: + except (OSError, UnicodeDecodeError) as e: get_logger().warning(f"Skill resource unreadable: {full} ({e})") continue rel = os.path.relpath(full, skill_dir) @@ -129,7 +129,7 @@ def _parse_skill_file(file_path: str) -> Optional[Skill]: try: with open(file_path, "r", encoding="utf-8") as f: content = f.read() - except OSError as e: + except (OSError, UnicodeDecodeError) as e: get_logger().warning(f"Skill file unreadable: {file_path} ({e})") return None diff --git a/tests/unittest/test_skills_loader.py b/tests/unittest/test_skills_loader.py index 0c4db1f536..1a7721bbaa 100644 --- a/tests/unittest/test_skills_loader.py +++ b/tests/unittest/test_skills_loader.py @@ -328,6 +328,23 @@ def test_format_skills_context_includes_resource_content(self, tmp_path): assert "#### checklist.md" in out assert "- item one" in out + def test_non_utf8_skill_md_is_skipped_without_crashing(self, tmp_path): + bad = tmp_path / "broken" + bad.mkdir() + (bad / "SKILL.md").write_bytes(b"---\nname: x\ndescription: y\n---\n\n\xff\xfe invalid utf-8") + _write_skill(tmp_path, "good") + skills = discover_skills([str(tmp_path)]) + assert [s.name for s in skills] == ["good"] + + def test_non_utf8_resource_file_is_skipped_without_crashing(self, tmp_path): + _write_skill(tmp_path, "mixed") + (tmp_path / "mixed" / "good.md").write_text("readable content") + (tmp_path / "mixed" / "bad.md").write_bytes(b"\xff\xfe binary garbage") + skills = discover_skills([str(tmp_path)]) + rels = [r.relative_path for r in skills[0].resources] + assert "good.md" in rels + assert "bad.md" not in rels + def test_oversized_resource_file_is_skipped(self, tmp_path, caplog): _write_skill(tmp_path, "huge-res") huge = tmp_path / "huge-res" / "huge.md"