diff --git a/CHANGELOG.md b/CHANGELOG.md index 2501966..1dcac6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ The format is based on Keep a Changelog, and this project follows Semantic Versi - Removed the empty root `.gitkeep` placeholder. ### Fixed +- PR summaries now stay aligned with merge triage test-first actions, and full-fallback PR analysis hides repo-wide unchanged noise from merge triage scoring. - GitHub Actions example now installs from the GitHub repository while the package is not published on PyPI. ## [0.1.1] - 2026-04-21 diff --git a/src/ai_risk_manager/pipeline/run.py b/src/ai_risk_manager/pipeline/run.py index aac10a4..3d05cc9 100644 --- a/src/ai_risk_manager/pipeline/run.py +++ b/src/ai_risk_manager/pipeline/run.py @@ -722,6 +722,7 @@ def _stage_analysis( test_plan, summary=summary, analysis_scope=scope.analysis_scope, + changed_files=scope.changed_files, ) sinks.progress.finish(6, total_steps, "QA strategy agent", t) diff --git a/src/ai_risk_manager/pr_scope.py b/src/ai_risk_manager/pr_scope.py new file mode 100644 index 0000000..687300c --- /dev/null +++ b/src/ai_risk_manager/pr_scope.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import Protocol + +_PR_SCOPED_RULE_IDS = {"ui_journey_smoke_failed"} +_PR_SCOPED_RULE_PREFIXES = ("pr_",) + + +class FindingLike(Protocol): + rule_id: str + source_ref: str + evidence_refs: list[str] + + +def normalize_path(path: str) -> str: + normalized = path.replace("\\", "/").strip() + while normalized.startswith("./"): + normalized = normalized[2:] + return normalized + + +def source_ref_path(source_ref: str) -> str: + parts = source_ref.rsplit(":", 1) + if len(parts) == 2 and parts[1].isdigit(): + return parts[0] + return source_ref + + +def finding_matches_changed_files(finding: FindingLike, changed_files: set[str]) -> bool: + refs = [finding.source_ref, *finding.evidence_refs] + normalized_changed = {normalize_path(path) for path in changed_files} + for ref in refs: + if normalize_path(source_ref_path(ref)) in normalized_changed: + return True + return False + + +def is_pr_scoped_finding(finding: FindingLike, changed_files: set[str]) -> bool: + if finding.rule_id.startswith(_PR_SCOPED_RULE_PREFIXES) or finding.rule_id in _PR_SCOPED_RULE_IDS: + return True + return bool(changed_files) and finding_matches_changed_files(finding, changed_files) diff --git a/src/ai_risk_manager/reports/generator.py b/src/ai_risk_manager/reports/generator.py index d8452a1..4e572f9 100644 --- a/src/ai_risk_manager/reports/generator.py +++ b/src/ai_risk_manager/reports/generator.py @@ -1,9 +1,12 @@ from __future__ import annotations from collections import Counter +from collections.abc import Iterable from pathlib import Path +from ai_risk_manager.pr_scope import is_pr_scoped_finding, normalize_path from ai_risk_manager.schemas.types import ( + Finding, GitHubCheckPayload, GitHubCheckConclusion, PRSummary, @@ -36,8 +39,6 @@ "agent_generated_test_missing_negative_path": "Add negative-path coverage for changed write or validation flows.", "agent_generated_test_nondeterministic_dependency": "Stabilize flaky tests before relying on them in merge decisions.", } -_PR_SCOPED_RULE_IDS = {"ui_journey_smoke_failed"} -_PR_SCOPED_RULE_PREFIXES = ("pr_",) def _summary_counts(findings: FindingsReport) -> dict[str, int]: @@ -50,34 +51,6 @@ def _summary_counts(findings: FindingsReport) -> dict[str, int]: } -def _normalize_path(path: str) -> str: - normalized = path.replace("\\", "/").strip() - while normalized.startswith("./"): - normalized = normalized[2:] - return normalized - - -def _source_ref_path(source_ref: str) -> str: - parts = source_ref.rsplit(":", 1) - if len(parts) == 2 and parts[1].isdigit(): - return parts[0] - return source_ref - - -def _finding_matches_changed_files(finding, changed_files: set[str]) -> bool: - refs = [finding.source_ref, *finding.evidence_refs] - for ref in refs: - if _normalize_path(_source_ref_path(ref)) in changed_files: - return True - return False - - -def _is_pr_scoped_finding(finding, changed_files: set[str]) -> bool: - if finding.rule_id.startswith(_PR_SCOPED_RULE_PREFIXES) or finding.rule_id in _PR_SCOPED_RULE_IDS: - return True - return bool(changed_files) and _finding_matches_changed_files(finding, changed_files) - - def _cap_repo_wide_repeated_findings(findings, changed_files: set[str]): if not changed_files: return list(findings) @@ -85,7 +58,7 @@ def _cap_repo_wide_repeated_findings(findings, changed_files: set[str]): capped = [] repo_wide_rule_counts: dict[str, int] = {} for finding in findings: - if not _is_pr_scoped_finding(finding, changed_files): + if not is_pr_scoped_finding(finding, changed_files): repo_wide_rule_counts[finding.rule_id] = repo_wide_rule_counts.get(finding.rule_id, 0) + 1 if repo_wide_rule_counts[finding.rule_id] > 1: continue @@ -94,11 +67,11 @@ def _cap_repo_wide_repeated_findings(findings, changed_files: set[str]): def _rank_findings(findings, *, changed_files: set[str] | None = None, prefer_pr_scope: bool = False): - normalized_changed_files = {_normalize_path(path) for path in (changed_files or set())} + normalized_changed_files = {normalize_path(path) for path in (changed_files or set())} return sorted( findings, key=lambda f: ( - 0 if prefer_pr_scope and _is_pr_scoped_finding(f, normalized_changed_files) else 1, + 0 if prefer_pr_scope and is_pr_scoped_finding(f, normalized_changed_files) else 1, SEVERITY_INDEX.get(f.severity, len(SEVERITY_ORDER)), CONFIDENCE_ORDER.get(f.confidence, 3), -len(f.evidence_refs), @@ -190,6 +163,22 @@ def _pr_summary_actions(result: PipelineResult, ranked_findings) -> list[PRSumma return actions +def _merge_triage_action_findings(result: PipelineResult) -> list[Finding]: + action_finding_ids = {action.finding_id for action in result.merge_triage.actions} + return [finding for finding in result.findings.findings if finding.id in action_finding_ids] + + +def _dedupe_findings(findings: Iterable[Finding]) -> list[Finding]: + deduped: list[Finding] = [] + seen: set[str] = set() + for finding in findings: + if finding.id in seen: + continue + deduped.append(finding) + seen.add(finding.id) + return deduped + + def _format_profiles(summary: PRSummary) -> str: if not summary.profiles: return "none" @@ -204,8 +193,24 @@ def _format_trust(finding) -> str | None: return f"{trust_band} ({trust_score:.2f})" +def _report_focus_findings(result: PipelineResult) -> list[Finding]: + if result.analysis_scope == "full_fallback" and result.merge_triage.actions: + return _merge_triage_action_findings(result) + return list(result.findings.findings) + + +def _report_focus_test_items(result: PipelineResult): + if result.analysis_scope != "full_fallback" or not result.merge_triage.actions: + return list(result.test_plan.items) + + action_finding_ids = {action.finding_id for action in result.merge_triage.actions} + return [item for item in result.test_plan.items if item.finding_id in action_finding_ids] + + def render_report_md(result: PipelineResult, notes: list[str]) -> str: - counts = _summary_counts(result.findings) + focus_findings = _report_focus_findings(result) + focus_test_items = _report_focus_test_items(result) + counts = _summary_counts(FindingsReport(findings=focus_findings, generated_without_llm=result.findings.generated_without_llm)) lines: list[str] = [] lines.append("# Risk Analysis Report") @@ -248,6 +253,8 @@ def render_report_md(result: PipelineResult, notes: list[str]) -> str: f"`{len(result.deterministic_graph.nodes)} nodes`, `{len(result.deterministic_graph.edges)} edges`" ) lines.append(f"- Suppressed findings: `{result.suppressed_count}`") + if len(focus_findings) != len(result.findings.findings): + lines.append(f"- Detailed findings listed: `{len(result.findings.findings)}`") lines.append(f"- Run metric (precision proxy): `{result.run_metrics.precision_proxy:.2%}`") lines.append(f"- Run metric (actionability proxy): `{result.run_metrics.actionability_proxy:.2%}`") lines.append(f"- Run metric (verification pass rate): `{result.summary.verification_pass_rate:.2%}`") @@ -278,25 +285,26 @@ def render_report_md(result: PipelineResult, notes: list[str]) -> str: lines.append("") lines.append("## Why This Matters for Release Risk") lines.append("") - if not result.findings.findings: - lines.append("No high-signal release risks detected in current scope.") + if not focus_findings: + lines.append("No high-signal release risks detected in current triage scope.") else: top_severity = sorted( - result.findings.findings, + focus_findings, key=lambda f: SEVERITY_INDEX.get(f.severity, len(SEVERITY_ORDER)), )[0].severity + scope_label = "changed-file/PR-scoped" if result.analysis_scope == "full_fallback" else "active" lines.append( - f"Detected `{len(result.findings.findings)}` active risk(s). " + f"Detected `{len(focus_findings)}` {scope_label} risk(s). " f"Highest severity is `{top_severity}`, which can impact release confidence if ignored." ) lines.append("") lines.append("## Top Actions for Next Sprint") lines.append("") - if not result.findings.findings: + if not focus_findings: lines.append("No immediate actions required.") else: - actions = _rank_findings(result.findings.findings)[:5] + actions = _rank_findings(focus_findings)[:5] for finding in actions: lines.append(f"- Action: {finding.recommendation}") lines.append(f" Expected impact: reduce `{finding.rule_id}` risk around `{finding.source_ref}`.") @@ -304,10 +312,10 @@ def render_report_md(result: PipelineResult, notes: list[str]) -> str: lines.append("") lines.append("## Top Risks") lines.append("") - if not result.findings.findings: + if not focus_findings: lines.append("No risks detected in current scope.") else: - top = _rank_findings(result.findings.findings)[:5] + top = _rank_findings(focus_findings)[:5] for finding in top: lines.append(f"### {finding.title}") lines.append(f"- Severity: `{finding.severity}`") @@ -338,10 +346,10 @@ def render_report_md(result: PipelineResult, notes: list[str]) -> str: lines.append("") lines.append("## Recommended Test Strategy") lines.append("") - if not result.test_plan.items: + if not focus_test_items: lines.append("No additional test recommendations.") else: - for recommendation in result.test_plan.items: + for recommendation in focus_test_items: lines.append( f"- [{recommendation.priority}] {recommendation.recommendation} " f"(source: `{recommendation.source_ref}`)" @@ -382,7 +390,8 @@ def build_pr_summary( for finding in top_candidates if finding.status == "new" and SEVERITY_INDEX.get(finding.severity, len(SEVERITY_ORDER)) <= min_rank ] - normalized_changed_files = {_normalize_path(path) for path in (changed_files or set())} + top_candidates = _dedupe_findings([*top_candidates, *_merge_triage_action_findings(result)]) + normalized_changed_files = {normalize_path(path) for path in (changed_files or set())} ranked_top_candidates = _rank_findings( top_candidates, changed_files=normalized_changed_files, diff --git a/src/ai_risk_manager/triage/merge.py b/src/ai_risk_manager/triage/merge.py index fe9f89b..2035c82 100644 --- a/src/ai_risk_manager/triage/merge.py +++ b/src/ai_risk_manager/triage/merge.py @@ -2,6 +2,7 @@ from dataclasses import replace +from ai_risk_manager.pr_scope import is_pr_scoped_finding, normalize_path from ai_risk_manager.schemas.types import ( AnalysisScope, CIMode, @@ -142,6 +143,19 @@ def _risk_score(findings: list[Finding]) -> int: return min(100, sum(top_scores)) +def _triage_candidates( + findings: list[Finding], + *, + analysis_scope: AnalysisScope, + changed_files: set[str] | None, +) -> list[Finding]: + if analysis_scope != "full_fallback" or changed_files is None: + return list(findings) + + normalized_changed_files = {normalize_path(path) for path in changed_files} + return [finding for finding in findings if is_pr_scoped_finding(finding, normalized_changed_files)] + + def _resolve_decision( findings: list[Finding], *, @@ -194,6 +208,7 @@ def _decision_reasons( analysis_scope: AnalysisScope, repository_support_state: RepositorySupportState, summary: RunSummary, + hidden_fallback_finding_count: int = 0, ) -> list[str]: reasons: list[str] = [] new_high_or_critical = [ @@ -203,6 +218,10 @@ def _decision_reasons( reasons.append(f"{len(new_high_or_critical)} new high/critical release-risk finding(s) in current scope.") if analysis_scope == "full_fallback": reasons.append("PR impact mapping fell back to full scan, so changed-file risk attribution is weaker.") + if hidden_fallback_finding_count: + reasons.append( + f"{hidden_fallback_finding_count} repo-wide finding(s) hidden from merge triage because they do not match changed files." + ) if analysis_scope == "full" and any(finding.severity in {"critical", "high"} for finding in findings): reasons.append("Full repository scan found high/critical release-risk signals.") if repository_support_state != "supported": @@ -212,7 +231,10 @@ def _decision_reasons( if summary.verification_pass_rate < 1.0: reasons.append(f"Verification pass rate is `{summary.verification_pass_rate:.0%}`.") if not findings: - reasons.append("No findings survived evidence, policy, suppression, and confidence filters.") + if analysis_scope == "full_fallback" and hidden_fallback_finding_count: + reasons.append("No changed-file release-risk finding survived fallback filters.") + else: + reasons.append("No findings survived evidence, policy, suppression, and confidence filters.") return reasons @@ -222,15 +244,18 @@ def build_merge_triage( *, summary: RunSummary, analysis_scope: AnalysisScope, + changed_files: set[str] | None = None, ) -> MergeTriage: - ranked = _rank_findings(findings.findings) + candidates = _triage_candidates(findings.findings, analysis_scope=analysis_scope, changed_files=changed_files) + hidden_fallback_finding_count = len(findings.findings) - len(candidates) + ranked = _rank_findings(candidates) actions = _budgeted_actions(ranked, test_plan) risk_score = _risk_score(ranked) new_high_or_critical_count = sum( - 1 for finding in findings.findings if finding.status == "new" and finding.severity in {"critical", "high"} + 1 for finding in candidates if finding.status == "new" and finding.severity in {"critical", "high"} ) decision = _resolve_decision( - findings.findings, + candidates, analysis_scope=analysis_scope, repository_support_state=summary.repository_support_state, effective_ci_mode=summary.effective_ci_mode, @@ -247,10 +272,11 @@ def build_merge_triage( verification_pass_rate=summary.verification_pass_rate, evidence_completeness=summary.evidence_completeness, reasons=_decision_reasons( - findings.findings, + candidates, analysis_scope=analysis_scope, repository_support_state=summary.repository_support_state, summary=summary, + hidden_fallback_finding_count=hidden_fallback_finding_count, ), actions=actions, generated_without_llm=findings.generated_without_llm and test_plan.generated_without_llm, diff --git a/tests/test_merge_triage.py b/tests/test_merge_triage.py index 0921d66..0661476 100644 --- a/tests/test_merge_triage.py +++ b/tests/test_merge_triage.py @@ -127,6 +127,46 @@ def test_merge_triage_full_scan_requires_review_for_high_risk() -> None: assert any("Full repository scan" in reason for reason in triage.reasons) +def test_merge_triage_full_fallback_focuses_changed_file_findings() -> None: + legacy_high = _finding( + "legacy_checkout", + severity="high", + status="unchanged", + rule_id="critical_path_no_tests", + ) + unrelated_new_high = _finding( + "schema_fixture", + severity="high", + status="new", + rule_id="critical_path_no_tests", + ) + changed_medium = _finding( + "app/service", + severity="medium", + status="new", + rule_id="pr_code_change_without_test_delta", + ) + changed_medium.source_ref = "app/service.py" + changed_medium.evidence_refs = ["app/service.py"] + + triage = build_merge_triage( + FindingsReport( + findings=[legacy_high, unrelated_new_high, changed_medium], + generated_without_llm=True, + ), + RiskTestPlan(generated_without_llm=True), + summary=RunSummary(verification_pass_rate=1.0, evidence_completeness=1.0), + analysis_scope="full_fallback", + changed_files={"app/service.py"}, + ) + + assert triage.decision == "review_required" + assert triage.risk_score < 100 + assert triage.new_high_or_critical_count == 0 + assert [action.finding_id for action in triage.actions] == [changed_medium.id] + assert any("repo-wide finding(s) hidden" in reason for reason in triage.reasons) + + def test_merge_triage_markdown_explains_test_first_order() -> None: finding = _finding("create_order", severity="high", confidence="high") triage = build_merge_triage( diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f4d298b..9754eee 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1353,7 +1353,8 @@ def test_pr_baseline_status_and_only_new_summary(tmp_path: Path, write_file) -> assert result.summary.resolved_count == 1 assert result.summary.unchanged_count == 1 pr_summary = (ctx.output_dir / "pr_summary.md").read_text(encoding="utf-8") - assert "No findings in current PR scope." in pr_summary + assert "[high] [unchanged] `critical_path_no_tests`" in pr_summary + assert "- [high] `critical_path_no_tests` at `app/api.py:1`" in pr_summary def test_pr_baseline_matches_legacy_sha1_fingerprint_after_hash_migration(tmp_path: Path, write_file) -> None: diff --git a/tests/test_reports_generator.py b/tests/test_reports_generator.py index 35716cc..5bfea5d 100644 --- a/tests/test_reports_generator.py +++ b/tests/test_reports_generator.py @@ -1,11 +1,12 @@ from __future__ import annotations -from ai_risk_manager.reports.generator import build_pr_summary +from ai_risk_manager.reports.generator import build_pr_summary, render_report_md from ai_risk_manager.schemas.types import ( Finding, FindingsReport, Graph, MergeTriage, + MergeTriageAction, PipelineResult, PreflightResult, RunMetrics, @@ -21,6 +22,7 @@ def _finding( *, severity: Severity = "high", evidence_refs: list[str] | None = None, + status: str = "new", ) -> Finding: return Finding( id=f"{rule_id}:{source_ref}", @@ -34,7 +36,7 @@ def _finding( suppression_key=f"{rule_id}:{source_ref}", recommendation=f"fix {rule_id}", evidence_refs=evidence_refs or [source_ref], - status="new", + status=status, generated_without_llm=True, ) @@ -148,3 +150,67 @@ def test_pr_summary_keeps_repeated_findings_when_source_changed() -> None: top_dependency_count = sum(1 for finding in summary.top_findings if finding.rule_id == "dependency_risk_policy_violation") assert top_dependency_count == 3 + + +def test_pr_summary_includes_merge_triage_action_when_only_new_hides_unchanged_findings() -> None: + unchanged = _finding( + "agent_generated_test_missing_negative_path", + "tests/test_sse.py:242", + severity="high", + status="unchanged", + ) + result = _result([unchanged]) + result.summary.new_count = 0 + result.summary.unchanged_count = 1 + result.merge_triage.actions = [ + MergeTriageAction( + id="merge-triage:1", + finding_id=unchanged.id, + rule_id=unchanged.rule_id, + title=unchanged.title, + priority=unchanged.severity, + confidence=unchanged.confidence, + status=unchanged.status, + source_ref=unchanged.source_ref, + action=unchanged.recommendation, + rationale="Optional cleanup remains in a changed test file.", + estimated_minutes=5, + ) + ] + + summary = build_pr_summary( + result, + [], + only_new=True, + changed_files={"tests/test_sse.py"}, + ) + + assert summary.top_findings[0].status == "unchanged" + assert summary.top_actions[0].rule_id == "agent_generated_test_missing_negative_path" + + +def test_report_full_fallback_top_sections_follow_merge_triage_scope() -> None: + repo_wide = _finding("critical_path_no_tests", "server/app.js:48", severity="high") + changed_scope = _finding("pr_code_change_without_test_delta", "public/app.js", severity="medium") + result = _result([repo_wide, changed_scope]) + result.merge_triage.actions = [ + MergeTriageAction( + id="merge-triage:1", + finding_id=changed_scope.id, + rule_id=changed_scope.rule_id, + title=changed_scope.title, + priority=changed_scope.severity, + confidence=changed_scope.confidence, + status=changed_scope.status, + source_ref=changed_scope.source_ref, + action=changed_scope.recommendation, + rationale="Changed application code has no test delta.", + estimated_minutes=3, + ) + ] + + report = render_report_md(result, []) + top_sections = report.split("## Findings", maxsplit=1)[0] + + assert "public/app.js" in top_sections + assert "server/app.js" not in top_sections