-
Notifications
You must be signed in to change notification settings - Fork 5
Add AI-assisted triage #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3c5d04f
ff1d8de
008d883
3cb5ec6
14996da
940728a
ac1c44a
3f20940
4f1acf2
5851c2f
7041f85
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,13 @@ grade: stable | |
| apps: | ||
| startriage: | ||
| command: bin/startriage | ||
| environment: | ||
| # The Copilot runtime defaults COPILOT_HOME to ~/.copilot, a hidden path the | ||
| # `home` plug cannot write to. Point it at SNAP_USER_DATA, always writable. | ||
| COPILOT_HOME: $SNAP_USER_DATA/.copilot | ||
| # Expose staged ubuntu-dev-tools helpers (pull-lp-source, debdiff, …) on PATH | ||
| # so the agent's shell tool can pull and diff package source. | ||
| PATH: $SNAP/usr/bin:$SNAP/bin:$PATH | ||
| plugs: | ||
| - network | ||
| - network-bind | ||
|
|
@@ -39,3 +46,10 @@ parts: | |
| plugin: python | ||
| source: . | ||
| source-type: git | ||
| # github-copilot-sdk bundles the Copilot CLI runtime binary it spawns, so no | ||
| # separate Node part is needed; pip ships the runtime inside the snap. | ||
| python-packages: | ||
| - github-copilot-sdk | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's OK for the text-only bug analysis. |
||
| stage-packages: | ||
| # pull-lp-source / dpkg-source / debdiff for the agent's source analysis. | ||
| - ubuntu-dev-tools | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should rather be set up inside the agent's analysis environment (the LXD container spin-up I proposed earlier). Since we're running full shell commands instead of text-only analysis, we should containerize it properly, and ubuntu-dev-tools should then live inside that container. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| """AI/agentic triage layer for startriage.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from .agent import BugOutcome, load_system_prompt, triage_bug, triage_bugs | ||
| from .contract import ( | ||
| AgentResult, | ||
| AgentResultError, | ||
| ProposedFix, | ||
| extract_json_block, | ||
| parse_agent_result, | ||
| ) | ||
| from .provider import ( | ||
| CopilotProvider, | ||
| FakeProvider, | ||
| Provider, | ||
| build_client_kwargs, | ||
| build_provider, | ||
| build_session_kwargs, | ||
| ) | ||
| from .render import append_report, render_report, report_filename, write_report | ||
| from .run import ( | ||
| gather_user_bug_payloads, | ||
| parse_bug_number, | ||
| payloads_from_tasks, | ||
| run_agent_on_payloads, | ||
| ) | ||
|
|
||
| __all__ = [ | ||
| "AgentResult", | ||
| "AgentResultError", | ||
| "BugOutcome", | ||
| "CopilotProvider", | ||
| "FakeProvider", | ||
| "ProposedFix", | ||
| "Provider", | ||
| "append_report", | ||
| "build_client_kwargs", | ||
| "build_provider", | ||
| "build_session_kwargs", | ||
| "extract_json_block", | ||
| "gather_user_bug_payloads", | ||
| "load_system_prompt", | ||
| "parse_agent_result", | ||
| "parse_bug_number", | ||
| "payloads_from_tasks", | ||
| "render_report", | ||
| "report_filename", | ||
| "run_agent_on_payloads", | ||
| "triage_bug", | ||
| "triage_bugs", | ||
| "write_report", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| """Sequential agent loop: run one triage session per bug, skip-and-continue. | ||
|
|
||
| The provider (see :mod:`startriage.ai.provider`) runs the agent and returns its | ||
| final text; this module loads the behavioural system prompt, feeds each bug's | ||
| payload as the user message, and parses the result via the contract. A failure on | ||
| one bug is recorded and the run continues with the next, never aborting the batch. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import logging | ||
| from collections.abc import Callable | ||
| from dataclasses import dataclass | ||
| from importlib.resources import files | ||
|
|
||
| from .contract import AgentResult, AgentResultError, parse_agent_result | ||
| from .provider import Provider | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
@dataclass
class BugOutcome:
    """Outcome of triaging one bug.

    Carries either a validated agent result or the error that explains why no
    result could be produced, together with the agent's raw text.
    """

    # Bug identifier this outcome belongs to.
    bug: str
    # Validated agent result; ``None`` when triage failed.
    result: AgentResult | None
    # Failure description; ``None`` when triage succeeded.
    error: str | None
    # Raw final text returned by the agent (empty if the provider call failed).
    raw: str

    @property
    def ok(self) -> bool:
        """Return ``True`` when a parsed result is present."""
        return not (self.result is None)
|
|
||
|
|
||
def load_system_prompt() -> str:
    """Return the text of ``data/agents_prompt.md`` from the ``startriage`` package.

    The file is read as UTF-8 through :func:`importlib.resources.files`, so it is
    resolved as a package resource rather than via a filesystem path.
    """
    package_root = files("startriage")
    prompt_resource = package_root.joinpath("data").joinpath("agents_prompt.md")
    return prompt_resource.read_text(encoding="utf-8")
|
|
||
|
|
||
def _log_outcome(outcome: BugOutcome) -> None:
    """Emit the per-bug step log for ``outcome``.

    A failure is logged at WARNING with its error. A success logs the decision
    (status and tags) at INFO, then the proposed-fix kind and, when present, the
    agent's thought process at DEBUG.
    """
    # Failure path first; checking ``result`` as well narrows the type below.
    if not outcome.ok or outcome.result is None:
        logger.warning("Bug %s failed: %s", outcome.bug, outcome.error)
        return

    parsed = outcome.result
    tag_summary = ", ".join(parsed.tags) or "(none)"
    logger.info(
        "Bug %s → status=%s, tags=%s",
        outcome.bug,
        parsed.status.value,
        tag_summary,
    )
    logger.debug("Bug %s proposed fix: %s", outcome.bug, parsed.proposed_fix.kind.value)
    if parsed.thought_process:
        logger.debug("Bug %s thought process: %s", outcome.bug, parsed.thought_process)
|
|
||
|
|
||
async def triage_bug(
    provider: Provider,
    payload: dict,
    system_prompt: str,
) -> BugOutcome:
    """Run one agent session for ``payload`` and parse its result.

    The payload is serialised to JSON and sent as the user message alongside
    ``system_prompt``; the provider's final text is then parsed via
    :func:`parse_agent_result`.

    Never raises for triage/agent failures: a provider error or an unparsable
    result is captured on the returned :class:`BugOutcome` so the caller can
    record it and continue. Provider errors are additionally logged with their
    full traceback.
    """
    bug = str(payload.get("number", ""))
    user_message = json.dumps(payload, ensure_ascii=False)
    logger.debug("Bug %s: sending %d-char payload to the agent", bug, len(user_message))
    try:
        raw = await provider.run(system_prompt, user_message)
    except Exception as exc:
        # Skip-and-continue, but keep the full traceback in the log: reducing the
        # exception to its message alone makes the failure much harder to debug.
        logger.exception("Bug %s: provider call failed", bug)
        return BugOutcome(bug=bug, result=None, error=f"provider error: {exc}", raw="")

    logger.debug("Bug %s: received %d-char agent response", bug, len(raw))
    try:
        result = parse_agent_result(raw)
    except AgentResultError as exc:
        # Keep the raw text so the rejected output can still be inspected.
        return BugOutcome(bug=bug, result=None, error=str(exc), raw=raw)
    return BugOutcome(bug=bug, result=result, error=None, raw=raw)
|
|
||
|
|
||
async def triage_bugs(
    provider: Provider,
    payloads: list[dict],
    system_prompt: str | None = None,
    *,
    on_progress: Callable[[int, int, str], None] | None = None,
) -> list[BugOutcome]:
    """Triage every payload one after another and collect the outcomes.

    When ``system_prompt`` is ``None`` the packaged prompt is loaded once via
    :func:`load_system_prompt`. If ``on_progress`` is supplied it is invoked as
    ``(index, total, bug)`` (1-based index) right before each bug is handed to
    the agent. Each outcome is logged and appended whether it succeeded or not;
    a summary line is logged at the end.
    """
    if system_prompt is None:
        active_prompt = load_system_prompt()
    else:
        active_prompt = system_prompt

    count = len(payloads)
    collected: list[BugOutcome] = []
    for position, bug_payload in enumerate(payloads, start=1):
        bug_id = str(bug_payload.get("number", ""))
        if on_progress is not None:
            on_progress(position, count, bug_id)
        logger.info("Triaging bug %s (%d/%d)…", bug_id, position, count)
        current = await triage_bug(provider, bug_payload, active_prompt)
        _log_outcome(current)
        collected.append(current)

    ok_count = sum(item.ok for item in collected)
    logger.info("AI triage complete: %d succeeded, %d failed", ok_count, count - ok_count)
    return collected
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| """Agent → tool result contract: the JSON each bug triage must return. | ||
|
|
||
| The Copilot CLI returns a free-text final assistant message, so the agent is | ||
| instructed to end with a single fenced ``json`` block. This module extracts that | ||
| block, parses it, and validates it against the schema in ``agents_prompt.md``. | ||
| Validation is enforced in code (status / fix-kind enums) so a hallucinated or | ||
| malformed result is rejected rather than trusted. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import re | ||
|
|
||
| from pydantic import BaseModel, ConfigDict, ValidationError | ||
|
|
||
| from ..enums import ProposedFixKind, TriageStatus | ||
|
|
||
| # Matches fenced code blocks, optionally tagged with a language (e.g. ```json). | ||
| _FENCED_BLOCK = re.compile( | ||
| r"```[ \t]*([A-Za-z0-9_+-]*)[ \t]*\r?\n(.*?)\r?\n```", | ||
| re.DOTALL, | ||
| ) | ||
|
|
||
|
|
||
class AgentResultError(ValueError):
    """Signal that agent output could not be parsed or validated as a result.

    Subclasses :class:`ValueError`, so callers catching ``ValueError`` also
    catch this.
    """
|
|
||
|
|
||
class ProposedFix(BaseModel):
    """The fix proposed for a bug: a validated kind plus a free-text value."""

    # Unknown keys are rejected outright for this nested object.
    model_config = ConfigDict(extra="forbid")

    # Must be a member of ``ProposedFixKind``.
    kind: ProposedFixKind
    # Optional; defaults to the empty string.
    value: str = ""
|
|
||
|
|
||
class AgentResult(BaseModel):
    """Triage result for a single bug, as emitted by the agent and rendered by the tool.

    Only ``bug``, ``status`` and ``proposed_fix`` are required; every other
    field has an empty default.
    """

    # Extra keys are ignored rather than rejected: LLM output is noisy and a
    # harmless addition should not fail an otherwise-valid result. The declared
    # fields are still validated strictly.
    model_config = ConfigDict(extra="ignore")

    bug: str
    package: str = ""
    short_title: str = ""
    # Must be a member of ``TriageStatus``.
    status: TriageStatus
    tags: list[str] = []
    analysis: str = ""
    thought_process: str = ""
    # Nested model, validated by ``ProposedFix``.
    proposed_fix: ProposedFix
    references: list[str] = []
    suggested_improvements: str = ""
|
|
||
|
|
||
def extract_json_block(text: str) -> str:
    """Pick the fenced code block in ``text`` that should hold the JSON result.

    The last block tagged ``json`` (case-insensitive) wins. If no block carries
    that tag, the last fenced block of any kind is used instead, so a missing
    language hint does not break parsing. The returned body is stripped of
    surrounding whitespace.

    Raises :class:`AgentResultError` when ``text`` contains no fenced block.
    """
    blocks = _FENCED_BLOCK.findall(text)
    if not blocks:
        raise AgentResultError("no fenced code block found in agent output")

    # Scan backwards so the first hit is the last json-tagged block.
    for language, body in reversed(blocks):
        if language.lower() == "json":
            return body.strip()

    # No language-tagged json block; use the last fenced block of any kind.
    _, fallback_body = blocks[-1]
    return fallback_body.strip()
|
|
||
|
|
||
def parse_agent_result(text: str) -> AgentResult:
    """Turn the agent's free-text answer into a validated :class:`AgentResult`.

    Steps: locate the fenced block via :func:`extract_json_block`, decode it as
    JSON, require the decoded value to be an object, then validate it against
    the model.

    Raises :class:`AgentResultError` when the block is missing, the JSON is
    invalid, the top-level value is not an object, or schema / enum validation
    fails. Decode and validation errors are chained as the cause.
    """
    json_text = extract_json_block(text)
    try:
        decoded = json.loads(json_text)
    except json.JSONDecodeError as exc:
        raise AgentResultError(f"agent output is not valid JSON: {exc}") from exc

    if isinstance(decoded, dict):
        try:
            return AgentResult.model_validate(decoded)
        except ValidationError as exc:
            raise AgentResultError(f"agent result failed validation: {exc}") from exc

    # Valid JSON, but a list / scalar rather than an object.
    raise AgentResultError("agent JSON result must be an object")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not just
triage-$date.md?