diff --git a/README.md b/README.md index e3fb590..bb77342 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,48 @@ startriage todo --subscribed Run `startriage triage --help` for the full option reference, including the bug flags legend. +## AI Triage (experimental) + +Inspect one or more Launchpad bugs, optionally running an AI agent over them. By +default `analyze` prints the bug metadata (status, tags, affected targets, +description, comments) so you can eyeball it — no provider or credentials +required. Add `--ai` to run the agent, which produces a suggested status, tags, +analysis, and (where applicable) a proposed fix. The agent never edits bugs and +never applies patches — it only prints its analysis. + +```bash +# Show metadata for one or more specific bugs (URL, NNNNNN, or #NNNNNN) +startriage analyze 2101234 '#2105678' + +# Run the AI agent over those bugs instead of just showing metadata +startriage analyze --ai 2101234 '#2105678' + +# Run the normal daily triage, then AI-triage every bug found +startriage triage --ai +``` + +The AI output is printed after the normal triage results. With `--markdown FILE` +the AI section is folded into that same report file (behind a clear "review +critically" notice) so you get a single cohesive document. + +Configure a provider first (credentials are written to the 0600 config, never +echoed): + +```bash +# Default provider: GitHub Copilot (needs a Copilot-enabled account) +startriage config set --ai-provider copilot --ai-github-token github_pat_... + +# Or bring your own key via an OpenAI-compatible provider (e.g. OpenRouter) +startriage config set --ai-provider openrouter \ + --ai-model anthropic/claude-opus-4.1 \ + --ai-openrouter-key sk-or-... +``` + +The Copilot token may also come from `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / +`GITHUB_TOKEN`, and the OpenRouter key from `OPENROUTER_API_KEY`. 
The snap bundles +the Copilot runtime and `ubuntu-dev-tools`, so source analysis works inside strict +confinement; from a git checkout install the extra with `uv sync --extra ai`. + ## Configuration adjust [the defaults](startriage/data/defaults.toml) with your user configuration file: diff --git a/pyproject.toml b/pyproject.toml index 81b415a..2a08dbb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,11 @@ dependencies = [ "platformdirs", ] +[project.optional-dependencies] +# Agentic AI triage. The SDK bundles the Copilot CLI runtime it spawns; kept +# optional so non-AI installs stay lean. The snap ships it (see snapcraft.yaml). +ai = ["github-copilot-sdk"] + [project.scripts] startriage = "startriage.__main__:main" @@ -35,7 +40,7 @@ build-backend = "setuptools.build_meta" [tool.setuptools] packages = {find = {where = ["."]}} -package-data = {"startriage" = ["data/*.toml"]} +package-data = {"startriage" = ["data/*.toml", "data/*.md"]} [tool.setuptools_scm] version_scheme = "only-version" diff --git a/snapcraft.yaml b/snapcraft.yaml index 73cda3a..4dc539c 100644 --- a/snapcraft.yaml +++ b/snapcraft.yaml @@ -29,6 +29,13 @@ grade: stable apps: startriage: command: bin/startriage + environment: + # The Copilot runtime defaults COPILOT_HOME to ~/.copilot, a hidden path the + # `home` plug cannot write to. Point it at SNAP_USER_DATA, always writable. + COPILOT_HOME: $SNAP_USER_DATA/.copilot + # Expose staged ubuntu-dev-tools helpers (pull-lp-source, debdiff, …) on PATH + # so the agent's shell tool can pull and diff package source. + PATH: $SNAP/usr/bin:$SNAP/bin:$PATH plugs: - network - network-bind @@ -39,3 +46,10 @@ parts: plugin: python source: . source-type: git + # github-copilot-sdk bundles the Copilot CLI runtime binary it spawns, so no + # separate Node part is needed; pip ships the runtime inside the snap. + python-packages: + - github-copilot-sdk + stage-packages: + # pull-lp-source / dpkg-source / debdiff for the agent's source analysis. 
+ - ubuntu-dev-tools diff --git a/startriage/ai/__init__.py b/startriage/ai/__init__.py new file mode 100644 index 0000000..76e6af3 --- /dev/null +++ b/startriage/ai/__init__.py @@ -0,0 +1,62 @@ +"""AI/agentic triage layer for startriage.""" + +from __future__ import annotations + +from .agent import BugOutcome, load_system_prompt, triage_bug, triage_bugs +from .contract import ( + AgentResult, + AgentResultError, + ProposedFix, + extract_json_block, + parse_agent_result, +) +from .provider import ( + CopilotProvider, + FakeProvider, + Provider, + build_client_kwargs, + build_provider, + build_session_kwargs, +) +from .render import ( + append_report, + render_bug_metadata, + render_report, +) +from .run import ( + describe_bug_specs, + gather_user_bug_payloads, + parse_bug_number, + payloads_from_tasks, + run_agent_on_payloads, + run_ai_over_bug_specs, + run_ai_over_triage_results, +) + +__all__ = [ + "AgentResult", + "AgentResultError", + "BugOutcome", + "CopilotProvider", + "FakeProvider", + "ProposedFix", + "Provider", + "append_report", + "build_client_kwargs", + "build_provider", + "build_session_kwargs", + "describe_bug_specs", + "extract_json_block", + "gather_user_bug_payloads", + "load_system_prompt", + "parse_agent_result", + "parse_bug_number", + "payloads_from_tasks", + "render_bug_metadata", + "render_report", + "run_agent_on_payloads", + "run_ai_over_bug_specs", + "run_ai_over_triage_results", + "triage_bug", + "triage_bugs", +] diff --git a/startriage/ai/agent.py b/startriage/ai/agent.py new file mode 100644 index 0000000..ce868b2 --- /dev/null +++ b/startriage/ai/agent.py @@ -0,0 +1,112 @@ +"""Sequential agent loop: run one triage session per bug, skip-and-continue. + +The provider (see :mod:`startriage.ai.provider`) runs the agent and returns its +final text; this module loads the behavioural system prompt, feeds each bug's +payload as the user message, and parses the result via the contract. 
async def triage_bug(
    provider: Provider,
    payload: dict,
    system_prompt: str,
) -> BugOutcome:
    """Run one agent session for ``payload`` and parse its result.

    The payload is serialised to JSON and sent to ``provider`` as the user
    message; the text the provider returns is validated with
    ``parse_agent_result``.

    Args:
        provider: Backend that runs the agent session.
        payload: Bug context; its ``"number"`` entry labels the outcome.
        system_prompt: Behavioural prompt handed to the provider.

    Returns:
        A :class:`BugOutcome` carrying either the parsed result or an error
        string, plus the raw agent text (empty when no text was received).

    Never raises for triage/agent failures: serialisation errors, provider
    errors and contract violations are all captured on the returned outcome so
    the caller can record them and continue with the next bug.
    """
    # A missing *or* null number maps to "" rather than the string "None", so
    # ``outcome.bug or <fallback>`` style checks downstream keep working.
    number = payload.get("number")
    bug = "" if number is None else str(number)

    try:
        user_message = json.dumps(payload, ensure_ascii=False)
    except (TypeError, ValueError) as exc:
        # An unserialisable payload is a per-bug failure, not a reason to abort
        # the whole batch.
        logger.warning("Bug %s: payload is not JSON-serialisable", bug, exc_info=True)
        return BugOutcome(bug=bug, result=None, error=f"payload error: {exc}", raw="")

    logger.debug("Bug %s: sending %d-char payload to the agent", bug, len(user_message))
    try:
        raw = await provider.run(system_prompt, user_message)
    except Exception as exc:
        # Record any provider/runtime failure and keep going (skip-and-continue).
        logger.warning("Bug %s: provider run failed", bug, exc_info=True)
        # Some exceptions (e.g. a bare TimeoutError()) stringify to "", which
        # would leave the report with an empty error; fall back to the type name.
        detail = str(exc) or type(exc).__name__
        return BugOutcome(bug=bug, result=None, error=f"provider error: {detail}", raw="")

    logger.debug("Bug %s: received %d-char agent response", bug, len(raw))
    try:
        result = parse_agent_result(raw)
    except AgentResultError as exc:
        # Keep the raw text so a malformed answer can still be inspected.
        return BugOutcome(bug=bug, result=None, error=str(exc), raw=raw)
    return BugOutcome(bug=bug, result=result, error=None, raw=raw)
+ """ + prompt = system_prompt if system_prompt is not None else load_system_prompt() + total = len(payloads) + outcomes: list[BugOutcome] = [] + for index, payload in enumerate(payloads, start=1): + bug = str(payload.get("number", "")) + if on_progress is not None: + on_progress(index, total, bug) + logger.info("Triaging bug %s (%d/%d)…", bug, index, total) + outcome = await triage_bug(provider, payload, prompt) + _log_outcome(outcome) + outcomes.append(outcome) + succeeded = sum(o.ok for o in outcomes) + logger.info("AI triage complete: %d succeeded, %d failed", succeeded, total - succeeded) + return outcomes diff --git a/startriage/ai/contract.py b/startriage/ai/contract.py new file mode 100644 index 0000000..2fb1c4e --- /dev/null +++ b/startriage/ai/contract.py @@ -0,0 +1,90 @@ +"""Agent → tool result contract: the JSON each bug triage must return. + +The Copilot CLI returns a free-text final assistant message, so the agent is +instructed to end with a single fenced ``json`` block. This module extracts that +block, parses it, and validates it against the schema in ``agents_prompt.md``. +Validation is enforced in code (status / fix-kind enums) so a hallucinated or +malformed result is rejected rather than trusted. +""" + +from __future__ import annotations + +import json +import re + +from pydantic import BaseModel, ConfigDict, ValidationError + +from ..enums import ProposedFixKind, TriageStatus + +# Matches fenced code blocks, optionally tagged with a language (e.g. ```json). 
# One fenced code block: an optional language tag after the opening fence, then
# the body up to the next line that starts a closing fence.
_FENCED_BLOCK = re.compile(
    r"```[ \t]*([A-Za-z0-9_+-]*)[ \t]*\r?\n(.*?)\r?\n```",
    re.DOTALL,
)


class AgentResultError(ValueError):
    """Raised when the agent's output cannot be parsed/validated as a result."""


def extract_json_block(text: str) -> str:
    """Return the payload of the fenced block most likely to hold the result.

    Selection order:

    1. the last block tagged ``json`` (case-insensitive);
    2. otherwise the last *untagged* block, so a missing language hint does not
       break parsing and a trailing ```diff / ```bash block cannot shadow it;
    3. otherwise the last fenced block of any kind (kept for backward
       compatibility; the caller's JSON decoding decides whether it is usable).

    Args:
        text: The agent's full final message.

    Returns:
        The selected block's body with surrounding whitespace stripped.

    Raises:
        AgentResultError: When ``text`` contains no fenced block at all.
    """
    matches = _FENCED_BLOCK.findall(text)
    if not matches:
        raise AgentResultError("no fenced code block found in agent output")

    # Try the explicit tag first, then blocks with no tag at all.
    for wanted in ("json", ""):
        candidates = [body for lang, body in matches if lang.lower() == wanted]
        if candidates:
            return candidates[-1].strip()

    # Only blocks tagged with some other language exist; use the last one.
    return matches[-1][1].strip()
class Provider(ABC):
    """Abstract backend: runs exactly one agent session and hands back its text.

    Concrete providers set :attr:`model` and implement :meth:`run`.
    """

    #: Identifier of the model the underlying session is started with.
    model: str

    @abstractmethod
    async def run(self, system_prompt: str, user_message: str) -> str:
        """Execute one agent session and return the final assistant message.

        ``system_prompt`` carries the behavioural instructions and
        ``user_message`` the per-bug input.
        """
        raise NotImplementedError()
class FakeProvider(Provider):
    """In-memory stand-in for a real provider, intended for offline tests.

    Each call to :meth:`run` hands back the next queued response; when the queue
    is empty, ``default_response`` is returned instead. Every
    ``(system_prompt, user_message)`` pair is appended to :attr:`calls` so tests
    can assert on what was sent.
    """

    def __init__(
        self,
        responses: list[str] | None = None,
        *,
        model: str = "fake-model",
        default_response: str = "",
    ) -> None:
        self.calls: list[tuple[str, str]] = []
        self.model = model
        self._default_response = default_response
        # Copy so later mutation of the caller's list cannot change the queue.
        self._responses = list(responses or [])

    async def run(self, system_prompt: str, user_message: str) -> str:
        """Record the call, then return the next canned response (or the default)."""
        self.calls.append((system_prompt, user_message))
        try:
            return self._responses.pop(0)
        except IndexError:
            # Queue drained: fall back to the configured default.
            return self._default_response
def _render_proposed_fix(fix: ProposedFix) -> str:
    """Render the agent's proposed fix as markdown (display only).

    Args:
        fix: The proposed fix from a validated agent result.

    Returns:
        ``_No fix proposed._`` when the kind is ``none`` or the value is blank;
        the stripped value itself for a ``reference``; otherwise the value
        wrapped in a fenced ``diff`` block.
    """
    value = fix.value.strip()
    if fix.kind is ProposedFixKind.none or not value:
        return "_No fix proposed._"
    if fix.kind is ProposedFixKind.reference:
        return value

    # kind == diff: render only; the tool never applies it to a source tree.
    # A patch may itself contain backtick fences (e.g. a diff of a markdown
    # file). Use a fence one backtick longer than the longest run in the value
    # (minimum three) so the block cannot be closed early by its own content.
    longest = run = 0
    for char in value:
        run = run + 1 if char == "`" else 0
        longest = max(longest, run)
    fence = "`" * max(3, longest + 1)
    return f"{fence}diff\n{value}\n{fence}"
str | None: + """Aggregate non-empty, de-duplicated improvement notes across results.""" + seen: set[str] = set() + blocks: list[str] = [] + for result in results: + note = result.suggested_improvements.strip() + if note and note not in seen: + seen.add(note) + blocks.append(note) + if not blocks: + return None + return "\n\n".join(blocks) + + +def render_report(outcomes: list[BugOutcome], day: date | None = None) -> str: + """Render a full markdown report for ``outcomes``. + + Successful results render as per-bug sections; failures are recorded inline. + A trailing ``## Suggested Improvements`` section aggregates the agent's + self-improvement notes when any were returned. + """ + report_day = day or date.today() + sections = [f"# Automated triage — {report_day.isoformat()}"] + + results = [o.result for o in outcomes if o.result is not None] + + for outcome in outcomes: + if outcome.result is not None: + sections.append(_render_bug(outcome.result)) + else: + sections.append(_render_failure(outcome)) + + improvements = _render_suggested_improvements(results) + if improvements: + sections.append(f"## Suggested Improvements\n\n{improvements}") + + return "\n\n".join(sections) + "\n" + + +def _render_bug_metadata(payload: dict[str, Any]) -> str: + """Render one bug payload as human-readable metadata (no AI analysis).""" + number = payload.get("number") or "(unknown)" + title = payload.get("short_title") or payload.get("title") or "(no title)" + tags = payload.get("tags") or [] + + lines = [f"## LP #{number} — {title}", ""] + if payload.get("url"): + lines.append(f"**URL:** {payload['url']}") + lines.append(f"**Status:** {payload.get('status') or 'unknown'}") + lines.append(f"**Importance:** {payload.get('importance') or 'unknown'}") + if payload.get("heat") is not None: + lines.append(f"**Heat:** {payload['heat']}") + lines.append(f"**Tags:** {', '.join(tags) if tags else '_none_'}") + if payload.get("duplicate_of"): + lines.append(f"**Duplicate of:** LP 
#{payload['duplicate_of']}") + + affected = payload.get("affected") or [] + if affected: + lines += ["", "### Affected", ""] + for target in affected: + name = target.get("target") or "(unknown target)" + status = target.get("status") or "?" + importance = target.get("importance") or "?" + lines.append(f"- {name} — {status} ({importance})") + + description = (payload.get("description") or "").strip() + lines += ["", "### Description", "", description or "_No description._"] + + attachments = payload.get("attachments") or [] + if attachments: + lines += ["", "### Attachments", ""] + for att in attachments: + title_text = att.get("title") or "(untitled)" + suffix = " [patch]" if att.get("is_patch") else "" + lines.append(f"- {title_text} ({att.get('type') or 'unknown'}){suffix}") + + comments = payload.get("comments") or [] + if comments: + lines += ["", f"### Comments ({len(comments)})"] + for comment in comments: + author = comment.get("author") or "unknown" + when = comment.get("date") or "unknown date" + text = (comment.get("text") or "").strip() + lines += ["", f"**{author}** — {when}", "", text or "_(empty)_"] + + return "\n".join(lines) + + +def render_bug_metadata(payloads: list[dict[str, Any]]) -> str: + """Render bug payloads as human-readable metadata, without any AI analysis. + + This is what ``analyze`` prints when ``--ai`` is not given: the raw bug + context (status, tags, affected targets, description, comments) as gathered + for the agent, so a human can eyeball it before deciding to run the agent. + """ + sections = ["# Bug metadata"] + sections += [_render_bug_metadata(payload) for payload in payloads] + return "\n\n".join(sections) + "\n" + + +def append_report(path: Path, content: str) -> Path: + """Append an AI ``content`` report to an existing markdown file at ``path``. 
+ + A horizontal rule and a notice (:data:`AI_APPEND_NOTICE`) are inserted first so + the AI-generated section is clearly separated from the human-written triage + report and is not mistaken for part of it. + """ + with path.open("a", encoding="utf-8") as fh: + fh.write("\n\n" + AI_APPEND_NOTICE + content) + return path diff --git a/startriage/ai/run.py b/startriage/ai/run.py new file mode 100644 index 0000000..0b78c21 --- /dev/null +++ b/startriage/ai/run.py @@ -0,0 +1,201 @@ +"""End-to-end orchestration that wires the AI layer to the CLI. + +Two entry points feed bugs to the agent and render a report: + +- :func:`gather_user_bug_payloads` resolves user-supplied bug specs (URL, + ``NNNNNN`` or ``#NNNNNN``) into agent payloads (``analyze``). +- :func:`payloads_from_tasks` turns already-fetched triage tasks into payloads + (``triage --ai``). + +Both hand their payloads to :func:`run_agent_on_payloads`, which runs the agent +sequentially and returns the rendered markdown; emitting it (printing or folding +it into a triage markdown file) is left to the caller. :func:`describe_bug_specs` +shares the same gather step but only renders the raw bug metadata (``analyze`` +without ``--ai``). Launchpad access is lazily imported inside the gather helpers +so non-AI commands and offline tests never pull in launchpadlib. +""" + +from __future__ import annotations + +import asyncio +import logging +import re +from typing import TYPE_CHECKING, Any + +from ..config import StarTriageConfig +from ..spinner import Spinner +from .agent import load_system_prompt, triage_bugs +from .provider import Provider, build_provider +from .render import render_bug_metadata, render_report + +if TYPE_CHECKING: + from collections.abc import Sequence + + from ..output import TriageResult + from ..sources.launchpad.models import Task + +logger = logging.getLogger(__name__) + +# A bare bug number, optionally ``#``-prefixed. 
# A bare bug number, optionally ``#``-prefixed.
_BARE_BUG = re.compile(r"^#?(\d+)$")
# A Launchpad bug reference inside a URL (``.../+bug/N`` or ``.../bugs/N``).
# The host must be ``launchpad.net`` or one of its subdomains and must start
# the spec, follow whitespace, or follow ``//``. This stops look-alike hosts
# (``notlaunchpad.net``) and path segments that merely spell the domain
# (``example.com/launchpad.net/bugs/1``) from being accepted.
_URL_BUG = re.compile(
    r"(?:^|\s|//)(?:[\w-]+\.)*launchpad\.net/(?:.*/)?(?:\+bug|bugs)/(\d+)",
    re.IGNORECASE,
)


def parse_bug_number(spec: str) -> str:
    """Extract a Launchpad bug number from a bare ``NNNNNN``, ``#NNNNNN`` or LP URL.

    Only real Launchpad bug references are accepted. Arbitrary URLs or text that
    merely happen to contain digits (e.g. ``https://example.com/pages/3133742``)
    raise :class:`ValueError` rather than silently resolving to a wrong number.

    Args:
        spec: User-supplied bug reference; surrounding whitespace is ignored.

    Returns:
        The bug number as a string of digits.

    Raises:
        ValueError: When ``spec`` is neither a bare number nor a Launchpad bug URL.
    """
    spec = spec.strip()
    for pattern in (_BARE_BUG.match, _URL_BUG.search):
        found = pattern(spec)
        if found:
            return found.group(1)
    raise ValueError(f"could not parse a Launchpad bug number from {spec!r}")
def _make_spinner(total: int) -> Spinner:
    """Build the status spinner shown while ``total`` bugs are being triaged.

    Callers may use the spinner unconditionally: the original notes that it is a
    no-op when stderr is not a TTY (piped/CI) and that this handling lives in
    :class:`Spinner` itself.
    """
    if total == 1:
        noun = "bug"
    else:
        noun = "bugs"
    initial_status = f"Preparing to triage {total} {noun}…"
    return Spinner(set(), status=initial_status)
async def run_ai_over_triage_results(
    config: StarTriageConfig,
    results: Sequence[tuple[str, TriageResult]],
    *,
    provider: Provider | None = None,
) -> str | None:
    """Run the agent over the Launchpad tasks gathered by a normal triage run.

    Tasks are collected from *every* Launchpad result in ``results`` (not just
    the first one), so nothing is silently dropped if more than one is present;
    ``payloads_from_tasks`` then de-duplicates by bug number.

    Args:
        config: Loaded configuration; ``config.ai`` is used to build a provider
            when none is supplied.
        results: ``(label, result)`` pairs from the triage run; entries that
            are not Launchpad results are ignored.
        provider: Optional pre-built provider (e.g. a fake for tests).

    Returns:
        The rendered markdown, or ``None`` when there are no Launchpad tasks to
        triage. Launchpad payload construction runs off-thread.
    """
    # Imported lazily so non-AI commands never pull in the Launchpad stack.
    from ..sources.launchpad.triage import LaunchpadTriage

    tasks: list[Task] = []
    for _, result in results:
        if isinstance(result, LaunchpadTriage):
            tasks.extend(result.tasks.tasks)

    payloads = await asyncio.to_thread(payloads_from_tasks, tasks)
    return await run_agent_on_payloads(config, payloads, provider=provider)
The AI section is printed after the " + "normal output, or with --markdown folded into that single report file" + ), + ) triage_p.set_defaults(func=_run_triage) # --- todo --- @@ -183,6 +202,24 @@ def _build_parser() -> argparse.ArgumentParser: todo_p.add_argument("-C", "--compare", metavar="PATH", help="Set path to saved file to compare bugs to") todo_p.set_defaults(func=_run_todo) + # --- analyze --- + analyze_p = sp.add_parser( + "analyze", + help="Show metadata for one or more Launchpad bugs (add --ai to run the agent)", + ) + analyze_p.add_argument( + "bug", + nargs="+", + metavar="BUG", + help="Launchpad bug to analyze: full URL, NNNNNN, or #NNNNNN", + ) + analyze_p.add_argument( + "--ai", + action="store_true", + help="Run the AI agent over the bug(s) and write a report instead of just showing metadata", + ) + analyze_p.set_defaults(func=_run_analyze) + # --- config --- config_p = sp.add_parser("config", help="Manage configuration") config_sp = config_p.add_subparsers(required=True) @@ -208,6 +245,37 @@ def _build_parser() -> argparse.ArgumentParser: "Alternatively set the GITHUB_TOKEN environment variable." ), ) + config_setdefaults_p.add_argument( + "--ai-provider", + choices=AIProvider, + help="Set AI triage provider in config (ai.provider)", + ) + config_setdefaults_p.add_argument( + "--ai-model", + metavar="MODEL", + help="Set AI triage model in config (ai.model)", + ) + config_setdefaults_p.add_argument( + "--ai-github-token", + metavar="TOKEN", + help=( + "Set Copilot GitHub token in config (ai.github_token). " + "Alternatively set the COPILOT_GITHUB_TOKEN environment variable." + ), + ) + config_setdefaults_p.add_argument( + "--ai-openrouter-key", + metavar="KEY", + help=( + "Set OpenRouter API key in config (ai.openrouter_api_key). " + "Alternatively set the OPENROUTER_API_KEY environment variable." 
+ ), + ) + config_setdefaults_p.add_argument( + "--ai-openrouter-base-url", + metavar="URL", + help="Set OpenRouter base URL in config (ai.openrouter_base_url)", + ) config_setdefaults_p.set_defaults(func=_set_config_settings) config_show_p = config_sp.add_parser("show", help="Display resolved configuration") @@ -294,6 +362,15 @@ async def _run() -> None: async def _run_triage(args: argparse.Namespace, config: StarTriageConfig) -> None: + provider = None + output_cfg = _outputcfg_from_args(args) + if args.ai: + # Validate AI credentials up-front so a misconfig fails before the (slow) + # normal triage run rather than after it. + provider = _build_ai_provider(config) + if provider is None: + return + filter = _filter_from_args(config, args) team = config.get_team(filter.team) if args.no_ignore_list: @@ -310,8 +387,14 @@ async def _run_triage(args: argparse.Namespace, config: StarTriageConfig) -> Non general = general.model_copy(update={"proposed_min_age": args.proposed_min_age}) config.general = general - output_cfg = _outputcfg_from_args(args) - await run_triage(config, filter, output_cfg) + results = await run_triage(config, filter, output_cfg) + + if args.ai: + from .ai import run_ai_over_triage_results + + report = await run_ai_over_triage_results(config, results, provider=provider) + if report is not None: + _emit_ai_report(report, output_cfg.markdown_path) async def _run_todo(args: argparse.Namespace, config: StarTriageConfig) -> None: @@ -337,6 +420,59 @@ async def _run_todo(args: argparse.Namespace, config: StarTriageConfig) -> None: ) +def _build_ai_provider(config: StarTriageConfig) -> Provider | None: + """Build the AI provider, printing a friendly hint and returning None on misconfig.""" + from .ai import build_provider + + try: + return build_provider(config.ai) + except AIConfigError as exc: + print(f"error: {exc}", file=sys.stderr) + return None + + +def _emit_ai_report(report: str, markdown_path: Path | None) -> None: + """Emit an AI ``report`` for a 
``triage --ai`` run. + + With ``--markdown`` the AI section is appended (behind a review notice) to + that same file, so the human triage report and the AI aid live in one + cohesive document. Without ``--markdown`` the report is printed to stdout, + together with the normal triage output that already went there. + """ + from .ai import append_report + from .ai.render import AI_APPEND_NOTICE + + if markdown_path is not None: + append_report(markdown_path, report) + print(f"AI triage appended to {markdown_path}") + else: + print("\n" + AI_APPEND_NOTICE + report) + + +async def _run_analyze(args: argparse.Namespace, config: StarTriageConfig) -> None: + if not args.ai: + from .ai import describe_bug_specs + + report = await describe_bug_specs(args.bug) + if report is None: + print("No valid bugs found.", file=sys.stderr) + return + print(report) + return + + from .ai import run_ai_over_bug_specs + + provider = _build_ai_provider(config) + if provider is None: + return + + report = await run_ai_over_bug_specs(config, args.bug, provider=provider) + if report is None: + print("No valid bugs to triage.", file=sys.stderr) + return + print(report) + + async def _set_config_settings(args: argparse.Namespace, _config: StarTriageConfig) -> None: updates: dict[str, dict] = {} @@ -358,12 +494,24 @@ async def _set_config_settings(args: argparse.Namespace, _config: StarTriageConf updates.setdefault("general", {})["proposed_min_age"] = args.proposed_min_age if args.github_token is not None: updates.setdefault("general", {})["github_token"] = args.github_token + if args.ai_provider is not None: + updates.setdefault("ai", {})["provider"] = str(args.ai_provider) + if args.ai_model is not None: + updates.setdefault("ai", {})["model"] = args.ai_model + if args.ai_github_token is not None: + updates.setdefault("ai", {})["github_token"] = args.ai_github_token + if args.ai_openrouter_key is not None: + updates.setdefault("ai", {})["openrouter_api_key"] = args.ai_openrouter_key + if 
args.ai_openrouter_base_url is not None: + updates.setdefault("ai", {})["openrouter_base_url"] = args.ai_openrouter_base_url if not updates: print("No settings to update.") return - sensitive = "github_token" in updates.get("general", {}) + sensitive = "github_token" in updates.get("general", {}) or bool( + {"github_token", "openrouter_api_key"} & updates.get("ai", {}).keys() + ) path = update_user_config(updates, config_path=args.config, sensitive=sensitive) print(f"Settings saved to {path!r}") diff --git a/startriage/config.py b/startriage/config.py index 11e1ab9..d63c9c4 100644 --- a/startriage/config.py +++ b/startriage/config.py @@ -11,7 +11,7 @@ import tomli_w from pydantic import BaseModel, ConfigDict, field_validator, model_validator -from .enums import UpdateFilter +from .enums import AIProvider, UpdateFilter def default_config_path() -> Path: @@ -23,6 +23,61 @@ def default_config_path() -> Path: DEFAULT_USER_CONFIG = default_config_path() +# Environment variables consulted for AI credentials, in priority order. +# Copilot mirrors the GitHub Copilot SDK's own precedence. +COPILOT_TOKEN_ENV_VARS = ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN") +OPENROUTER_KEY_ENV_VARS = ("STARTRIAGE_AI_OPENROUTER_KEY", "OPENROUTER_API_KEY") + + +def _first_env(names: tuple[str, ...]) -> str | None: + """Return the first non-empty value among the given environment variables.""" + for name in names: + value = os.environ.get(name) + if value: + return value + return None + + +class AIConfigError(Exception): + """Raised when the [ai] section lacks the credentials required to run.""" + + +class AIConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + provider: AIProvider = AIProvider.copilot + model: str = "claude-opus-4.8" + # Copilot auth (or rely on COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN env). + github_token: str | None = None + # OpenRouter (BYOK) auth. 
+ openrouter_api_key: str | None = None + openrouter_base_url: str = "https://openrouter.ai/api/v1" + + def resolve_token(self) -> str | None: + """Return the effective credential for the active provider. + + Config values take precedence over environment variables. + """ + if self.provider is AIProvider.copilot: + return self.github_token or _first_env(COPILOT_TOKEN_ENV_VARS) + return self.openrouter_api_key or _first_env(OPENROUTER_KEY_ENV_VARS) + + def require_configured(self) -> None: + """Raise AIConfigError with a friendly hint when no credential is available.""" + if self.resolve_token(): + return + if self.provider is AIProvider.copilot: + raise AIConfigError( + "No Copilot credential configured. Run " + "'startriage config set --ai-github-token ' or set the " + "COPILOT_GITHUB_TOKEN environment variable." + ) + raise AIConfigError( + "No OpenRouter API key configured. Run " + "'startriage config set --ai-openrouter-key ' or set the " + "OPENROUTER_API_KEY environment variable." + ) + class GeneralConfig(BaseModel): model_config = ConfigDict(extra="forbid") @@ -86,6 +141,7 @@ class StarTriageConfig(BaseModel): model_config = ConfigDict(extra="forbid") general: GeneralConfig = GeneralConfig() + ai: AIConfig = AIConfig() team: dict[str, TeamConfig] = {} loaded_paths: list[Path] = [] @@ -98,9 +154,11 @@ def get_team(self, name: str) -> TeamConfig: raise KeyError(f"Unknown team '{name}'. 
Available teams: {available}") from None def show(self) -> str: - data: dict = {"general": {}, "team": {}} + data: dict = {"general": {}, "ai": {}, "team": {}} for field, value in self.general.model_dump(exclude_none=True).items(): data["general"][field] = value + for field, value in self.ai.model_dump(exclude_none=True).items(): + data["ai"][field] = value for team_name, team in self.team.items(): data["team"][team_name] = team.model_dump(exclude_none=True) @@ -184,6 +242,9 @@ def load_config(user_config_path: Path | None) -> StarTriageConfig: # Merge general section merged_general = {**defaults.get("general", {}), **user.get("general", {})} + # Merge ai section (user overrides defaults field-by-field) + merged_ai = {**defaults.get("ai", {}), **user.get("ai", {})} + # Merge team sections field-by-field so a sparse user section doesn't lose defaults default_teams = defaults.get("team", {}) user_teams = user.get("team", {}) @@ -193,7 +254,7 @@ def load_config(user_config_path: Path | None) -> StarTriageConfig: } return StarTriageConfig.model_validate( - {"general": merged_general, "team": merged_teams, "loaded_paths": loaded_paths} + {"general": merged_general, "ai": merged_ai, "team": merged_teams, "loaded_paths": loaded_paths} ) diff --git a/startriage/data/agents_prompt.md b/startriage/data/agents_prompt.md new file mode 100644 index 0000000..c2d4216 --- /dev/null +++ b/startriage/data/agents_prompt.md @@ -0,0 +1,213 @@ +# Role + +You are a Senior software engineer working for Ubuntu, responsible for triaging bugs so other engineers work on it if needed. Your job is not to fix everything, but rather filter out and point engineers to good resources about the bug and give them options on how to proceed. You get a list of bugs as input and iterate through them, performing the actions below. + +# Actions + +Perform these actions in order for each bug. Use the results of earlier steps to inform later ones. + +## 1. 
Validate the Report + +Check the following criteria: +- Does the report identify at least one specific source package? +- Does the described problem target the correct package? (e.g., is the user blaming package A when the fault is in package B?) +- Does it describe a specific fault, error, or incorrect behavior? + +Feature requests are valid but must be flagged as such, and do not need to be triaged further once the request is confirmed to make sense. To validate a feature request, consider: +- Is it available in a newer version? +- Does it already exist, or does it need to be implemented? +- Is it suitable for upstreaming? +- Is it just a simple flag change, or a bigger effort? + +If validation fails: +- Missing information (no package, no version, no reproduction steps) → recommend status **Incomplete**. Specify what information is needed. +- Not a bug (support request, expected behavior, configuration error, unsupported setup) → recommend status **Invalid**. Explain why. +- Process ticket (sync request, merge request, SRU, MIR, freeze exception) → recommend **no-change**. These are tracked separately. + +If validation passes, proceed to step 2. + +## 2. Search for Duplicates and Existing Fixes + +Perform these searches in parallel: + +### 2.1 Search Launchpad for duplicates +- URL pattern: `https://bugs.launchpad.net/ubuntu/+source/SOURCE_PACKAGE/+bugs?field.searchtext=SEARCH_TERMS` +- Look for bugs with matching symptoms. If a duplicate is found, recommend marking the current bug as a duplicate of the older/better-reported one. + +### 2.2 Search Debian for related bugs or fixes +- URL pattern: `https://bugs.debian.org/cgi-bin/pkgreport.cgi?archive=both;src=SOURCE_PACKAGE` +- Look for matching bugs. If a fix exists in Debian, note the Debian bug number and the fix (patch, version, or commit). + +If either search finds a clear solution (existing duplicate, or fix already in Debian), note it and proceed to step 5 (Describe the Bug). 
+ +If neither search finds a clear solution, still provide the references you found so they can inform the final decision. + +## 3. Search Upstream + +- Determine the upstream repository, homepage, and bug tracker. Sources (in order): + the bug's affected source package page on Launchpad, the package's `debian/control` + `Homepage:` field, and `debian/watch`. (A package-metadata cache is planned for a + future iteration but is NOT available yet — do not rely on it.) +- Search the upstream bug tracker and/or git repository for matching issues or commits. +- If the upstream project uses GitHub, GitLab, or similar, search the issues and recent commits. + +If a fix is found upstream, note the commit hash or issue URL and proceed to step 5. +If no fix is found upstream, still provide the references you found so they can inform the final decision. + +## 4. Search Other Distributions + +If steps 2-3 did not yield a solution, search other distributions. Prioritize in this order: + +- **Fedora**: `https://bugzilla.redhat.com/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&product=Fedora&content=SEARCH_TERMS` +- **Arch Linux**: `https://gitlab.archlinux.org/archlinux/packaging/packages/PACKAGE_NAME/-/issues` or `https://bugs.archlinux.org/` (for legacy bugs) +- **Gentoo**: `https://bugs.gentoo.org/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&content=SEARCH_TERMS` +- Other distros may be searched if the above yield nothing. + +## 5. Describe the Bug + +Write a structured description containing: +- **Affected package(s):** source package name(s) +- **Affected version(s):** package version and Ubuntu release(s) +- **Symptoms:** what goes wrong (error messages, crashes, incorrect output) +- **Reproduction steps:** how to trigger the bug (if known). If feasible, these steps should work from a clean LXD container or VM. +- **Impact:** who is affected and how severely (data loss? service interruption? cosmetic?) 
+- **Related bugs:** LP duplicates, Debian bugs, upstream issues found in steps 2-4 + +## 6. Analyze the Source Code + +If the bug appears valid and no fix was found in steps 2-4: +- Obtain the source code. Methods (in order of preference): + + 1. Download the source from Launchpad directly using `pull-lp-source`, from the ubuntu-dev-tools package. This can fetch specific versions from specific releases. + +If that unexpectedly fails for any reason, fall back to: + + 2. Find the upstream repository from the `debian/watch` file or `debian/control` Homepage field and clone/inspect it. +- Search for the code responsible for the reported error (grep for error messages, function names, etc.). +- Identify the offending lines and explain the root cause. + +## 7. Propose a Fix + +- If a fix was found in steps 2-4 (Debian patch, upstream commit, other distro patch), reference it and confirm it applies to the affected Ubuntu source version. +- If no existing fix was found but the root cause is clear from step 6, write a proposed fix as a unified diff. +- The proposed fix goes ONLY in your returned result (the `proposed_fix` field). Do not apply it to any source tree. +- If you cannot produce a fix with reasonable confidence, set `proposed_fix.kind` to `none`. + +# Context + +## Reference documentation +- Ubuntu Maintainers Handbook — Bug Triage: https://github.com/canonical/ubuntu-maintainers-handbook/blob/main/BugTriage.md + +## Bug statuses +When recommending a status change, use one of these: +- **Invalid**: the report is not a bug, or the issue is already fixed in the reported version(s). +- **Incomplete**: more information is needed from the reporter before the bug can be acted on. +- **Triaged**: the bug is valid and reproducible; there may or may not be a known fix. +- **Duplicate**: the bug is a duplicate of another report; reference that report. +- **no-change** (not a Launchpad status): leave the bug as-is in its current status ("New", "Confirmed", etc.). 
This means the bug needs more engineering input beyond what this triage can provide. + +## Optional tags +- `server-todo`: the bug has a known fix or very high priority. The team should work on it soon. +- `bitesize`: the bug is actionable and the fix is straightforward (e.g., a patch is already available upstream or in Debian and applies cleanly). +- `server-triage-discuss`: the bug is ambiguous and should be discussed by the team in the next standup or weekly meeting. +- `regression-update`: the bug appears to be a regression caused by an SRU or security update. + +## Definitions +- **Debdiff**: a unified diff between two versions of a Debian/Ubuntu source package, generated by `debdiff old.dsc new.dsc`. It shows all changes between the two versions. +- **SRU**: Stable Release Update — a bug fix backported to a stable (non-development) Ubuntu release. +- **MIR**: Main Inclusion Request — a request to promote a package from Universe to Main. + +## Package cache +A per-package metadata cache is planned for a future iteration but is NOT available +yet. For now, derive upstream/Debian/homepage information from the bug's affected +source package, the package's `debian/control` `Homepage:` field, and `debian/watch`. + +## Special cases to be aware of +Certain packages have known triaging patterns (from the handbook): +- **MySQL**: check for duplicates first; many reports are common usage errors. Check `mysql-8.0` bugs sorted by heat. +- **libvirt/virtualization**: "permission denied" issues are often caused by AppArmor profiles applied by libvirt. Ask for `dmesg` AppArmor denials. + +# Expectation + +## Workflow + +For each bug in the input: + +1. **Validate** the report (Action step 1). If invalid or incomplete, record the status recommendation and stop processing this bug. +2. **Search** for duplicates and existing fixes (Action steps 2-4). Stop searching as soon as a feasible solution is found. +3. **Describe** the bug (Action step 5). +4. 
If the bug is valid and actionable: + a. **Analyze** the source code (Action step 6). + b. **Propose** a fix if possible (Action step 7). + +## Output + +You do NOT write any files. For each bug, **return a single JSON object** with this +exact schema (the surrounding tool renders it into the triage report): + +```json +{ + "bug": "NNNNNN", + "package": "", + "short_title": "", + "status": "Invalid|Incomplete|Triaged|Duplicate|no-change", + "tags": ["server-todo", "bitesize", "server-triage-discuss", "regression-update"], + "analysis": "", + "thought_process": "", + "proposed_fix": {"kind": "none|reference|diff", "value": ""}, + "references": [""] +} +``` + +Field guidance: +- `tags`: use an empty list `[]` when none apply. +- `analysis`: if Invalid/Incomplete, explain the reason for the status. If + Triaged/no-change/Duplicate, describe the bug, root-cause analysis, and any + related bugs/patches found. +- `thought_process`: summarize the investigation steps and reasoning, including + which searches were performed and what was or was not found. +- `proposed_fix`: `kind = "reference"` with a URL/commit when an existing fix was + found; `kind = "diff"` with a unified diff only when you generated one; otherwise + `kind = "none"`. + +## When in doubt + +If you cannot confidently determine the correct status or whether a fix applies, recommend **no-change** and add a note explaining the uncertainty. Suggest the `server-triage-discuss` tag so the team can review it. + +# Constraints + +1. **You do not write output files.** Return the JSON result described in Output; + the surrounding tool writes the report. You may freely use your shell/file tools + for *investigation* (e.g. `pull-lp-source`, `grep`, cloning upstream repos) inside + your scratch working directory. +2. **No hallucinated fixes.** If you cannot produce a fix with confidence that it is correct, set `proposed_fix.kind` to `none`. Do not invent plausible-looking patches. +3. 
**Verify referenced commits and URLs.** Before citing an upstream commit, patch, or issue URL, confirm it actually resolves (fetch/HEAD it, or inspect the repository). Do not cite a commit hash or link you have not confirmed exists — fabricated commits that 404 are worse than none. +4. **No patch application.** Do not generate or apply quilt patches. Do not modify any package source tree as a deliverable. Proposed fixes are returned as a unified diff in the `proposed_fix` field only. +5. **Read-only external access.** Do not post comments on bugs, change bug statuses, subscribe teams, or modify any external system. Your output is recommendations only; a human engineer will act on them. +6. **No speculation on internal architecture.** If you don't have enough information about a package's internals, say so rather than guessing. + +# Assumptions + +1. The bug reporter is not necessarily a software engineer. They may be facing a configuration issue, using an unsupported setup (e.g., third-party packages/PPAs), or misidentifying the faulty package. +2. The bug may be a duplicate of an existing report. +3. The package version cited in the report may be outdated or incorrect. +4. Upstream or Debian may have already fixed the issue in a newer release. +5. The bug may affect multiple Ubuntu releases simultaneously. +6. The agent has read-only access to Launchpad, Debian BTS, upstream trackers, and other external resources unless explicitly stated otherwise. +7. Process tickets (syncs, merges, SRUs, MIRs) are out of scope for this triage workflow. + +# Replayability + +After triaging, perform this self-improvement step: + +## Process improvements +Review your triage thought process and identify: +- Steps that could be automated or made more systematic. +- Information you needed but didn't have. +- Decisions that were difficult or ambiguous. 
+ +**Return** these as a `suggested_improvements` string (markdown) alongside your +result, proposing changes to any of the RACECAR sections (Role, Actions, Context, +Expectations, Constraints, Assumptions, Replayability) of this specification. The +tool appends it to the report as a `## Suggested Improvements` section. Do not write +any file yourself. diff --git a/startriage/data/defaults.toml b/startriage/data/defaults.toml index 58f8c49..35f1376 100644 --- a/startriage/data/defaults.toml +++ b/startriage/data/defaults.toml @@ -7,6 +7,14 @@ lp_triage_updates = "theirs" #default_team = # automatic if 1 defined #lp_extended = # depends on mode +[ai] +# AI-assisted triage backend (see 'startriage config set --help'). +#provider = "copilot" # "copilot" | "openrouter" +#model = "claude-opus-4.8" # copilot default; set your own for openrouter +#github_token = "github_pat_..." # or COPILOT_GITHUB_TOKEN / GH_TOKEN env +#openrouter_api_key = "..." # or OPENROUTER_API_KEY env +#openrouter_base_url = "https://openrouter.ai/api/v1" + [team.ubuntu-server] lp_team = "ubuntu-server" lp_todo_tag = "server-todo" diff --git a/startriage/enums.py b/startriage/enums.py index c2a5cfb..4c29863 100644 --- a/startriage/enums.py +++ b/startriage/enums.py @@ -19,3 +19,28 @@ class FetchMode(StrEnum): triage = "triage" # date-range bugs for daily triage todo = "todo" # tag-filtered housekeeping bugs subscribed = "subscribed" # list subscribed bugs + + +class AIProvider(StrEnum): + """LLM backend used for agentic triage.""" + + copilot = "copilot" # GitHub Copilot SDK (default); GitHub token auth + openrouter = "openrouter" # OpenAI-compatible BYOK endpoint + + +class TriageStatus(StrEnum): + """Status the agent recommends for a triaged bug.""" + + invalid = "Invalid" # not a bug, or already fixed in the reported version + incomplete = "Incomplete" # more information needed from the reporter + triaged = "Triaged" # valid and reproducible + duplicate = "Duplicate" # duplicate of another 
report + no_change = "no-change" # leave as-is; needs further engineering input + + +class ProposedFixKind(StrEnum): + """How the agent's proposed fix should be interpreted.""" + + none = "none" # no fix proposed + reference = "reference" # value is a URL/commit pointing at an existing fix + diff = "diff" # value is a unified diff generated by the agent diff --git a/startriage/sources/launchpad/models.py b/startriage/sources/launchpad/models.py index 06d12a9..ee5736b 100644 --- a/startriage/sources/launchpad/models.py +++ b/startriage/sources/launchpad/models.py @@ -36,6 +36,37 @@ def mark(text: str, color: str) -> str: return "".join([color, text, COLOR_RESET]) +def _name_from_link(link: str | None) -> str | None: + """Extract a username from a Launchpad person link (``.../~username``).""" + if not link or "~" not in link: + return None + return link.split("~")[-1] + + +def _affected_from_task(lp_task: Any) -> dict[str, Any]: + """Best-effort structured description of one bug task (affected target). + + Always includes the human-readable ``target`` (e.g. ``"pkg (Ubuntu Jammy)"``) + plus the task ``status``/``importance``. For distribution source-package + tasks it additionally resolves ``distro``, ``package`` and ``series``. + Launchpad bug tasks do not expose a package version, so none is reported. + """ + entry: dict[str, Any] = { + "target": lp_task.bug_target_name, + "status": lp_task.status, + "importance": lp_task.importance, + } + parts = str(lp_task).split("/") + if "+source" in parts and len(parts) >= 5: + si = parts.index("+source") + distro = parts[4] + before = parts[si - 1] + entry["distro"] = distro + entry["package"] = parts[si + 1] if si + 1 < len(parts) else None + entry["series"] = before if before != distro else None + return entry + + @dataclass class RenderContext: """Render-time state passed explicitly to Task display methods. 
@@ -359,6 +390,54 @@ def to_dict(self, ctx: RenderContext) -> dict: "sibling_task_status": sibling_status, } + def to_agent_payload(self) -> dict[str, Any]: + """Build the rich, JSON-serialisable bug context handed to the AI agent. + + Unlike :meth:`to_dict` (terminal/markdown rendering metadata), this pulls + the full report body the agent needs to triage: description, every + comment, attachments, all affected targets, duplicate-of, and heat. + + Accessing these fields triggers lazy launchpadlib fetches, so call this + off the event loop (e.g. via ``asyncio.to_thread``), mirroring the + finder's threaded LP access. + """ + bug = self.lp_task.bug + + comments: list[dict[str, Any]] = [] + # messages[0] is the original report (already captured by ``description``); + # the remainder are follow-up comments. + for msg in list(bug.messages)[1:]: + comments.append( + { + "author": _name_from_link(msg.owner_link), + "date": msg.date_created.isoformat() if msg.date_created else None, + "text": msg.content, + } + ) + + attachments = [ + {"title": att.title, "type": att.type, "is_patch": att.type == "Patch"} for att in bug.attachments + ] + + duplicate_of = bug.duplicate_of + duplicate_of_number = str(duplicate_of.id) if duplicate_of else None + + return { + "number": self.number, + "url": self.url, + "title": self.title, + "short_title": self.short_title, + "description": bug.description, + "status": self.status, + "importance": self.importance, + "tags": list(self.tags), + "heat": bug.heat, + "duplicate_of": duplicate_of_number, + "affected": [_affected_from_task(t) for t in self._all_bug_tasks], + "attachments": attachments, + "comments": comments, + } + @dataclass class LaunchpadTasks: diff --git a/startriage/spinner.py b/startriage/spinner.py index e1890e8..e1c4247 100644 --- a/startriage/spinner.py +++ b/startriage/spinner.py @@ -26,14 +26,22 @@ def __init__( self, pending: set[str], *, + status: str | None = None, out: Callable[[str], None] | None = None, interval: 
float = 1 / 10, # 10 FPS ) -> None: self._pending = set(pending) + self._status = status + # A spinner only makes sense on an interactive terminal. When stderr is + # piped/redirected (CI, logs) and no explicit sink is given, become a + # no-op so callers can always use the spinner unconditionally. + self._enabled = bool(out) or sys.stderr.isatty() if out: self._write = out - else: + elif self._enabled: self._write = lambda s: (sys.stderr.write(s), sys.stderr.flush()) + else: + self._write = lambda _s: None self._interval = interval self._draw = asyncio.Event() self._draw.set() @@ -44,6 +52,14 @@ def done(self, name: str) -> None: """Mark *name* as no longer pending.""" self._pending.discard(name) + def set_status(self, text: str | None) -> None: + """Set a free-form status line (overrides the pending-set display). + + The next animation frame (within ``interval``) picks it up; no redraw is + forced here so callers can update it cheaply from a hot loop. + """ + self._status = text + def suspend(self) -> None: """Pause spinner writes (call before awaiting section output).""" self._draw.clear() @@ -57,13 +73,20 @@ def clear(self) -> None: self._write("\x1b[2K\r") async def _run(self) -> None: + if not self._enabled: + return i = 0 while not self._stop.is_set(): await self._draw.wait() frame = self._FRAMES[i % len(self._FRAMES)] sources = sorted(self._pending) - msg = f"Fetching: {', '.join(sources)}…" if sources else "Processing…" + if self._status is not None: + msg = self._status + elif sources: + msg = f"Fetching: {', '.join(sources)}…" + else: + msg = "Processing…" line = f"{frame} {msg}" self._write(f"\r{line}") diff --git a/startriage/triage.py b/startriage/triage.py index 74b88d3..c489fec 100644 --- a/startriage/triage.py +++ b/startriage/triage.py @@ -48,8 +48,12 @@ async def run_triage( config: StarTriageConfig, opts: TaskFilterOptions, output_cfg: OutputConfig, -) -> None: - """Daily triage: fetch all sources concurrently, print sections in order as they 
complete.""" +) -> list[tuple[str, TriageResult]]: + """Daily triage: fetch all sources concurrently, print sections in order as they complete. + + Returns the ``(source_name, result)`` pairs that were fetched successfully so + callers (e.g. ``triage --ai``) can reuse them without re-fetching. + """ range = triage_task_note = "" @@ -121,6 +125,8 @@ async def run_triage( logging.info("Markdown written to %s", output_cfg.markdown_path) + return results + async def run_todo( config: StarTriageConfig, diff --git a/tests/test_ai_agent.py b/tests/test_ai_agent.py new file mode 100644 index 0000000..58f6b0f --- /dev/null +++ b/tests/test_ai_agent.py @@ -0,0 +1,176 @@ +"""Tests for the agent result contract and the sequential triage loop.""" + +from __future__ import annotations + +import json + +import pytest + +from startriage.ai import ( + AgentResult, + AgentResultError, + FakeProvider, + extract_json_block, + load_system_prompt, + parse_agent_result, + triage_bugs, +) +from startriage.enums import ProposedFixKind, TriageStatus + +_VALID_RESULT = { + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It crashes.", + "thought_process": "Looked at logs.", + "proposed_fix": {"kind": "reference", "value": "https://example.test/commit"}, + "references": ["https://bugs.launchpad.net/ubuntu/+bug/123"], + "suggested_improvements": "Add a cache.", +} + + +def _fenced(payload: dict, lang: str = "json") -> str: + return f"Here is the result:\n\n```{lang}\n{json.dumps(payload)}\n```\n" + + +# --- extract_json_block ---------------------------------------------------- + + +def test_extract_json_block_basic(): + text = 'preamble\n```json\n{"a": 1}\n```\ntrailer' + assert extract_json_block(text) == '{"a": 1}' + + +def test_extract_json_block_prefers_last_json_block(): + text = '```json\n{"first": true}\n```\n```json\n{"second": true}\n```' + assert extract_json_block(text) == '{"second": true}' + + 
+def test_extract_json_block_falls_back_to_untagged_block(): + text = 'no json tag here\n```\n{"untagged": 1}\n```' + assert extract_json_block(text) == '{"untagged": 1}' + + +def test_extract_json_block_prefers_json_over_untagged(): + text = '```\n{"untagged": 1}\n```\n```json\n{"tagged": 2}\n```' + assert extract_json_block(text) == '{"tagged": 2}' + + +def test_extract_json_block_missing_raises(): + with pytest.raises(AgentResultError, match="no fenced code block"): + extract_json_block("just some prose, no block at all") + + +# --- parse_agent_result ---------------------------------------------------- + + +def test_parse_agent_result_valid(): + result = parse_agent_result(_fenced(_VALID_RESULT)) + assert isinstance(result, AgentResult) + assert result.bug == "123" + assert result.status is TriageStatus.triaged + assert result.proposed_fix.kind is ProposedFixKind.reference + assert result.suggested_improvements == "Add a cache." + + +def test_parse_agent_result_no_change_status(): + payload = {**_VALID_RESULT, "status": "no-change"} + assert parse_agent_result(_fenced(payload)).status is TriageStatus.no_change + + +def test_parse_agent_result_ignores_extra_fields(): + payload = {**_VALID_RESULT, "unexpected": "ignored"} + # Extra keys are tolerated; known fields still validated. 
+ assert parse_agent_result(_fenced(payload)).bug == "123" + + +def test_parse_agent_result_invalid_status(): + payload = {**_VALID_RESULT, "status": "Bogus"} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_invalid_fix_kind(): + payload = {**_VALID_RESULT, "proposed_fix": {"kind": "magic", "value": ""}} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_missing_required_field(): + payload = {k: v for k, v in _VALID_RESULT.items() if k != "status"} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_garbled_json(): + text = "```json\n{not valid json,,,}\n```" + with pytest.raises(AgentResultError, match="not valid JSON"): + parse_agent_result(text) + + +def test_parse_agent_result_non_object(): + text = "```json\n[1, 2, 3]\n```" + with pytest.raises(AgentResultError, match="must be an object"): + parse_agent_result(text) + + +# --- system prompt --------------------------------------------------------- + + +def test_load_system_prompt_ships_as_resource(): + prompt = load_system_prompt() + assert "Role" in prompt + assert "proposed_fix" in prompt + + +# --- triage_bugs loop ------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_triage_bugs_success(): + provider = FakeProvider([_fenced(_VALID_RESULT)]) + outcomes = await triage_bugs(provider, [{"number": "123"}], system_prompt="SYS") + + assert len(outcomes) == 1 + outcome = outcomes[0] + assert outcome.ok + assert outcome.bug == "123" + assert outcome.result.status is TriageStatus.triaged + assert outcome.error is None + # The payload is forwarded as a JSON user message under the given system prompt. 
+ assert provider.calls == [("SYS", json.dumps({"number": "123"}, ensure_ascii=False))] + + +@pytest.mark.asyncio +async def test_triage_bugs_skips_and_continues_on_failure(): + # First bug returns garbage, second returns a valid result. + provider = FakeProvider(["no json here", _fenced({**_VALID_RESULT, "bug": "456"})]) + payloads = [{"number": "123"}, {"number": "456"}] + + outcomes = await triage_bugs(provider, payloads, system_prompt="SYS") + + assert len(outcomes) == 2 + assert not outcomes[0].ok + assert outcomes[0].bug == "123" + assert "no fenced code block" in outcomes[0].error + assert outcomes[0].raw == "no json here" + + assert outcomes[1].ok + assert outcomes[1].result.bug == "456" + + +@pytest.mark.asyncio +async def test_triage_bugs_records_provider_exception(): + class _BoomProvider(FakeProvider): + async def run(self, system_prompt: str, user_message: str) -> str: + raise RuntimeError("network down") + + outcomes = await triage_bugs(_BoomProvider(), [{"number": "789"}], system_prompt="S") + + assert len(outcomes) == 1 + assert not outcomes[0].ok + assert outcomes[0].bug == "789" + assert "provider error" in outcomes[0].error + assert "network down" in outcomes[0].error diff --git a/tests/test_ai_logging.py b/tests/test_ai_logging.py new file mode 100644 index 0000000..95d5c73 --- /dev/null +++ b/tests/test_ai_logging.py @@ -0,0 +1,73 @@ +"""Tests for AI step logging (observability under -v / -vv).""" + +from __future__ import annotations + +import logging + +import pytest + +from startriage.ai import FakeProvider, triage_bugs + +_OK = """```json +{ + "bug": "123", + "package": "pkg", + "short_title": "boom", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It broke.", + "thought_process": "Checked the logs.", + "proposed_fix": {"kind": "none", "value": ""}, + "references": [], + "suggested_improvements": "" +} +```""" + + +@pytest.mark.asyncio +async def test_triage_bugs_logs_progress_and_decision(caplog): + provider = 
FakeProvider([_OK]) + with caplog.at_level(logging.INFO, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "123"}], system_prompt="sys") + + messages = [r.getMessage() for r in caplog.records] + assert any("Triaging bug 123 (1/1)" in m for m in messages) + assert any("Bug 123 → status=Triaged" in m for m in messages) + assert any("1 succeeded, 0 failed" in m for m in messages) + + +@pytest.mark.asyncio +async def test_triage_bugs_logs_failure_as_warning(caplog): + provider = FakeProvider(["no json here"]) + with caplog.at_level(logging.INFO, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "999"}], system_prompt="sys") + + warnings = [r.getMessage() for r in caplog.records if r.levelno == logging.WARNING] + assert any("Bug 999 failed" in m for m in warnings) + summaries = [r.getMessage() for r in caplog.records if "complete" in r.getMessage()] + assert any("0 succeeded, 1 failed" in m for m in summaries) + + +@pytest.mark.asyncio +async def test_triage_bugs_debug_logs_thought_process(caplog): + provider = FakeProvider([_OK]) + with caplog.at_level(logging.DEBUG, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "123"}], system_prompt="sys") + + debug = [r.getMessage() for r in caplog.records if r.levelno == logging.DEBUG] + assert any("thought process: Checked the logs." 
in m for m in debug) + + +@pytest.mark.asyncio +async def test_triage_bugs_reports_progress(): + provider = FakeProvider([_OK, _OK]) + seen: list[tuple[int, int, str]] = [] + + await triage_bugs( + provider, + [{"number": "100"}, {"number": "200"}], + system_prompt="sys", + on_progress=lambda index, total, bug: seen.append((index, total, bug)), + ) + + assert seen == [(1, 2, "100"), (2, 2, "200")] diff --git a/tests/test_ai_provider.py b/tests/test_ai_provider.py new file mode 100644 index 0000000..634c24b --- /dev/null +++ b/tests/test_ai_provider.py @@ -0,0 +1,120 @@ +"""Tests for the AI provider layer (selection, kwargs, fake round-trip).""" + +from __future__ import annotations + +import pytest + +from startriage.ai import ( + CopilotProvider, + FakeProvider, + build_client_kwargs, + build_provider, + build_session_kwargs, +) +from startriage.config import AIConfig, AIConfigError +from startriage.enums import AIProvider + + +@pytest.fixture(autouse=True) +def _clear_ai_env(monkeypatch): + for var in ( + "COPILOT_GITHUB_TOKEN", + "GH_TOKEN", + "GITHUB_TOKEN", + "STARTRIAGE_AI_OPENROUTER_KEY", + "OPENROUTER_API_KEY", + ): + monkeypatch.delenv(var, raising=False) + + +def test_build_session_kwargs_copilot_with_token(): + cfg = AIConfig(github_token="github_pat_abc") + # The Copilot token authenticates the client, not the session. + assert build_client_kwargs(cfg) == {"github_token": "github_pat_abc"} + assert build_session_kwargs(cfg) == {} + + +def test_build_session_kwargs_copilot_without_token(): + # No config token and no env var -> SDK is left to read the env itself. 
+ assert build_client_kwargs(AIConfig()) == {} + assert build_session_kwargs(AIConfig()) == {} + + +def test_build_session_kwargs_copilot_token_from_env(monkeypatch): + monkeypatch.setenv("COPILOT_GITHUB_TOKEN", "env_token") + assert build_client_kwargs(AIConfig()) == {"github_token": "env_token"} + assert build_session_kwargs(AIConfig()) == {} + + +def test_build_session_kwargs_openrouter(): + cfg = AIConfig( + provider=AIProvider.openrouter, + model="anthropic/claude-3.5", + openrouter_api_key="sk-or-1", + ) + # BYOK travels on the session; the client needs no auth. + assert build_client_kwargs(cfg) == {} + assert build_session_kwargs(cfg) == { + "provider": { + "type": "openai", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-1", + } + } + + +def test_build_session_kwargs_openrouter_custom_base_url(): + cfg = AIConfig( + provider=AIProvider.openrouter, + openrouter_api_key="sk-or-2", + openrouter_base_url="https://example.test/v1", + ) + assert build_session_kwargs(cfg)["provider"]["base_url"] == "https://example.test/v1" + + +def test_build_provider_returns_copilot_provider(): + provider = build_provider(AIConfig(github_token="github_pat_abc")) + assert isinstance(provider, CopilotProvider) + assert provider.model == "claude-opus-4.8" + + +def test_build_provider_openrouter_uses_configured_model(): + cfg = AIConfig( + provider=AIProvider.openrouter, + model="anthropic/claude-3.5", + openrouter_api_key="sk-or-1", + ) + assert build_provider(cfg).model == "anthropic/claude-3.5" + + +def test_build_provider_missing_copilot_credential(): + with pytest.raises(AIConfigError, match="Copilot"): + build_provider(AIConfig()) + + +def test_build_provider_missing_openrouter_credential(): + with pytest.raises(AIConfigError, match="OpenRouter"): + build_provider(AIConfig(provider=AIProvider.openrouter)) + + +@pytest.mark.asyncio +async def test_fake_provider_round_trip_queued_responses(): + provider = FakeProvider(["first", "second"], model="fake-x") + 
assert provider.model == "fake-x" + + assert await provider.run("sys", "bug-1") == "first" + assert await provider.run("sys", "bug-2") == "second" + # Queue drained -> default response. + assert await provider.run("sys", "bug-3") == "" + + assert provider.calls == [ + ("sys", "bug-1"), + ("sys", "bug-2"), + ("sys", "bug-3"), + ] + + +@pytest.mark.asyncio +async def test_fake_provider_default_response(): + provider = FakeProvider(default_response="canned") + assert await provider.run("sys", "anything") == "canned" diff --git a/tests/test_ai_render.py b/tests/test_ai_render.py new file mode 100644 index 0000000..98dac96 --- /dev/null +++ b/tests/test_ai_render.py @@ -0,0 +1,192 @@ +"""Tests for the triage report renderer (golden render + append).""" + +from __future__ import annotations + +from datetime import date + +from startriage.ai import ( + AgentResult, + BugOutcome, + ProposedFix, + append_report, + render_bug_metadata, + render_report, +) +from startriage.ai.render import AI_APPEND_NOTICE, _render_proposed_fix +from startriage.enums import ProposedFixKind, TriageStatus + +_DAY = date(2026, 6, 15) + + +def _result(**overrides) -> AgentResult: + base = { + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": TriageStatus.triaged, + "tags": ["server-todo", "bitesize"], + "analysis": "It crashes immediately.", + "thought_process": "Read the logs, searched LP.", + "proposed_fix": ProposedFix(kind=ProposedFixKind.reference, value="https://example.test/commit"), + "references": ["https://bugs.launchpad.net/ubuntu/+bug/123"], + "suggested_improvements": "Add a version cache.", + } + base.update(overrides) + return AgentResult(**base) + + +def _outcome(result: AgentResult) -> BugOutcome: + return BugOutcome(bug=result.bug, result=result, error=None, raw="{}") + + +# --- bug metadata rendering ------------------------------------------------ + + +def test_render_bug_metadata_renders_key_fields(): + payload = { + "number": "2101234", + "url": 
"https://bugs.launchpad.net/ubuntu/+bug/2101234", + "short_title": "boom on start", + "status": "Confirmed", + "importance": "High", + "heat": 42, + "tags": ["server-todo", "bitesize"], + "duplicate_of": "999", + "affected": [{"target": "pkg (Ubuntu)", "status": "Confirmed", "importance": "High"}], + "description": "It crashes immediately.", + "attachments": [{"title": "fix.patch", "type": "text/plain", "is_patch": True}], + "comments": [{"author": "alice", "date": "2026-06-15", "text": "Seeing this too."}], + } + rendered = render_bug_metadata([payload]) + assert "# Bug metadata" in rendered + assert "## LP #2101234 — boom on start" in rendered + assert "**Status:** Confirmed" in rendered + assert "**Tags:** server-todo, bitesize" in rendered + assert "**Duplicate of:** LP #999" in rendered + assert "- pkg (Ubuntu) — Confirmed (High)" in rendered + assert "It crashes immediately." in rendered + assert "- fix.patch (text/plain) [patch]" in rendered + assert "**alice** — 2026-06-15" in rendered + + +def test_render_bug_metadata_handles_sparse_payload(): + rendered = render_bug_metadata([{"number": "1"}]) + assert "## LP #1 — (no title)" in rendered + assert "**Tags:** _none_" in rendered + assert "_No description._" in rendered + + +# --- proposed fix rendering ------------------------------------------------ + + +def test_render_proposed_fix_none(): + fix = ProposedFix(kind=ProposedFixKind.none, value="") + assert _render_proposed_fix(fix) == "_No fix proposed._" + + +def test_render_proposed_fix_reference(): + fix = ProposedFix(kind=ProposedFixKind.reference, value="https://x.test/c ") + assert _render_proposed_fix(fix) == "https://x.test/c" + + +def test_render_proposed_fix_diff_is_fenced_not_applied(): + diff = "--- a/f\n+++ b/f\n@@ -1 +1 @@\n-old\n+new" + rendered = _render_proposed_fix(ProposedFix(kind=ProposedFixKind.diff, value=diff)) + assert rendered == f"```diff\n{diff}\n```" + + +def test_render_proposed_fix_empty_diff_falls_back(): + # A diff kind with 
no value should not emit an empty code block. + fix = ProposedFix(kind=ProposedFixKind.diff, value=" ") + assert _render_proposed_fix(fix) == "_No fix proposed._" + + +# --- full report ----------------------------------------------------------- + + +def test_render_report_golden(): + outcomes = [_outcome(_result())] + expected = ( + "# Automated triage — 2026-06-15\n" + "\n" + "## LP #123 — pkg — boom on start\n" + "\n" + "**Suggested status:** Triaged\n" + "**Suggested tags:** server-todo, bitesize\n" + "\n" + "### Analysis\n" + "\n" + "It crashes immediately.\n" + "\n" + "### Thought Process\n" + "\n" + "Read the logs, searched LP.\n" + "\n" + "### Proposed Fix\n" + "\n" + "https://example.test/commit\n" + "\n" + "### References\n" + "\n" + "- https://bugs.launchpad.net/ubuntu/+bug/123\n" + "\n" + "## Suggested Improvements\n" + "\n" + "Add a version cache.\n" + ) + assert render_report(outcomes, day=_DAY) == expected + + +def test_render_report_no_tags_and_no_references(): + result = _result(tags=[], references=[], suggested_improvements="") + report = render_report([_outcome(result)], day=_DAY) + assert "**Suggested tags:** _none_" in report + assert "### References" not in report + assert "## Suggested Improvements" not in report + + +def test_render_report_no_change_status(): + result = _result(status=TriageStatus.no_change) + report = render_report([_outcome(result)], day=_DAY) + assert "**Suggested status:** no-change" in report + + +def test_render_report_records_failures(): + ok = _outcome(_result(bug="123")) + failed = BugOutcome(bug="456", result=None, error="agent output invalid", raw="junk") + report = render_report([ok, failed], day=_DAY) + + assert "## LP #123 — pkg — boom on start" in report + assert "## LP #456 — triage failed" in report + assert "**Error:** agent output invalid" in report + + +def test_render_report_deduplicates_improvements(): + a = _outcome(_result(bug="1", suggested_improvements="Same note.")) + b = _outcome(_result(bug="2", 
suggested_improvements="Same note.")) + c = _outcome(_result(bug="3", suggested_improvements="Different note.")) + report = render_report([a, b, c], day=_DAY) + + # "Same note." appears once in the improvements section. + improvements = report.split("## Suggested Improvements", 1)[1] + assert improvements.count("Same note.") == 1 + assert "Different note." in improvements + + +# --- append_report --------------------------------------------------------- + + +def test_append_report_adds_notice_after_existing_content(tmp_path): + md = tmp_path / "triage.md" + md.write_text("# Triage\n\nSome human content.\n") + + returned = append_report(md, "# Automated triage — 2026-06-15\n\n## LP #1\n") + + assert returned == md + text = md.read_text() + # Original content is preserved and comes first. + assert text.startswith("# Triage\n\nSome human content.\n") + # A notice separates the AI section from the human report. + assert AI_APPEND_NOTICE in text + assert text.index("Some human content.") < text.index("Automated triage") + assert text.endswith("## LP #1\n") diff --git a/tests/test_ai_run.py b/tests/test_ai_run.py new file mode 100644 index 0000000..2c5b333 --- /dev/null +++ b/tests/test_ai_run.py @@ -0,0 +1,152 @@ +"""Tests for AI orchestration (run.py) and CLI wiring — all offline.""" + +from __future__ import annotations + +import pytest + +from startriage.ai import ( + FakeProvider, + parse_bug_number, + payloads_from_tasks, + run_agent_on_payloads, +) +from startriage.cli import _build_parser +from startriage.config import StarTriageConfig + +_CANNED = """Here is my analysis. 
+ +```json +{ + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It broke.", + "thought_process": "Looked at logs.", + "proposed_fix": {"kind": "none", "value": ""}, + "references": [], + "suggested_improvements": "" +} +``` +""" + + +# --- parse_bug_number ------------------------------------------------------ + + +@pytest.mark.parametrize( + ("spec", "expected"), + [ + ("123456", "123456"), + ("#123456", "123456"), + ("https://bugs.launchpad.net/ubuntu/+bug/123456", "123456"), + ("https://bugs.launchpad.net/ubuntu/+source/python3.12/+bug/987", "987"), + (" #42 ", "42"), + ("https://launchpad.net/bugs/555", "555"), + ], +) +def test_parse_bug_number(spec, expected): + assert parse_bug_number(spec) == expected + + +@pytest.mark.parametrize( + "spec", + [ + "not-a-bug", + "", + "12ab", + # Arbitrary URLs that merely contain digits must NOT resolve to a bug. + "https://myponyadventure.lol/pages/3133742/cute.png", + "https://example.com/issues/42", + ], +) +def test_parse_bug_number_invalid(spec): + with pytest.raises(ValueError): + parse_bug_number(spec) + + +# --- payloads_from_tasks --------------------------------------------------- + + +class _FakeTask: + def __init__(self, number: str, payload=None, *, raises: bool = False): + self.number = number + self._payload = payload if payload is not None else {"number": number} + self._raises = raises + + def to_agent_payload(self): + if self._raises: + raise RuntimeError("boom") + return self._payload + + +def test_payloads_from_tasks_dedupes_by_number(): + tasks = [_FakeTask("1"), _FakeTask("1"), _FakeTask("2")] + payloads = payloads_from_tasks(tasks) # type: ignore[arg-type] + assert [p["number"] for p in payloads] == ["1", "2"] + + +def test_payloads_from_tasks_skips_failures(): + tasks = [_FakeTask("1"), _FakeTask("2", raises=True), _FakeTask("3")] + payloads = payloads_from_tasks(tasks) # type: ignore[arg-type] + assert [p["number"] 
for p in payloads] == ["1", "3"] + + +# --- run_agent_on_payloads ------------------------------------------------- + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_returns_markdown(): + provider = FakeProvider([_CANNED]) + config = StarTriageConfig() + payloads = [{"number": "123", "title": "boom"}] + + report = await run_agent_on_payloads(config, payloads, provider=provider) + + assert report is not None + assert "## LP #123 — pkg — boom on start" in report + assert "**Suggested status:** Triaged" in report + # The agent was asked exactly once, with the payload as the user message. + assert len(provider.calls) == 1 + assert '"number": "123"' in provider.calls[0][1] + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_empty_returns_none(): + provider = FakeProvider([]) + report = await run_agent_on_payloads(StarTriageConfig(), [], provider=provider) + assert report is None + assert provider.calls == [] + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_records_bad_agent_output(): + provider = FakeProvider(["no json here"]) + report = await run_agent_on_payloads( + StarTriageConfig(), + [{"number": "999"}], + provider=provider, + ) + assert report is not None + assert "## LP #999 — triage failed" in report + + +# --- CLI parser wiring ----------------------------------------------------- + + +def test_parser_analyze_accepts_multiple_bugs(): + args = _build_parser().parse_args(["analyze", "123", "#456", "https://x/+bug/789"]) + assert args.bug == ["123", "#456", "https://x/+bug/789"] + assert args.func.__name__ == "_run_analyze" + assert args.ai is False + + +def test_parser_analyze_ai_flag(): + assert _build_parser().parse_args(["analyze", "123"]).ai is False + assert _build_parser().parse_args(["analyze", "--ai", "123"]).ai is True + + +def test_parser_triage_ai_flag_defaults_false(): + assert _build_parser().parse_args(["triage"]).ai is False + assert _build_parser().parse_args(["triage", "--ai"]).ai is True diff --git 
a/tests/test_config.py b/tests/test_config.py index dc1d98b..65ab21d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -8,7 +8,8 @@ import pytest from pydantic import ValidationError -from startriage.config import load_config +from startriage.config import AIConfigError, load_config, update_user_config +from startriage.enums import AIProvider def _write_toml(tmp_path: Path, content: str) -> Path: @@ -36,7 +37,7 @@ def test_general_override(tmp_path): [general] lp_extended = true savebugs_dir = "{bugs_dir}" - """, + """, ) config = load_config(p) assert config.general.lp_extended is True @@ -53,7 +54,7 @@ def test_team_override_replaces_ignore_list(tmp_path): lp_ignore_packages = [] discourse_categories = ["project/server"] github_repos = [] - """, + """, ) config = load_config(p) assert config.team["ubuntu-server"].lp_ignore_packages == [] @@ -69,7 +70,7 @@ def test_custom_team_added(tmp_path): lp_ignore_packages = [] discourse_categories = ["desktop"] github_repos = [] - """, + """, ) config = load_config(p) assert "ubuntu-desktop" in config.team @@ -94,7 +95,7 @@ def test_invalid_lp_triage_updates_filter(tmp_path): """\ [general] lp_triage_updates = "invalid_value" - """, + """, ) with pytest.raises(ValidationError): load_config(p) @@ -106,7 +107,7 @@ def test_extra_field_rejected(tmp_path): """\ [general] typo_field = true - """, + """, ) with pytest.raises(ValidationError): load_config(p) @@ -118,7 +119,110 @@ def test_github_token_config(tmp_path): """\ [general] github_token = "ghp_secret" - """, + """, ) config = load_config(p) assert config.general.github_token == "ghp_secret" + + +def test_ai_defaults(tmp_path): + """No [ai] section yields sensible Copilot defaults.""" + config = load_config(tmp_path / "nonexistent.toml") + assert config.ai.provider is AIProvider.copilot + assert config.ai.model == "claude-opus-4.8" + assert config.ai.openrouter_base_url == "https://openrouter.ai/api/v1" + + +def test_ai_override(tmp_path): + p = _write_toml( 
+ tmp_path, + """\ + [ai] + provider = "openrouter" + model = "anthropic/claude-3.5-sonnet" + openrouter_api_key = "or_secret" + """, + ) + config = load_config(p) + assert config.ai.provider is AIProvider.openrouter + assert config.ai.model == "anthropic/claude-3.5-sonnet" + assert config.ai.openrouter_api_key == "or_secret" + + +def test_ai_invalid_provider(tmp_path): + p = _write_toml( + tmp_path, + """\ + [ai] + provider = "bogus" + """, + ) + with pytest.raises(ValidationError): + load_config(p) + + +def test_ai_extra_field_rejected(tmp_path): + p = _write_toml( + tmp_path, + """\ + [ai] + typo_field = true + """, + ) + with pytest.raises(ValidationError): + load_config(p) + + +def test_ai_resolve_token_prefers_config(tmp_path, monkeypatch): + monkeypatch.setenv("COPILOT_GITHUB_TOKEN", "env_token") + p = _write_toml( + tmp_path, + """\ + [ai] + github_token = "cfg_token" + """, + ) + config = load_config(p) + assert config.ai.resolve_token() == "cfg_token" + + +def test_ai_resolve_token_from_env(tmp_path, monkeypatch): + for var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("GH_TOKEN", "env_token") + config = load_config(tmp_path / "nonexistent.toml") + assert config.ai.resolve_token() == "env_token" + + +def test_ai_require_configured_copilot_missing(tmp_path, monkeypatch): + for var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + monkeypatch.delenv(var, raising=False) + config = load_config(tmp_path / "nonexistent.toml") + with pytest.raises(AIConfigError, match="Copilot"): + config.ai.require_configured() + + +def test_ai_require_configured_openrouter_missing(tmp_path, monkeypatch): + for var in ("STARTRIAGE_AI_OPENROUTER_KEY", "OPENROUTER_API_KEY"): + monkeypatch.delenv(var, raising=False) + p = _write_toml( + tmp_path, + """\ + [ai] + provider = "openrouter" + """, + ) + config = load_config(p) + with pytest.raises(AIConfigError, match="OpenRouter"): + 
config.ai.require_configured() + + +def test_ai_secret_written_with_restricted_perms(tmp_path): + path = tmp_path / "startriage.toml" + update_user_config( + {"ai": {"openrouter_api_key": "or_secret"}}, + config_path=path, + sensitive=True, + ) + assert load_config(path).ai.openrouter_api_key == "or_secret" + assert (path.stat().st_mode & 0o777) == 0o600 diff --git a/tests/test_launchpad_models.py b/tests/test_launchpad_models.py new file mode 100644 index 0000000..1127d7d --- /dev/null +++ b/tests/test_launchpad_models.py @@ -0,0 +1,153 @@ +"""Tests for the Launchpad Task model, focused on the AI agent payload.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +from startriage.sources.launchpad.models import ( + DISTRIBUTION_SOURCE_PACKAGE_RESOURCE_TYPE_LINK, + Task, +) + + +class _FakeMessage: + def __init__(self, owner_link, date_created, content): + self.owner_link = owner_link + self.date_created = date_created + self.content = content + + +class _FakeAttachment: + def __init__(self, title, type_): + self.title = title + self.type = type_ + + +class _FakeDuplicate: + def __init__(self, id_): + self.id = id_ + + +class _FakeTarget: + resource_type_link = DISTRIBUTION_SOURCE_PACKAGE_RESOURCE_TYPE_LINK + + +class _FakeLPTask: + """Minimal stand-in for a launchpadlib bug_task entry.""" + + def __init__(self, api_url, *, status, importance, target_name, bug=None): + self._api_url = api_url + self.status = status + self.importance = importance + self.bug_target_name = target_name + self.title = "Bug #123 in pkg (Ubuntu): boom on start" + self.assignee_link = None + self.target = _FakeTarget() + self.bug = bug + + def __str__(self): + return self._api_url + + +class _FakeBug: + def __init__(self, *, bug_tasks, messages, attachments, duplicate_of): + self.description = "It crashes immediately." 
+ self.tags = ["amd64", "regression-release"] + self.date_last_updated = datetime(2026, 6, 1, tzinfo=timezone.utc) + self.heat = 42 + self.messages = messages + self.attachments = attachments + self.duplicate_of = duplicate_of + self.bug_tasks = bug_tasks + + +def _build_task(duplicate_of=None) -> Task: + devel_url = "https://api.launchpad.net/devel/ubuntu/+source/pkg/+bug/123" + jammy_url = "https://api.launchpad.net/devel/ubuntu/jammy/+source/pkg/+bug/123" + + messages = [ + _FakeMessage( + "https://api.launchpad.net/devel/~reporter", + datetime(2026, 5, 1, tzinfo=timezone.utc), + "original report body", + ), + _FakeMessage( + "https://api.launchpad.net/devel/~helper", + datetime(2026, 5, 2, tzinfo=timezone.utc), + "have you tried turning it off and on again?", + ), + ] + attachments = [ + _FakeAttachment("crash.txt", "Unspecified"), + _FakeAttachment("fix.patch", "Patch"), + ] + + devel_task = _FakeLPTask(devel_url, status="New", importance="Undecided", target_name="pkg (Ubuntu)") + jammy_task = _FakeLPTask( + jammy_url, status="Confirmed", importance="High", target_name="pkg (Ubuntu Jammy)" + ) + + bug = _FakeBug( + bug_tasks=[devel_task, jammy_task], + messages=messages, + attachments=attachments, + duplicate_of=duplicate_of, + ) + devel_task.bug = bug + jammy_task.bug = bug + + return Task(devel_task, subscribed=False, last_activity_ours=False) + + +def test_to_agent_payload_core_fields(): + payload = _build_task().to_agent_payload() + assert payload["number"] == "123" + assert payload["url"] == "https://bugs.launchpad.net/ubuntu/+bug/123" + assert payload["description"] == "It crashes immediately." 
+ assert payload["status"] == "New" + assert payload["importance"] == "Undecided" + assert payload["tags"] == ["amd64", "regression-release"] + assert payload["heat"] == 42 + assert payload["duplicate_of"] is None + + +def test_to_agent_payload_comments_skip_original_report(): + payload = _build_task().to_agent_payload() + # The first message is the original report (covered by description). + assert len(payload["comments"]) == 1 + comment = payload["comments"][0] + assert comment["author"] == "helper" + assert comment["text"] == "have you tried turning it off and on again?" + assert comment["date"] == "2026-05-02T00:00:00+00:00" + + +def test_to_agent_payload_attachments(): + payload = _build_task().to_agent_payload() + assert payload["attachments"] == [ + {"title": "crash.txt", "type": "Unspecified", "is_patch": False}, + {"title": "fix.patch", "type": "Patch", "is_patch": True}, + ] + + +def test_to_agent_payload_affected_targets(): + payload = _build_task().to_agent_payload() + affected = payload["affected"] + assert len(affected) == 2 + + devel = affected[0] + assert devel["target"] == "pkg (Ubuntu)" + assert devel["package"] == "pkg" + assert devel["distro"] == "ubuntu" + assert devel["series"] is None + assert devel["status"] == "New" + + jammy = affected[1] + assert jammy["package"] == "pkg" + assert jammy["series"] == "jammy" + assert jammy["status"] == "Confirmed" + assert jammy["importance"] == "High" + + +def test_to_agent_payload_duplicate_of(): + payload = _build_task(duplicate_of=_FakeDuplicate(999)).to_agent_payload() + assert payload["duplicate_of"] == "999" diff --git a/tests/test_spinner.py b/tests/test_spinner.py new file mode 100644 index 0000000..9b3940c --- /dev/null +++ b/tests/test_spinner.py @@ -0,0 +1,45 @@ +"""Tests for the async terminal spinner.""" + +from __future__ import annotations + +import asyncio + +import pytest + +from startriage.spinner import Spinner + + +@pytest.mark.asyncio +async def test_spinner_shows_status_message(): + 
frames: list[str] = [] + spinner = Spinner(set(), status="Starting…", out=frames.append, interval=1 / 1000) + async with spinner: + await asyncio.sleep(0.02) + spinner.set_status("Triaging LP #123 (1/2)…") + await asyncio.sleep(0.02) + + rendered = "".join(frames) + assert "Starting…" in rendered + assert "Triaging LP #123 (1/2)…" in rendered + + +@pytest.mark.asyncio +async def test_spinner_status_overrides_pending_set(): + frames: list[str] = [] + spinner = Spinner({"launchpad"}, status="Working…", out=frames.append, interval=1 / 1000) + async with spinner: + await asyncio.sleep(0.02) + + rendered = "".join(frames) + assert "Working…" in rendered + assert "launchpad" not in rendered + + +@pytest.mark.asyncio +async def test_spinner_is_noop_when_not_a_tty(monkeypatch): + # With no explicit sink and a non-TTY stderr, the spinner writes nothing. + monkeypatch.setattr("startriage.spinner.sys.stderr.isatty", lambda: False) + spinner = Spinner({"launchpad"}, status="Working…", interval=1 / 1000) + assert spinner._enabled is False + async with spinner: + await asyncio.sleep(0.02)