From 3c5d04fb66a889d0803fbe55a2685b9a4cebaaf1 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 22:29:20 -0300 Subject: [PATCH 01/11] Add configuration options for the AI-assisted triage --- startriage/cli.py | 47 ++++++++++++++- startriage/config.py | 67 ++++++++++++++++++++- startriage/data/defaults.toml | 8 +++ startriage/enums.py | 7 +++ tests/test_config.py | 106 +++++++++++++++++++++++++++++++++- 5 files changed, 229 insertions(+), 6 deletions(-) diff --git a/startriage/cli.py b/startriage/cli.py index 57ddb84..a5a9923 100644 --- a/startriage/cli.py +++ b/startriage/cli.py @@ -10,7 +10,7 @@ from .config import DEFAULT_USER_CONFIG, StarTriageConfig, load_config, resolve_team_name, update_user_config from .dates import parse_interval, triage_task_date_range -from .enums import UpdateFilter +from .enums import AIProvider, UpdateFilter from .log import log_setup from .output import OutputConfig, OutputFormat from .savebugs import BugPersistor, SaveConfig @@ -208,6 +208,37 @@ def _build_parser() -> argparse.ArgumentParser: "Alternatively set the GITHUB_TOKEN environment variable." ), ) + config_setdefaults_p.add_argument( + "--ai-provider", + choices=AIProvider, + help="Set AI triage provider in config (ai.provider)", + ) + config_setdefaults_p.add_argument( + "--ai-model", + metavar="MODEL", + help="Set AI triage model in config (ai.model)", + ) + config_setdefaults_p.add_argument( + "--ai-github-token", + metavar="TOKEN", + help=( + "Set Copilot GitHub token in config (ai.github_token). " + "Alternatively set the COPILOT_GITHUB_TOKEN environment variable." + ), + ) + config_setdefaults_p.add_argument( + "--ai-openrouter-key", + metavar="KEY", + help=( + "Set OpenRouter API key in config (ai.openrouter_api_key). " + "Alternatively set the OPENROUTER_API_KEY environment variable." 
+ ), + ) + config_setdefaults_p.add_argument( + "--ai-openrouter-base-url", + metavar="URL", + help="Set OpenRouter base URL in config (ai.openrouter_base_url)", + ) config_setdefaults_p.set_defaults(func=_set_config_settings) config_show_p = config_sp.add_parser("show", help="Display resolved configuration") @@ -358,12 +389,24 @@ async def _set_config_settings(args: argparse.Namespace, _config: StarTriageConf updates.setdefault("general", {})["proposed_min_age"] = args.proposed_min_age if args.github_token is not None: updates.setdefault("general", {})["github_token"] = args.github_token + if args.ai_provider: + updates.setdefault("ai", {})["provider"] = str(args.ai_provider) + if args.ai_model: + updates.setdefault("ai", {})["model"] = args.ai_model + if args.ai_github_token is not None: + updates.setdefault("ai", {})["github_token"] = args.ai_github_token + if args.ai_openrouter_key is not None: + updates.setdefault("ai", {})["openrouter_api_key"] = args.ai_openrouter_key + if args.ai_openrouter_base_url: + updates.setdefault("ai", {})["openrouter_base_url"] = args.ai_openrouter_base_url if not updates: print("No settings to update.") return - sensitive = "github_token" in updates.get("general", {}) + sensitive = "github_token" in updates.get("general", {}) or bool( + {"github_token", "openrouter_api_key"} & updates.get("ai", {}).keys() + ) path = update_user_config(updates, config_path=args.config, sensitive=sensitive) print(f"Settings saved to {path!r}") diff --git a/startriage/config.py b/startriage/config.py index 11e1ab9..d63c9c4 100644 --- a/startriage/config.py +++ b/startriage/config.py @@ -11,7 +11,7 @@ import tomli_w from pydantic import BaseModel, ConfigDict, field_validator, model_validator -from .enums import UpdateFilter +from .enums import AIProvider, UpdateFilter def default_config_path() -> Path: @@ -23,6 +23,61 @@ def default_config_path() -> Path: DEFAULT_USER_CONFIG = default_config_path() +# Environment variables consulted for AI 
credentials, in priority order. +# Copilot mirrors the GitHub Copilot SDK's own precedence. +COPILOT_TOKEN_ENV_VARS = ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN") +OPENROUTER_KEY_ENV_VARS = ("STARTRIAGE_AI_OPENROUTER_KEY", "OPENROUTER_API_KEY") + + +def _first_env(names: tuple[str, ...]) -> str | None: + """Return the first non-empty value among the given environment variables.""" + for name in names: + value = os.environ.get(name) + if value: + return value + return None + + +class AIConfigError(Exception): + """Raised when the [ai] section lacks the credentials required to run.""" + + +class AIConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + provider: AIProvider = AIProvider.copilot + model: str = "claude-opus-4.8" + # Copilot auth (or rely on COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN env). + github_token: str | None = None + # OpenRouter (BYOK) auth. + openrouter_api_key: str | None = None + openrouter_base_url: str = "https://openrouter.ai/api/v1" + + def resolve_token(self) -> str | None: + """Return the effective credential for the active provider. + + Config values take precedence over environment variables. + """ + if self.provider is AIProvider.copilot: + return self.github_token or _first_env(COPILOT_TOKEN_ENV_VARS) + return self.openrouter_api_key or _first_env(OPENROUTER_KEY_ENV_VARS) + + def require_configured(self) -> None: + """Raise AIConfigError with a friendly hint when no credential is available.""" + if self.resolve_token(): + return + if self.provider is AIProvider.copilot: + raise AIConfigError( + "No Copilot credential configured. Run " + "'startriage config set --ai-github-token <TOKEN>' or set the " + "COPILOT_GITHUB_TOKEN environment variable." + ) + raise AIConfigError( + "No OpenRouter API key configured. Run " + "'startriage config set --ai-openrouter-key <KEY>' or set the " + "OPENROUTER_API_KEY environment variable." 
+ ) + class GeneralConfig(BaseModel): model_config = ConfigDict(extra="forbid") @@ -86,6 +141,7 @@ class StarTriageConfig(BaseModel): model_config = ConfigDict(extra="forbid") general: GeneralConfig = GeneralConfig() + ai: AIConfig = AIConfig() team: dict[str, TeamConfig] = {} loaded_paths: list[Path] = [] @@ -98,9 +154,11 @@ def get_team(self, name: str) -> TeamConfig: raise KeyError(f"Unknown team '{name}'. Available teams: {available}") from None def show(self) -> str: - data: dict = {"general": {}, "team": {}} + data: dict = {"general": {}, "ai": {}, "team": {}} for field, value in self.general.model_dump(exclude_none=True).items(): data["general"][field] = value + for field, value in self.ai.model_dump(exclude_none=True).items(): + data["ai"][field] = value for team_name, team in self.team.items(): data["team"][team_name] = team.model_dump(exclude_none=True) @@ -184,6 +242,9 @@ def load_config(user_config_path: Path | None) -> StarTriageConfig: # Merge general section merged_general = {**defaults.get("general", {}), **user.get("general", {})} + # Merge ai section (user overrides defaults field-by-field) + merged_ai = {**defaults.get("ai", {}), **user.get("ai", {})} + # Merge team sections field-by-field so a sparse user section doesn't lose defaults default_teams = defaults.get("team", {}) user_teams = user.get("team", {}) @@ -193,7 +254,7 @@ def load_config(user_config_path: Path | None) -> StarTriageConfig: } return StarTriageConfig.model_validate( - {"general": merged_general, "team": merged_teams, "loaded_paths": loaded_paths} + {"general": merged_general, "ai": merged_ai, "team": merged_teams, "loaded_paths": loaded_paths} ) diff --git a/startriage/data/defaults.toml b/startriage/data/defaults.toml index 58f8c49..35f1376 100644 --- a/startriage/data/defaults.toml +++ b/startriage/data/defaults.toml @@ -7,6 +7,14 @@ lp_triage_updates = "theirs" #default_team = # automatic if 1 defined #lp_extended = # depends on mode +[ai] +# AI-assisted triage backend 
(see 'startriage config set --help'). +#provider = "copilot" # "copilot" | "openrouter" +#model = "claude-opus-4.8" # copilot default; set your own for openrouter +#github_token = "github_pat_..." # or COPILOT_GITHUB_TOKEN / GH_TOKEN env +#openrouter_api_key = "..." # or OPENROUTER_API_KEY env +#openrouter_base_url = "https://openrouter.ai/api/v1" + [team.ubuntu-server] lp_team = "ubuntu-server" lp_todo_tag = "server-todo" diff --git a/startriage/enums.py b/startriage/enums.py index c2a5cfb..0a5507c 100644 --- a/startriage/enums.py +++ b/startriage/enums.py @@ -19,3 +19,10 @@ class FetchMode(StrEnum): triage = "triage" # date-range bugs for daily triage todo = "todo" # tag-filtered housekeeping bugs subscribed = "subscribed" # list subscribed bugs + + +class AIProvider(StrEnum): + """LLM backend used for agentic triage.""" + + copilot = "copilot" # GitHub Copilot SDK (default); GitHub token auth + openrouter = "openrouter" # OpenAI-compatible BYOK endpoint diff --git a/tests/test_config.py b/tests/test_config.py index dc1d98b..6366c7d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -8,7 +8,8 @@ import pytest from pydantic import ValidationError -from startriage.config import load_config +from startriage.config import AIConfigError, load_config, update_user_config +from startriage.enums import AIProvider def _write_toml(tmp_path: Path, content: str) -> Path: @@ -122,3 +123,106 @@ def test_github_token_config(tmp_path): ) config = load_config(p) assert config.general.github_token == "ghp_secret" + + +def test_ai_defaults(tmp_path): + """No [ai] section yields sensible Copilot defaults.""" + config = load_config(tmp_path / "nonexistent.toml") + assert config.ai.provider is AIProvider.copilot + assert config.ai.model == "claude-opus-4.8" + assert config.ai.openrouter_base_url == "https://openrouter.ai/api/v1" + + +def test_ai_override(tmp_path): + p = _write_toml( + tmp_path, + """\ + [ai] + provider = "openrouter" + model = 
"anthropic/claude-3.5-sonnet" + openrouter_api_key = "or_secret" + """, + ) + config = load_config(p) + assert config.ai.provider is AIProvider.openrouter + assert config.ai.model == "anthropic/claude-3.5-sonnet" + assert config.ai.openrouter_api_key == "or_secret" + + +def test_ai_invalid_provider(tmp_path): + p = _write_toml( + tmp_path, + """\ + [ai] + provider = "bogus" + """, + ) + with pytest.raises(ValidationError): + load_config(p) + + +def test_ai_extra_field_rejected(tmp_path): + p = _write_toml( + tmp_path, + """\ + [ai] + typo_field = true + """, + ) + with pytest.raises(ValidationError): + load_config(p) + + +def test_ai_resolve_token_prefers_config(tmp_path, monkeypatch): + monkeypatch.setenv("COPILOT_GITHUB_TOKEN", "env_token") + p = _write_toml( + tmp_path, + """\ + [ai] + github_token = "cfg_token" + """, + ) + config = load_config(p) + assert config.ai.resolve_token() == "cfg_token" + + +def test_ai_resolve_token_from_env(tmp_path, monkeypatch): + for var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("GH_TOKEN", "env_token") + config = load_config(tmp_path / "nonexistent.toml") + assert config.ai.resolve_token() == "env_token" + + +def test_ai_require_configured_copilot_missing(tmp_path, monkeypatch): + for var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + monkeypatch.delenv(var, raising=False) + config = load_config(tmp_path / "nonexistent.toml") + with pytest.raises(AIConfigError, match="Copilot"): + config.ai.require_configured() + + +def test_ai_require_configured_openrouter_missing(tmp_path, monkeypatch): + for var in ("STARTRIAGE_AI_OPENROUTER_KEY", "OPENROUTER_API_KEY"): + monkeypatch.delenv(var, raising=False) + p = _write_toml( + tmp_path, + """\ + [ai] + provider = "openrouter" + """, + ) + config = load_config(p) + with pytest.raises(AIConfigError, match="OpenRouter"): + config.ai.require_configured() + + +def 
test_ai_secret_written_with_restricted_perms(tmp_path): + path = tmp_path / "startriage.toml" + update_user_config( + {"ai": {"openrouter_api_key": "or_secret"}}, + config_path=path, + sensitive=True, + ) + assert load_config(path).ai.openrouter_api_key == "or_secret" + assert (path.stat().st_mode & 0o777) == 0o600 From ff1d8dec78e3e6f94fa2892b8ad21ca53f5dcaa0 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 22:49:33 -0300 Subject: [PATCH 02/11] Add custom to_agent_payload method --- startriage/sources/launchpad/models.py | 79 +++++++++++++ tests/test_launchpad_models.py | 153 +++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 tests/test_launchpad_models.py diff --git a/startriage/sources/launchpad/models.py b/startriage/sources/launchpad/models.py index 06d12a9..ee5736b 100644 --- a/startriage/sources/launchpad/models.py +++ b/startriage/sources/launchpad/models.py @@ -36,6 +36,37 @@ def mark(text: str, color: str) -> str: return "".join([color, text, COLOR_RESET]) +def _name_from_link(link: str | None) -> str | None: + """Extract a username from a Launchpad person link (``.../~username``).""" + if not link or "~" not in link: + return None + return link.split("~")[-1] + + +def _affected_from_task(lp_task: Any) -> dict[str, Any]: + """Best-effort structured description of one bug task (affected target). + + Always includes the human-readable ``target`` (e.g. ``"pkg (Ubuntu Jammy)"``) + plus the task ``status``/``importance``. For distribution source-package + tasks it additionally resolves ``distro``, ``package`` and ``series``. + Launchpad bug tasks do not expose a package version, so none is reported. 
+ """ + entry: dict[str, Any] = { + "target": lp_task.bug_target_name, + "status": lp_task.status, + "importance": lp_task.importance, + } + parts = str(lp_task).split("/") + if "+source" in parts and len(parts) >= 5: + si = parts.index("+source") + distro = parts[4] + before = parts[si - 1] + entry["distro"] = distro + entry["package"] = parts[si + 1] if si + 1 < len(parts) else None + entry["series"] = before if before != distro else None + return entry + + @dataclass class RenderContext: """Render-time state passed explicitly to Task display methods. @@ -359,6 +390,54 @@ def to_dict(self, ctx: RenderContext) -> dict: "sibling_task_status": sibling_status, } + def to_agent_payload(self) -> dict[str, Any]: + """Build the rich, JSON-serialisable bug context handed to the AI agent. + + Unlike :meth:`to_dict` (terminal/markdown rendering metadata), this pulls + the full report body the agent needs to triage: description, every + comment, attachments, all affected targets, duplicate-of, and heat. + + Accessing these fields triggers lazy launchpadlib fetches, so call this + off the event loop (e.g. via ``asyncio.to_thread``), mirroring the + finder's threaded LP access. + """ + bug = self.lp_task.bug + + comments: list[dict[str, Any]] = [] + # messages[0] is the original report (already captured by ``description``); + # the remainder are follow-up comments. 
+ for msg in list(bug.messages)[1:]: + comments.append( + { + "author": _name_from_link(msg.owner_link), + "date": msg.date_created.isoformat() if msg.date_created else None, + "text": msg.content, + } + ) + + attachments = [ + {"title": att.title, "type": att.type, "is_patch": att.type == "Patch"} for att in bug.attachments + ] + + duplicate_of = bug.duplicate_of + duplicate_of_number = str(duplicate_of.id) if duplicate_of else None + + return { + "number": self.number, + "url": self.url, + "title": self.title, + "short_title": self.short_title, + "description": bug.description, + "status": self.status, + "importance": self.importance, + "tags": list(self.tags), + "heat": bug.heat, + "duplicate_of": duplicate_of_number, + "affected": [_affected_from_task(t) for t in self._all_bug_tasks], + "attachments": attachments, + "comments": comments, + } + @dataclass class LaunchpadTasks: diff --git a/tests/test_launchpad_models.py b/tests/test_launchpad_models.py new file mode 100644 index 0000000..1127d7d --- /dev/null +++ b/tests/test_launchpad_models.py @@ -0,0 +1,153 @@ +"""Tests for the Launchpad Task model, focused on the AI agent payload.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +from startriage.sources.launchpad.models import ( + DISTRIBUTION_SOURCE_PACKAGE_RESOURCE_TYPE_LINK, + Task, +) + + +class _FakeMessage: + def __init__(self, owner_link, date_created, content): + self.owner_link = owner_link + self.date_created = date_created + self.content = content + + +class _FakeAttachment: + def __init__(self, title, type_): + self.title = title + self.type = type_ + + +class _FakeDuplicate: + def __init__(self, id_): + self.id = id_ + + +class _FakeTarget: + resource_type_link = DISTRIBUTION_SOURCE_PACKAGE_RESOURCE_TYPE_LINK + + +class _FakeLPTask: + """Minimal stand-in for a launchpadlib bug_task entry.""" + + def __init__(self, api_url, *, status, importance, target_name, bug=None): + self._api_url = api_url + self.status 
= status + self.importance = importance + self.bug_target_name = target_name + self.title = "Bug #123 in pkg (Ubuntu): boom on start" + self.assignee_link = None + self.target = _FakeTarget() + self.bug = bug + + def __str__(self): + return self._api_url + + +class _FakeBug: + def __init__(self, *, bug_tasks, messages, attachments, duplicate_of): + self.description = "It crashes immediately." + self.tags = ["amd64", "regression-release"] + self.date_last_updated = datetime(2026, 6, 1, tzinfo=timezone.utc) + self.heat = 42 + self.messages = messages + self.attachments = attachments + self.duplicate_of = duplicate_of + self.bug_tasks = bug_tasks + + +def _build_task(duplicate_of=None) -> Task: + devel_url = "https://api.launchpad.net/devel/ubuntu/+source/pkg/+bug/123" + jammy_url = "https://api.launchpad.net/devel/ubuntu/jammy/+source/pkg/+bug/123" + + messages = [ + _FakeMessage( + "https://api.launchpad.net/devel/~reporter", + datetime(2026, 5, 1, tzinfo=timezone.utc), + "original report body", + ), + _FakeMessage( + "https://api.launchpad.net/devel/~helper", + datetime(2026, 5, 2, tzinfo=timezone.utc), + "have you tried turning it off and on again?", + ), + ] + attachments = [ + _FakeAttachment("crash.txt", "Unspecified"), + _FakeAttachment("fix.patch", "Patch"), + ] + + devel_task = _FakeLPTask(devel_url, status="New", importance="Undecided", target_name="pkg (Ubuntu)") + jammy_task = _FakeLPTask( + jammy_url, status="Confirmed", importance="High", target_name="pkg (Ubuntu Jammy)" + ) + + bug = _FakeBug( + bug_tasks=[devel_task, jammy_task], + messages=messages, + attachments=attachments, + duplicate_of=duplicate_of, + ) + devel_task.bug = bug + jammy_task.bug = bug + + return Task(devel_task, subscribed=False, last_activity_ours=False) + + +def test_to_agent_payload_core_fields(): + payload = _build_task().to_agent_payload() + assert payload["number"] == "123" + assert payload["url"] == "https://bugs.launchpad.net/ubuntu/+bug/123" + assert payload["description"] 
== "It crashes immediately." + assert payload["status"] == "New" + assert payload["importance"] == "Undecided" + assert payload["tags"] == ["amd64", "regression-release"] + assert payload["heat"] == 42 + assert payload["duplicate_of"] is None + + +def test_to_agent_payload_comments_skip_original_report(): + payload = _build_task().to_agent_payload() + # The first message is the original report (covered by description). + assert len(payload["comments"]) == 1 + comment = payload["comments"][0] + assert comment["author"] == "helper" + assert comment["text"] == "have you tried turning it off and on again?" + assert comment["date"] == "2026-05-02T00:00:00+00:00" + + +def test_to_agent_payload_attachments(): + payload = _build_task().to_agent_payload() + assert payload["attachments"] == [ + {"title": "crash.txt", "type": "Unspecified", "is_patch": False}, + {"title": "fix.patch", "type": "Patch", "is_patch": True}, + ] + + +def test_to_agent_payload_affected_targets(): + payload = _build_task().to_agent_payload() + affected = payload["affected"] + assert len(affected) == 2 + + devel = affected[0] + assert devel["target"] == "pkg (Ubuntu)" + assert devel["package"] == "pkg" + assert devel["distro"] == "ubuntu" + assert devel["series"] is None + assert devel["status"] == "New" + + jammy = affected[1] + assert jammy["package"] == "pkg" + assert jammy["series"] == "jammy" + assert jammy["status"] == "Confirmed" + assert jammy["importance"] == "High" + + +def test_to_agent_payload_duplicate_of(): + payload = _build_task(duplicate_of=_FakeDuplicate(999)).to_agent_payload() + assert payload["duplicate_of"] == "999" From 008d8830ef5a25e46b0ecff0462a4dc0f70c57f9 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 23:05:02 -0300 Subject: [PATCH 03/11] Add provider layer to copilot+openrouter --- startriage/ai/__init__.py | 21 ++++++ startriage/ai/provider.py | 137 ++++++++++++++++++++++++++++++++++++++ tests/test_ai_provider.py | 120 
+++++++++++++++++++++++++++++++++ 3 files changed, 278 insertions(+) create mode 100644 startriage/ai/__init__.py create mode 100644 startriage/ai/provider.py create mode 100644 tests/test_ai_provider.py diff --git a/startriage/ai/__init__.py b/startriage/ai/__init__.py new file mode 100644 index 0000000..59bfa5d --- /dev/null +++ b/startriage/ai/__init__.py @@ -0,0 +1,21 @@ +"""AI/agentic triage layer for startriage.""" + +from __future__ import annotations + +from .provider import ( + CopilotProvider, + FakeProvider, + Provider, + build_client_kwargs, + build_provider, + build_session_kwargs, +) + +__all__ = [ + "CopilotProvider", + "FakeProvider", + "Provider", + "build_client_kwargs", + "build_provider", + "build_session_kwargs", +] diff --git a/startriage/ai/provider.py b/startriage/ai/provider.py new file mode 100644 index 0000000..e79c695 --- /dev/null +++ b/startriage/ai/provider.py @@ -0,0 +1,137 @@ +"""Provider abstraction over the Copilot SDK for agentic triage. + +The Copilot CLI is itself the agent loop (built-in shell/file/web tools plus its +own tool-calling loop), so a "provider" is deliberately thin: it only starts a +session with the right auth/model and returns the agent's final assistant message. + +The only thing that differs between providers is *where* the credential goes: + +- **Copilot** authenticates the CLI process itself, so its GitHub token is a + ``CopilotClient(...)`` kwarg (see :func:`build_client_kwargs`). +- **OpenRouter** is BYOK through the same loop, supplied as the + ``create_session(provider=...)`` kwarg (see :func:`build_session_kwargs`). +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any + +from ..config import AIConfig +from ..enums import AIProvider + + +class Provider(ABC): + """A backend capable of running one agent session and returning its final text.""" + + #: Model id passed to the underlying session. 
+ model: str + + @abstractmethod + async def run(self, system_prompt: str, user_message: str) -> str: + """Run a single agent session and return the final assistant text.""" + raise NotImplementedError + + +def build_client_kwargs(ai_config: AIConfig) -> dict[str, Any]: + """Build the ``CopilotClient(...)`` kwargs for ``ai_config``. + + For the Copilot provider this carries the GitHub token that authenticates the + CLI process (optional here — the SDK also reads it from the environment). For + OpenRouter (BYOK) the credential travels on the session instead, so no client + auth is needed. The token is resolved with config-over-env precedence via + :meth:`AIConfig.resolve_token`. + """ + if ai_config.provider is AIProvider.copilot: + token = ai_config.resolve_token() + if token: + return {"github_token": token} + return {} + + +def build_session_kwargs(ai_config: AIConfig) -> dict[str, Any]: + """Build the ``create_session(...)`` provider kwargs for ``ai_config``. + + Only OpenRouter (BYOK) contributes here, as an OpenAI-compatible ``provider`` + block; the Copilot provider authenticates at the client level instead. + """ + if ai_config.provider is AIProvider.openrouter: + return { + "provider": { + "type": "openai", + "base_url": ai_config.openrouter_base_url, + "api_key": ai_config.resolve_token(), + } + } + return {} + + +class CopilotProvider(Provider): + """Real provider backed by the Copilot Python SDK (lazily imported). + + The SDK (and the Node Copilot CLI it spawns) is imported only when a session is + actually run, so non-AI commands and offline tests never need it installed. + All tools are auto-approved so unattended runs never block on a prompt; the + safety boundary is snap confinement plus a dedicated scratch dir, not an + allow-list. 
+ """ + + def __init__(self, ai_config: AIConfig) -> None: + self._ai_config = ai_config + self.model = ai_config.model + + async def run(self, system_prompt: str, user_message: str) -> str: + # Lazy import keeps the SDK (and the Node CLI it spawns) optional; it is + # bundled by the snap rather than declared as a hard Python dependency. + from copilot import CopilotClient # ty: ignore[unresolved-import] + from copilot.session import PermissionHandler # ty: ignore[unresolved-import] + + async with CopilotClient(**build_client_kwargs(self._ai_config)) as client: + async with await client.create_session( + on_permission_request=PermissionHandler.approve_all, + model=self.model, + # "append" keeps the CLI's tool-use foundation and layers our + # behavioural prompt on top ("replace" would drop its guardrails). + system_message={"mode": "append", "content": system_prompt}, + **build_session_kwargs(self._ai_config), + ) as session: + message = await session.send_and_wait(user_message) + return (message.data.content or "") if message else "" + + +class FakeProvider(Provider): + """Deterministic in-memory provider for offline tests. + + Returns queued ``responses`` in order, falling back to ``default_response`` once + the queue is drained, and records every ``(system_prompt, user_message)`` call + on :attr:`calls` for assertions. + """ + + def __init__( + self, + responses: list[str] | None = None, + *, + model: str = "fake-model", + default_response: str = "", + ) -> None: + self.model = model + self._responses = list(responses or []) + self._default_response = default_response + self.calls: list[tuple[str, str]] = [] + + async def run(self, system_prompt: str, user_message: str) -> str: + self.calls.append((system_prompt, user_message)) + if self._responses: + return self._responses.pop(0) + return self._default_response + + +def build_provider(ai_config: AIConfig) -> Provider: + """Return a ready provider for ``ai_config``, validating credentials first. 
+ + Raises :class:`AIConfigError` (via :meth:`AIConfig.require_configured`) when the + active provider has no usable credential, so callers fail smoothly before any + session is started. + """ + ai_config.require_configured() + return CopilotProvider(ai_config) diff --git a/tests/test_ai_provider.py b/tests/test_ai_provider.py new file mode 100644 index 0000000..634c24b --- /dev/null +++ b/tests/test_ai_provider.py @@ -0,0 +1,120 @@ +"""Tests for the AI provider layer (selection, kwargs, fake round-trip).""" + +from __future__ import annotations + +import pytest + +from startriage.ai import ( + CopilotProvider, + FakeProvider, + build_client_kwargs, + build_provider, + build_session_kwargs, +) +from startriage.config import AIConfig, AIConfigError +from startriage.enums import AIProvider + + +@pytest.fixture(autouse=True) +def _clear_ai_env(monkeypatch): + for var in ( + "COPILOT_GITHUB_TOKEN", + "GH_TOKEN", + "GITHUB_TOKEN", + "STARTRIAGE_AI_OPENROUTER_KEY", + "OPENROUTER_API_KEY", + ): + monkeypatch.delenv(var, raising=False) + + +def test_build_session_kwargs_copilot_with_token(): + cfg = AIConfig(github_token="github_pat_abc") + # The Copilot token authenticates the client, not the session. + assert build_client_kwargs(cfg) == {"github_token": "github_pat_abc"} + assert build_session_kwargs(cfg) == {} + + +def test_build_session_kwargs_copilot_without_token(): + # No config token and no env var -> SDK is left to read the env itself. 
+ assert build_client_kwargs(AIConfig()) == {} + assert build_session_kwargs(AIConfig()) == {} + + +def test_build_session_kwargs_copilot_token_from_env(monkeypatch): + monkeypatch.setenv("COPILOT_GITHUB_TOKEN", "env_token") + assert build_client_kwargs(AIConfig()) == {"github_token": "env_token"} + assert build_session_kwargs(AIConfig()) == {} + + +def test_build_session_kwargs_openrouter(): + cfg = AIConfig( + provider=AIProvider.openrouter, + model="anthropic/claude-3.5", + openrouter_api_key="sk-or-1", + ) + # BYOK travels on the session; the client needs no auth. + assert build_client_kwargs(cfg) == {} + assert build_session_kwargs(cfg) == { + "provider": { + "type": "openai", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-1", + } + } + + +def test_build_session_kwargs_openrouter_custom_base_url(): + cfg = AIConfig( + provider=AIProvider.openrouter, + openrouter_api_key="sk-or-2", + openrouter_base_url="https://example.test/v1", + ) + assert build_session_kwargs(cfg)["provider"]["base_url"] == "https://example.test/v1" + + +def test_build_provider_returns_copilot_provider(): + provider = build_provider(AIConfig(github_token="github_pat_abc")) + assert isinstance(provider, CopilotProvider) + assert provider.model == "claude-opus-4.8" + + +def test_build_provider_openrouter_uses_configured_model(): + cfg = AIConfig( + provider=AIProvider.openrouter, + model="anthropic/claude-3.5", + openrouter_api_key="sk-or-1", + ) + assert build_provider(cfg).model == "anthropic/claude-3.5" + + +def test_build_provider_missing_copilot_credential(): + with pytest.raises(AIConfigError, match="Copilot"): + build_provider(AIConfig()) + + +def test_build_provider_missing_openrouter_credential(): + with pytest.raises(AIConfigError, match="OpenRouter"): + build_provider(AIConfig(provider=AIProvider.openrouter)) + + +@pytest.mark.asyncio +async def test_fake_provider_round_trip_queued_responses(): + provider = FakeProvider(["first", "second"], model="fake-x") + 
assert provider.model == "fake-x" + + assert await provider.run("sys", "bug-1") == "first" + assert await provider.run("sys", "bug-2") == "second" + # Queue drained -> default response. + assert await provider.run("sys", "bug-3") == "" + + assert provider.calls == [ + ("sys", "bug-1"), + ("sys", "bug-2"), + ("sys", "bug-3"), + ] + + +@pytest.mark.asyncio +async def test_fake_provider_default_response(): + provider = FakeProvider(default_response="canned") + assert await provider.run("sys", "anything") == "canned" From 3cb5ec6fbb3967b2b7b033543b70bf0923c87648 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 23:15:55 -0300 Subject: [PATCH 04/11] Add agent loop and result contract for the AI-assisted triage Also make the agent perma prompt part of the package --- pyproject.toml | 2 +- startriage/ai/__init__.py | 17 ++ startriage/ai/agent.py | 73 +++++++++ startriage/ai/contract.py | 90 +++++++++++ startriage/data/agents_prompt.md | 259 +++++++++++++++++++++++++++++++ startriage/enums.py | 18 +++ tests/test_ai_agent.py | 176 +++++++++++++++++++++ 7 files changed, 634 insertions(+), 1 deletion(-) create mode 100644 startriage/ai/agent.py create mode 100644 startriage/ai/contract.py create mode 100644 startriage/data/agents_prompt.md create mode 100644 tests/test_ai_agent.py diff --git a/pyproject.toml b/pyproject.toml index 81b415a..4ac126f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ build-backend = "setuptools.build_meta" [tool.setuptools] packages = {find = {where = ["."]}} -package-data = {"startriage" = ["data/*.toml"]} +package-data = {"startriage" = ["data/*.toml", "data/*.md"]} [tool.setuptools_scm] version_scheme = "only-version" diff --git a/startriage/ai/__init__.py b/startriage/ai/__init__.py index 59bfa5d..7324732 100644 --- a/startriage/ai/__init__.py +++ b/startriage/ai/__init__.py @@ -2,6 +2,14 @@ from __future__ import annotations +from .agent import BugOutcome, load_system_prompt, triage_bug, triage_bugs +from 
.contract import ( + AgentResult, + AgentResultError, + ProposedFix, + extract_json_block, + parse_agent_result, +) from .provider import ( CopilotProvider, FakeProvider, @@ -12,10 +20,19 @@ ) __all__ = [ + "AgentResult", + "AgentResultError", + "BugOutcome", "CopilotProvider", "FakeProvider", + "ProposedFix", "Provider", "build_client_kwargs", "build_provider", "build_session_kwargs", + "extract_json_block", + "load_system_prompt", + "parse_agent_result", + "triage_bug", + "triage_bugs", ] diff --git a/startriage/ai/agent.py b/startriage/ai/agent.py new file mode 100644 index 0000000..a9912a2 --- /dev/null +++ b/startriage/ai/agent.py @@ -0,0 +1,73 @@ +"""Sequential agent loop: run one triage session per bug, skip-and-continue. + +The provider (see :mod:`startriage.ai.provider`) runs the agent and returns its +final text; this module loads the behavioural system prompt, feeds each bug's +payload as the user message, and parses the result via the contract. A failure on +one bug is recorded and the run continues with the next, never aborting the batch. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from importlib.resources import files + +from .contract import AgentResult, AgentResultError, parse_agent_result +from .provider import Provider + + +@dataclass +class BugOutcome: + """Result of triaging a single bug: either a parsed result or a failure.""" + + bug: str + result: AgentResult | None + error: str | None + raw: str + + @property + def ok(self) -> bool: + return self.result is not None + + +def load_system_prompt() -> str: + """Load the agent behavioural prompt shipped as a package resource.""" + prompt_path = files("startriage") / "data" / "agents_prompt.md" + return prompt_path.read_text(encoding="utf-8") + + +async def triage_bug( + provider: Provider, + payload: dict, + system_prompt: str, +) -> BugOutcome: + """Run one agent session for ``payload`` and parse its result. 
+ + Never raises for triage/agent failures: any error is captured on the returned + :class:`BugOutcome` so the caller can record it and continue. + """ + bug = str(payload.get("number", "")) + user_message = json.dumps(payload, ensure_ascii=False) + try: + raw = await provider.run(system_prompt, user_message) + except Exception as exc: + # Record any provider/runtime failure and keep going (skip-and-continue). + return BugOutcome(bug=bug, result=None, error=f"provider error: {exc}", raw="") + try: + result = parse_agent_result(raw) + except AgentResultError as exc: + return BugOutcome(bug=bug, result=None, error=str(exc), raw=raw) + return BugOutcome(bug=bug, result=result, error=None, raw=raw) + + +async def triage_bugs( + provider: Provider, + payloads: list[dict], + system_prompt: str | None = None, +) -> list[BugOutcome]: + """Triage ``payloads`` sequentially, recording per-bug failures and continuing.""" + prompt = system_prompt if system_prompt is not None else load_system_prompt() + outcomes: list[BugOutcome] = [] + for payload in payloads: + outcomes.append(await triage_bug(provider, payload, prompt)) + return outcomes diff --git a/startriage/ai/contract.py b/startriage/ai/contract.py new file mode 100644 index 0000000..2fb1c4e --- /dev/null +++ b/startriage/ai/contract.py @@ -0,0 +1,90 @@ +"""Agent → tool result contract: the JSON each bug triage must return. + +The Copilot CLI returns a free-text final assistant message, so the agent is +instructed to end with a single fenced ``json`` block. This module extracts that +block, parses it, and validates it against the schema in ``agents_prompt.md``. +Validation is enforced in code (status / fix-kind enums) so a hallucinated or +malformed result is rejected rather than trusted. 
+""" + +from __future__ import annotations + +import json +import re + +from pydantic import BaseModel, ConfigDict, ValidationError + +from ..enums import ProposedFixKind, TriageStatus + +# Matches fenced code blocks, optionally tagged with a language (e.g. ```json). +_FENCED_BLOCK = re.compile( + r"```[ \t]*([A-Za-z0-9_+-]*)[ \t]*\r?\n(.*?)\r?\n```", + re.DOTALL, +) + + +class AgentResultError(ValueError): + """Raised when the agent's output cannot be parsed/validated as a result.""" + + +class ProposedFix(BaseModel): + model_config = ConfigDict(extra="forbid") + + kind: ProposedFixKind + value: str = "" + + +class AgentResult(BaseModel): + """One bug's triage result, as returned by the agent and rendered by the tool.""" + + # Tolerate extra keys: LLM output is noisy and harmless additions should not + # fail an otherwise-valid result. The fields below are still validated strictly. + model_config = ConfigDict(extra="ignore") + + bug: str + package: str = "" + short_title: str = "" + status: TriageStatus + tags: list[str] = [] + analysis: str = "" + thought_process: str = "" + proposed_fix: ProposedFix + references: list[str] = [] + suggested_improvements: str = "" + + +def extract_json_block(text: str) -> str: + """Return the JSON payload of the last fenced block in ``text``. + + Prefers a ```json-tagged block; falls back to the last untagged fenced block so + a missing language hint does not break parsing. Raises :class:`AgentResultError` + when no fenced block is present. + """ + matches = _FENCED_BLOCK.findall(text) + if not matches: + raise AgentResultError("no fenced code block found in agent output") + + json_blocks = [body for lang, body in matches if lang.lower() == "json"] + if json_blocks: + return json_blocks[-1].strip() + # No language-tagged json block; use the last fenced block of any kind. 
+ return matches[-1][1].strip() + + +def parse_agent_result(text: str) -> AgentResult: + """Extract, decode, and validate a single :class:`AgentResult` from agent text. + + Raises :class:`AgentResultError` on a missing block, invalid JSON, or schema / + enum validation failure. + """ + block = extract_json_block(text) + try: + data = json.loads(block) + except json.JSONDecodeError as exc: + raise AgentResultError(f"agent output is not valid JSON: {exc}") from exc + if not isinstance(data, dict): + raise AgentResultError("agent JSON result must be an object") + try: + return AgentResult.model_validate(data) + except ValidationError as exc: + raise AgentResultError(f"agent result failed validation: {exc}") from exc diff --git a/startriage/data/agents_prompt.md b/startriage/data/agents_prompt.md new file mode 100644 index 0000000..c9963cc --- /dev/null +++ b/startriage/data/agents_prompt.md @@ -0,0 +1,259 @@ + +Role + +You are a Senior software engineer working for Ubuntu, responsible for triaging bugs so other engineers work on it if needed. Your job is not to fix everything, but rather filter out and point engineers to good resources about the bug and give them options on how to proceed. You get a list of bugs as input and iterate through them, performing the actions below. + +Actions # what are the possible actions it should perform, and how to perform them + +Perform these actions in order for each bug. Use the results of earlier steps to inform later ones. + +### 1. Validate the Report + +Check the following criteria: +- Does the report identify at least one specific source package? +- Does the described problem target the correct package? (e.g., is the user blaming package A when the fault is in package B?) +- Does it describe a specific fault, error, or incorrect behavior? + +For feature requests are valid but need to be flagged so, and does not need to be triaged further once it makes sense. 
To validate a feature request, consider: +- Is it already available in a newer version? +- Does it already exist, or does it need to be implemented? +- Is it suitable for upstreaming? +- Is it just a simple flag change, or a bigger effort? + +If validation fails: +- Missing information (no package, no version, no reproduction steps) → recommend status **Incomplete**. Specify what information is needed. +- Not a bug (support request, expected behavior, configuration error, unsupported setup) → recommend status **Invalid**. Explain why. +- Process ticket (sync request, merge request, SRU, MIR, freeze exception) → recommend **no-change**. These are tracked separately. + + +If validation passes, proceed to step 2. + + + +### 2. Search for Duplicates and Existing Fixes + + +Perform these searches in parallel: + +#### 2.1 Search Launchpad for duplicates +- URL pattern: `https://bugs.launchpad.net/ubuntu/+source/SOURCE_PACKAGE/+bugs?field.searchtext=SEARCH_TERMS` +- Look for bugs with matching symptoms. If a duplicate is found, recommend marking the current bug as a duplicate of the older/better-reported one. + + +#### 2.2 Search Debian for related bugs or fixes +- URL pattern: `https://bugs.debian.org/cgi-bin/pkgreport.cgi?archive=both;src=SOURCE_PACKAGE` +- Look for matching bugs. If a fix exists in Debian, note the Debian bug number and the fix (patch, version, or commit). + + +If either search finds a clear solution (existing duplicate, or fix already in Debian), note it and proceed to step 5 (Describe the Bug). + +If neither search does, still report the references you found so they can inform the final decision. + + + +### 3. Search Upstream + + +- Determine the upstream repository, homepage, and bug tracker. Sources (in order): + the bug's affected source package page on Launchpad, the package's `debian/control` + `Homepage:` field, and `debian/watch`. (A package-metadata cache is planned for a + future iteration but is NOT available yet — do not rely on it.)
+- Search the upstream bug tracker and/or git repository for matching issues or commits. +- If the upstream project uses GitHub, GitLab, or similar, search the issues and recent commits. + + +If a fix is found upstream, note the commit hash or issue URL and proceed to step 5. +If no fix is found, still report the references you found so they can inform the final decision. + + + +### 4. Search Other Distributions + + +If steps 2-3 did not yield a solution, search other distributions. Prioritize in this order: + + +- **Fedora**: `https://bugzilla.redhat.com/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&product=Fedora&content=SEARCH_TERMS` +- **Arch Linux**: `https://gitlab.archlinux.org/archlinux/packaging/packages/PACKAGE_NAME/-/issues` or `https://bugs.archlinux.org/` (for legacy bugs) +- **Gentoo**: `https://bugs.gentoo.org/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&content=SEARCH_TERMS` +- Other distros may be searched if the above yield nothing. + + + +### 5. Describe the Bug + + +Write a structured description containing: +- **Affected package(s):** source package name(s) +- **Affected version(s):** package version and Ubuntu release(s) +- **Symptoms:** what goes wrong (error messages, crashes, incorrect output) +- **Reproduction steps:** how to trigger the bug (if known) +- **Impact:** who is affected and how severely (data loss? service interruption? cosmetic?) +- **Related bugs:** LP duplicates, Debian bugs, upstream issues found in steps 2-4 + + +### 6. Analyze the Source Code + + +If the bug appears valid and no fix was found in steps 2-4: +- Obtain the source code. Methods (in order of preference): + + 1. Download the source from Launchpad directly using `pull-lp-source` (from the `ubuntu-dev-tools` package). This can fetch specific versions from specific releases. + +If that doesn't work for any reason (unexpected): + + 2.
Find the upstream repository from the `debian/watch` file or `debian/control` Homepage field and clone/inspect it. +- Search for the code responsible for the reported error (grep for error messages, function names, etc.). +- Identify the offending lines and explain the root cause. + + +### 7. Propose a Fix + + +- If a fix was found in steps 2-4 (Debian patch, upstream commit, other distro patch), reference it and confirm it applies to the affected Ubuntu source version. +- If no existing fix was found but the root cause is clear from step 6, write a proposed fix as a unified diff. +- The proposed fix goes ONLY in your returned result (the `proposed_fix` field). Do not apply it to any source tree. +- If you cannot produce a fix with reasonable confidence, set `proposed_fix.kind` to `none`. + + + +Context # What is needed as background to perform the actions + +### Reference documentation +- Ubuntu Maintainers Handbook — Bug Triage: https://github.com/canonical/ubuntu-maintainers-handbook/blob/main/BugTriage.md + + +### Bug statuses +When recommending a status change, use one of these: +- **Invalid**: the report is not a bug, or the issue is already fixed in the reported version(s). +- **Incomplete**: more information is needed from the reporter before the bug can be acted on. +- **Triaged**: the bug is valid and reproducible; there may or may not be a known fix. +- **Duplicate**: the bug reports the same issue as an existing report; reference the older/better-reported bug. +- **no-change** (not a Launchpad status): leave the bug as-is in its current status ("New", "Confirmed", etc.). This means the bug needs more engineering input beyond what this triage can provide. + + +### Optional tags +- `server-todo`: the bug has a known fix or very high priority. The team should work on it soon. +- `bitesize`: the bug is actionable and the fix is straightforward (e.g., a patch is already available upstream or in Debian and applies cleanly).
+- `server-triage-discuss`: the bug is ambiguous and should be discussed by the team in the next standup or weekly meeting. +- `regression-update`: the bug appears to be a regression caused by an SRU or security update. + + +### Definitions +- **Debdiff**: a unified diff between two versions of a Debian/Ubuntu source package, generated by `debdiff old.dsc new.dsc`. It shows all changes between the two versions. +- **SRU**: Stable Release Update — a bug fix backported to a stable (non-development) Ubuntu release. +- **MIR**: Main Inclusion Request — a request to promote a package from Universe to Main. + + +### Package cache +A per-package metadata cache is planned for a future iteration but is NOT available +yet. For now, derive upstream/Debian/homepage information from the bug's affected +source package, the package's `debian/control` `Homepage:` field, and `debian/watch`. + +### Special cases to be aware of +Certain packages have known triaging patterns (from the handbook): +- **MySQL**: check for duplicates first; many reports are common usage errors. Check `mysql-8.0` bugs sorted by heat. +- **libvirt/virtualization**: "permission denied" issues are often caused by AppArmor profiles applied by libvirt. Ask for `dmesg` AppArmor denials. + + + +Expectation # What do we expect as output/result + + +### Workflow + + +For each bug in the input: + + +1. **Validate** the report (Action step 1). If invalid or incomplete, record the status recommendation and stop processing this bug. +2. **Search** for duplicates and existing fixes (Action steps 2-4). Stop searching as soon as a feasible solution is found. +3. **Describe** the bug (Action step 5). +4. If the bug is valid and actionable: + a. **Analyze** the source code (Action step 6). + b. **Propose** a fix if possible (Action step 7). + + +### Output + + +You do NOT write any files. 
For each bug, **return a single JSON object** with this +exact schema (the surrounding tool renders it into the `autotriage-YYYY-MM-DD.md` +report): + +```json +{ + "bug": "NNNNNN", + "package": "", + "short_title": "", + "status": "Invalid|Incomplete|Triaged|Duplicate|no-change", + "tags": ["server-todo", "bitesize", "server-triage-discuss", "regression-update"], + "analysis": "", + "thought_process": "", + "proposed_fix": {"kind": "none|reference|diff", "value": ""}, + "references": [""] +} +``` + +Field guidance: +- `tags`: use an empty list `[]` when none apply. +- `analysis`: if Invalid/Incomplete, explain the reason for the status. If + Triaged/no-change/Duplicate, describe the bug, root-cause analysis, and any + related bugs/patches found. +- `thought_process`: summarize the investigation steps and reasoning, including + which searches were performed and what was or was not found. +- `proposed_fix`: `kind = "reference"` with a URL/commit when an existing fix was + found; `kind = "diff"` with a unified diff only when you generated one; otherwise + `kind = "none"`. + + +### When in doubt + + +If you cannot confidently determine the correct status or whether a fix applies, recommend **no-change** and add a note explaining the uncertainty. Suggest the `server-triage-discuss` tag so the team can review it. + + + + +Constraints # What the agent should explicitly NOT do + +1. **You do not write output files.** Return the JSON result described in Output; + the surrounding tool writes the report. You may freely use your shell/file tools + for *investigation* (e.g. `pull-lp-source`, `grep`, cloning upstream repos) inside + your scratch working directory. +2. **No hallucinated fixes.** If you cannot produce a fix with confidence that it is correct, set `proposed_fix.kind` to `none`. Do not invent plausible-looking patches. +3. **No patch application.** Do not generate or apply quilt patches. Do not modify any package source tree as a deliverable. 
Proposed fixes are returned as a unified diff in the `proposed_fix` field only. +4. **Read-only external access.** Do not post comments on bugs, change bug statuses, subscribe teams, or modify any external system. Your output is recommendations only; a human engineer will act on them. +5. **No speculation on internal architecture.** If you don't have enough information about a package's internals, say so rather than guessing. + + + +Assumptions # What the agent needs to assume before thinking about it + +1. The bug reporter is not necessarily a software engineer. They may be facing a configuration issue, using an unsupported setup (e.g., third-party packages/PPAs), or misidentifying the faulty package. +2. The bug may be a duplicate of an existing report. +3. The package version cited in the report may be outdated or incorrect. +4. Upstream or Debian may have already fixed the issue in a newer release. +5. The bug may affect multiple Ubuntu releases simultaneously. +6. The agent has read-only access to Launchpad, Debian BTS, upstream trackers, and other external resources unless explicitly stated otherwise. +7. Process tickets (syncs, merges, SRUs, MIRs) are out of scope for this triage workflow. + + +Replayability # How can the agent improve itself as we run it again and again + + +After triaging, perform this self-improvement step: + + +### Process improvements +Review your triage thought process and identify: +- Steps that could be automated or made more systematic. +- Information you needed but didn't have. +- Decisions that were difficult or ambiguous. + + +**Return** these as a `suggested_improvements` string (markdown) alongside your +result, proposing changes to any of the RACECAR sections (Role, Actions, Context, +Expectations, Constraints, Assumptions, Replayability) of this specification. The +tool appends it to the report as a `## Suggested Improvements` section. Do not write +any file yourself. 
diff --git a/startriage/enums.py b/startriage/enums.py index 0a5507c..4c29863 100644 --- a/startriage/enums.py +++ b/startriage/enums.py @@ -26,3 +26,21 @@ class AIProvider(StrEnum): copilot = "copilot" # GitHub Copilot SDK (default); GitHub token auth openrouter = "openrouter" # OpenAI-compatible BYOK endpoint + + +class TriageStatus(StrEnum): + """Status the agent recommends for a triaged bug.""" + + invalid = "Invalid" # not a bug, or already fixed in the reported version + incomplete = "Incomplete" # more information needed from the reporter + triaged = "Triaged" # valid and reproducible + duplicate = "Duplicate" # duplicate of another report + no_change = "no-change" # leave as-is; needs further engineering input + + +class ProposedFixKind(StrEnum): + """How the agent's proposed fix should be interpreted.""" + + none = "none" # no fix proposed + reference = "reference" # value is a URL/commit pointing at an existing fix + diff = "diff" # value is a unified diff generated by the agent diff --git a/tests/test_ai_agent.py b/tests/test_ai_agent.py new file mode 100644 index 0000000..58f6b0f --- /dev/null +++ b/tests/test_ai_agent.py @@ -0,0 +1,176 @@ +"""Tests for the agent result contract and the sequential triage loop.""" + +from __future__ import annotations + +import json + +import pytest + +from startriage.ai import ( + AgentResult, + AgentResultError, + FakeProvider, + extract_json_block, + load_system_prompt, + parse_agent_result, + triage_bugs, +) +from startriage.enums import ProposedFixKind, TriageStatus + +_VALID_RESULT = { + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It crashes.", + "thought_process": "Looked at logs.", + "proposed_fix": {"kind": "reference", "value": "https://example.test/commit"}, + "references": ["https://bugs.launchpad.net/ubuntu/+bug/123"], + "suggested_improvements": "Add a cache.", +} + + +def _fenced(payload: dict, lang: str = "json") -> 
str: + return f"Here is the result:\n\n```{lang}\n{json.dumps(payload)}\n```\n" + + +# --- extract_json_block ---------------------------------------------------- + + +def test_extract_json_block_basic(): + text = 'preamble\n```json\n{"a": 1}\n```\ntrailer' + assert extract_json_block(text) == '{"a": 1}' + + +def test_extract_json_block_prefers_last_json_block(): + text = '```json\n{"first": true}\n```\n```json\n{"second": true}\n```' + assert extract_json_block(text) == '{"second": true}' + + +def test_extract_json_block_falls_back_to_untagged_block(): + text = 'no json tag here\n```\n{"untagged": 1}\n```' + assert extract_json_block(text) == '{"untagged": 1}' + + +def test_extract_json_block_prefers_json_over_untagged(): + text = '```\n{"untagged": 1}\n```\n```json\n{"tagged": 2}\n```' + assert extract_json_block(text) == '{"tagged": 2}' + + +def test_extract_json_block_missing_raises(): + with pytest.raises(AgentResultError, match="no fenced code block"): + extract_json_block("just some prose, no block at all") + + +# --- parse_agent_result ---------------------------------------------------- + + +def test_parse_agent_result_valid(): + result = parse_agent_result(_fenced(_VALID_RESULT)) + assert isinstance(result, AgentResult) + assert result.bug == "123" + assert result.status is TriageStatus.triaged + assert result.proposed_fix.kind is ProposedFixKind.reference + assert result.suggested_improvements == "Add a cache." + + +def test_parse_agent_result_no_change_status(): + payload = {**_VALID_RESULT, "status": "no-change"} + assert parse_agent_result(_fenced(payload)).status is TriageStatus.no_change + + +def test_parse_agent_result_ignores_extra_fields(): + payload = {**_VALID_RESULT, "unexpected": "ignored"} + # Extra keys are tolerated; known fields still validated. 
+ assert parse_agent_result(_fenced(payload)).bug == "123" + + +def test_parse_agent_result_invalid_status(): + payload = {**_VALID_RESULT, "status": "Bogus"} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_invalid_fix_kind(): + payload = {**_VALID_RESULT, "proposed_fix": {"kind": "magic", "value": ""}} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_missing_required_field(): + payload = {k: v for k, v in _VALID_RESULT.items() if k != "status"} + with pytest.raises(AgentResultError, match="validation"): + parse_agent_result(_fenced(payload)) + + +def test_parse_agent_result_garbled_json(): + text = "```json\n{not valid json,,,}\n```" + with pytest.raises(AgentResultError, match="not valid JSON"): + parse_agent_result(text) + + +def test_parse_agent_result_non_object(): + text = "```json\n[1, 2, 3]\n```" + with pytest.raises(AgentResultError, match="must be an object"): + parse_agent_result(text) + + +# --- system prompt --------------------------------------------------------- + + +def test_load_system_prompt_ships_as_resource(): + prompt = load_system_prompt() + assert "Role" in prompt + assert "proposed_fix" in prompt + + +# --- triage_bugs loop ------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_triage_bugs_success(): + provider = FakeProvider([_fenced(_VALID_RESULT)]) + outcomes = await triage_bugs(provider, [{"number": "123"}], system_prompt="SYS") + + assert len(outcomes) == 1 + outcome = outcomes[0] + assert outcome.ok + assert outcome.bug == "123" + assert outcome.result.status is TriageStatus.triaged + assert outcome.error is None + # The payload is forwarded as a JSON user message under the given system prompt. 
+ assert provider.calls == [("SYS", json.dumps({"number": "123"}, ensure_ascii=False))] + + +@pytest.mark.asyncio +async def test_triage_bugs_skips_and_continues_on_failure(): + # First bug returns garbage, second returns a valid result. + provider = FakeProvider(["no json here", _fenced({**_VALID_RESULT, "bug": "456"})]) + payloads = [{"number": "123"}, {"number": "456"}] + + outcomes = await triage_bugs(provider, payloads, system_prompt="SYS") + + assert len(outcomes) == 2 + assert not outcomes[0].ok + assert outcomes[0].bug == "123" + assert "no fenced code block" in outcomes[0].error + assert outcomes[0].raw == "no json here" + + assert outcomes[1].ok + assert outcomes[1].result.bug == "456" + + +@pytest.mark.asyncio +async def test_triage_bugs_records_provider_exception(): + class _BoomProvider(FakeProvider): + async def run(self, system_prompt: str, user_message: str) -> str: + raise RuntimeError("network down") + + outcomes = await triage_bugs(_BoomProvider(), [{"number": "789"}], system_prompt="S") + + assert len(outcomes) == 1 + assert not outcomes[0].ok + assert outcomes[0].bug == "789" + assert "provider error" in outcomes[0].error + assert "network down" in outcomes[0].error From 14996daced10203837f28e71478539cc940270f5 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 23:20:39 -0300 Subject: [PATCH 05/11] Render triage results into the autotriage report --- startriage/ai/__init__.py | 4 + startriage/ai/render.py | 140 +++++++++++++++++++++++++++ tests/test_ai_render.py | 194 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 338 insertions(+) create mode 100644 startriage/ai/render.py create mode 100644 tests/test_ai_render.py diff --git a/startriage/ai/__init__.py b/startriage/ai/__init__.py index 7324732..af53e81 100644 --- a/startriage/ai/__init__.py +++ b/startriage/ai/__init__.py @@ -18,6 +18,7 @@ build_provider, build_session_kwargs, ) +from .render import render_report, report_filename, write_report __all__ = [ 
"AgentResult", @@ -33,6 +34,9 @@ "extract_json_block", "load_system_prompt", "parse_agent_result", + "render_report", + "report_filename", "triage_bug", "triage_bugs", + "write_report", ] diff --git a/startriage/ai/render.py b/startriage/ai/render.py new file mode 100644 index 0000000..f38aad5 --- /dev/null +++ b/startriage/ai/render.py @@ -0,0 +1,140 @@ +"""Render triage results into the ``autotriage-YYYY-MM-DD.md`` report. + +This is the tool side of the agent→tool contract: the agent only returns JSON, and +this module turns a batch of :class:`~startriage.ai.agent.BugOutcome` into markdown. +Proposed fixes are only *rendered* (a ``diff`` is shown in a fenced block, never +applied to any source tree), and per-bug failures are recorded so a skipped bug is +still visible in the report. +""" + +from __future__ import annotations + +import os +from datetime import date +from pathlib import Path + +from ..enums import ProposedFixKind +from .agent import BugOutcome +from .contract import AgentResult, ProposedFix + + +def report_filename(day: date | None = None) -> str: + """Return the report file name for ``day`` (defaults to today).""" + return f"autotriage-{(day or date.today()).isoformat()}.md" + + +def _render_proposed_fix(fix: ProposedFix) -> str: + value = fix.value.strip() + if fix.kind is ProposedFixKind.none or not value: + return "_No fix proposed._" + if fix.kind is ProposedFixKind.reference: + return value + # kind == diff: render only; the tool never applies it to a source tree. 
+ return f"```diff\n{value}\n```" + + +def _render_bug(result: AgentResult) -> str: + package = result.package or "unknown" + title = result.short_title or "(no title)" + tags = ", ".join(result.tags) if result.tags else "_none_" + + lines = [ + f"## LP #{result.bug} — {package} — {title}", + "", + f"**Suggested status:** {result.status.value}", + f"**Suggested tags:** {tags}", + "", + "### Analysis", + "", + result.analysis.strip() or "_No analysis provided._", + "", + "### Thought Process", + "", + result.thought_process.strip() or "_No thought process provided._", + "", + "### Proposed Fix", + "", + _render_proposed_fix(result.proposed_fix), + ] + if result.references: + lines += ["", "### References", ""] + lines += [f"- {ref}" for ref in result.references] + return "\n".join(lines) + + +def _render_failure(outcome: BugOutcome) -> str: + bug = outcome.bug or "(unknown)" + return "\n".join( + [ + f"## LP #{bug} — triage failed", + "", + f"**Error:** {outcome.error}", + ] + ) + + +def _render_suggested_improvements(results: list[AgentResult]) -> str | None: + """Aggregate non-empty, de-duplicated improvement notes across results.""" + seen: set[str] = set() + blocks: list[str] = [] + for result in results: + note = result.suggested_improvements.strip() + if note and note not in seen: + seen.add(note) + blocks.append(note) + if not blocks: + return None + return "\n\n".join(blocks) + + +def render_report(outcomes: list[BugOutcome], day: date | None = None) -> str: + """Render a full markdown report for ``outcomes``. + + Successful results render as per-bug sections; failures are recorded inline. + A trailing ``## Suggested Improvements`` section aggregates the agent's + self-improvement notes when any were returned. 
+ """ + report_day = day or date.today() + sections = [f"# Automated triage — {report_day.isoformat()}"] + + results = [o.result for o in outcomes if o.result is not None] + + for outcome in outcomes: + if outcome.result is not None: + sections.append(_render_bug(outcome.result)) + else: + sections.append(_render_failure(outcome)) + + improvements = _render_suggested_improvements(results) + if improvements: + sections.append(f"## Suggested Improvements\n\n{improvements}") + + return "\n\n".join(sections) + "\n" + + +def write_report( + content: str, + day: date | None = None, + preferred_dir: Path | None = None, +) -> Path: + """Write ``content`` to the report file, falling back to ``SNAP_USER_DATA``. + + Writes into ``preferred_dir`` (default: cwd). If that is not writable (e.g. a + strict-snap read-only cwd), fall back to ``$SNAP_USER_DATA`` when set, otherwise + re-raise the original error. + """ + name = report_filename(day) + target_dir = preferred_dir or Path.cwd() + target = target_dir / name + try: + target.write_text(content, encoding="utf-8") + return target + except OSError: + snap_data = os.environ.get("SNAP_USER_DATA") + if not snap_data: + raise + fallback_dir = Path(snap_data) + fallback_dir.mkdir(parents=True, exist_ok=True) + fallback = fallback_dir / name + fallback.write_text(content, encoding="utf-8") + return fallback diff --git a/tests/test_ai_render.py b/tests/test_ai_render.py new file mode 100644 index 0000000..5ef06cf --- /dev/null +++ b/tests/test_ai_render.py @@ -0,0 +1,194 @@ +"""Tests for the triage report renderer (golden render + write fallback).""" + +from __future__ import annotations + +from datetime import date +from pathlib import Path + +from startriage.ai import ( + AgentResult, + BugOutcome, + ProposedFix, + render_report, + report_filename, + write_report, +) +from startriage.ai.render import _render_proposed_fix +from startriage.enums import ProposedFixKind, TriageStatus + +_DAY = date(2026, 6, 15) + + +def 
_result(**overrides) -> AgentResult: + base = { + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": TriageStatus.triaged, + "tags": ["server-todo", "bitesize"], + "analysis": "It crashes immediately.", + "thought_process": "Read the logs, searched LP.", + "proposed_fix": ProposedFix(kind=ProposedFixKind.reference, value="https://example.test/commit"), + "references": ["https://bugs.launchpad.net/ubuntu/+bug/123"], + "suggested_improvements": "Add a version cache.", + } + base.update(overrides) + return AgentResult(**base) + + +def _outcome(result: AgentResult) -> BugOutcome: + return BugOutcome(bug=result.bug, result=result, error=None, raw="{}") + + +# --- report_filename ------------------------------------------------------- + + +def test_report_filename(): + assert report_filename(_DAY) == "autotriage-2026-06-15.md" + + +# --- proposed fix rendering ------------------------------------------------ + + +def test_render_proposed_fix_none(): + fix = ProposedFix(kind=ProposedFixKind.none, value="") + assert _render_proposed_fix(fix) == "_No fix proposed._" + + +def test_render_proposed_fix_reference(): + fix = ProposedFix(kind=ProposedFixKind.reference, value="https://x.test/c ") + assert _render_proposed_fix(fix) == "https://x.test/c" + + +def test_render_proposed_fix_diff_is_fenced_not_applied(): + diff = "--- a/f\n+++ b/f\n@@ -1 +1 @@\n-old\n+new" + rendered = _render_proposed_fix(ProposedFix(kind=ProposedFixKind.diff, value=diff)) + assert rendered == f"```diff\n{diff}\n```" + + +def test_render_proposed_fix_empty_diff_falls_back(): + # A diff kind with no value should not emit an empty code block. 
+ fix = ProposedFix(kind=ProposedFixKind.diff, value=" ") + assert _render_proposed_fix(fix) == "_No fix proposed._" + + +# --- full report ----------------------------------------------------------- + + +def test_render_report_golden(): + outcomes = [_outcome(_result())] + expected = ( + "# Automated triage — 2026-06-15\n" + "\n" + "## LP #123 — pkg — boom on start\n" + "\n" + "**Suggested status:** Triaged\n" + "**Suggested tags:** server-todo, bitesize\n" + "\n" + "### Analysis\n" + "\n" + "It crashes immediately.\n" + "\n" + "### Thought Process\n" + "\n" + "Read the logs, searched LP.\n" + "\n" + "### Proposed Fix\n" + "\n" + "https://example.test/commit\n" + "\n" + "### References\n" + "\n" + "- https://bugs.launchpad.net/ubuntu/+bug/123\n" + "\n" + "## Suggested Improvements\n" + "\n" + "Add a version cache.\n" + ) + assert render_report(outcomes, day=_DAY) == expected + + +def test_render_report_no_tags_and_no_references(): + result = _result(tags=[], references=[], suggested_improvements="") + report = render_report([_outcome(result)], day=_DAY) + assert "**Suggested tags:** _none_" in report + assert "### References" not in report + assert "## Suggested Improvements" not in report + + +def test_render_report_no_change_status(): + result = _result(status=TriageStatus.no_change) + report = render_report([_outcome(result)], day=_DAY) + assert "**Suggested status:** no-change" in report + + +def test_render_report_records_failures(): + ok = _outcome(_result(bug="123")) + failed = BugOutcome(bug="456", result=None, error="agent output invalid", raw="junk") + report = render_report([ok, failed], day=_DAY) + + assert "## LP #123 — pkg — boom on start" in report + assert "## LP #456 — triage failed" in report + assert "**Error:** agent output invalid" in report + + +def test_render_report_deduplicates_improvements(): + a = _outcome(_result(bug="1", suggested_improvements="Same note.")) + b = _outcome(_result(bug="2", suggested_improvements="Same note.")) + c = 
def _make_unwritable_dir(tmp_path):
    """Return a directory the current user cannot write to, or skip the test.

    ``chmod(0o500)`` only blocks writes when the OS enforces permission bits
    for the running user. It does not for root (typical in CI containers and
    package builds) nor on platforms where ``chmod`` cannot drop directory
    write access. In those environments the permission-based tests below would
    fail for reasons unrelated to ``write_report``, so they are skipped instead.
    """
    import os

    import pytest

    readonly = tmp_path / "readonly"
    readonly.mkdir()
    readonly.chmod(0o500)
    if os.access(readonly, os.W_OK):
        # Restore the mode so pytest can clean up tmp_path before skipping.
        readonly.chmod(0o700)
        pytest.skip("directory permissions are not enforced for this user/platform")
    return readonly


def test_write_report_falls_back_to_snap_user_data(tmp_path, monkeypatch):
    """An unwritable preferred dir makes write_report fall back to $SNAP_USER_DATA."""
    readonly = _make_unwritable_dir(tmp_path)
    snap = tmp_path / "snap"
    monkeypatch.setenv("SNAP_USER_DATA", str(snap))

    try:
        path = write_report("body", day=_DAY, preferred_dir=readonly)
    finally:
        # Always restore write access so tmp_path cleanup succeeds.
        readonly.chmod(0o700)

    assert path == snap / "autotriage-2026-06-15.md"
    assert path.read_text() == "body"


def test_write_report_reraises_without_snap(tmp_path, monkeypatch):
    """Without $SNAP_USER_DATA there is no fallback, so the OSError propagates."""
    import pytest

    readonly = _make_unwritable_dir(tmp_path)
    monkeypatch.delenv("SNAP_USER_DATA", raising=False)

    try:
        with pytest.raises(OSError):
            write_report("body", day=_DAY, preferred_dir=readonly)
    finally:
        # Always restore write access so tmp_path cleanup succeeds.
        readonly.chmod(0o700)
startriage/cli.py | 92 ++++++++++++++++++++++++- startriage/triage.py | 10 ++- tests/test_ai_run.py | 138 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 362 insertions(+), 5 deletions(-) create mode 100644 startriage/ai/run.py create mode 100644 tests/test_ai_run.py diff --git a/startriage/ai/__init__.py b/startriage/ai/__init__.py index af53e81..c86c3a6 100644 --- a/startriage/ai/__init__.py +++ b/startriage/ai/__init__.py @@ -19,6 +19,12 @@ build_session_kwargs, ) from .render import render_report, report_filename, write_report +from .run import ( + gather_user_bug_payloads, + parse_bug_number, + payloads_from_tasks, + run_agent_on_payloads, +) __all__ = [ "AgentResult", @@ -32,10 +38,14 @@ "build_provider", "build_session_kwargs", "extract_json_block", + "gather_user_bug_payloads", "load_system_prompt", "parse_agent_result", + "parse_bug_number", + "payloads_from_tasks", "render_report", "report_filename", + "run_agent_on_payloads", "triage_bug", "triage_bugs", "write_report", diff --git a/startriage/ai/run.py b/startriage/ai/run.py new file mode 100644 index 0000000..d0091e1 --- /dev/null +++ b/startriage/ai/run.py @@ -0,0 +1,117 @@ +"""End-to-end orchestration that wires the AI layer to the CLI. + +Two entry points feed bugs to the agent and write a dated report: + +- :func:`gather_user_bug_payloads` resolves user-supplied bug specs (URL, + ``NNNNNN`` or ``#NNNNNN``) into agent payloads (``ai-triage``). +- :func:`payloads_from_tasks` turns already-fetched triage tasks into payloads + (``triage --ai``). + +Both hand their payloads to :func:`run_agent_on_payloads`, which runs the agent +sequentially and writes ``autotriage-YYYY-MM-DD.md``. Launchpad access is lazily +imported inside the gather helpers so non-AI commands and offline tests never +pull in launchpadlib. 
_BUG_DIGITS = re.compile(r"\d+")
# A bug addressed by URL path: ``.../+bug/<number>`` or ``.../bugs/<number>``.
_BUG_URL_SEGMENT = re.compile(r"/\+?bugs?/(\d+)")


def parse_bug_number(spec: str) -> str:
    """Extract a Launchpad bug number from a URL, ``#NNNNNN`` or bare ``NNNNNN``.

    An explicit ``+bug/<number>`` (or ``bugs/<number>``) path segment takes
    priority, so a numeric tail after the bug number is ignored -- for example
    ``.../+bug/2101234/comments/5`` or ``.../+bug/2101234/+attachment/9001``.

    When no such segment is present, the last run of digits wins. This keeps
    ``#NNNNNN`` and bare numbers working, and digits earlier in the string
    (such as a package name in ``.../+source/<package>/...``) do not confuse
    the parse.

    Args:
        spec: User-supplied bug reference.

    Returns:
        The bug number as a string of digits.

    Raises:
        ValueError: If ``spec`` contains no digits at all.
    """
    url_match = _BUG_URL_SEGMENT.search(spec)
    if url_match is not None:
        return url_match.group(1)

    matches = _BUG_DIGITS.findall(spec)
    if not matches:
        raise ValueError(f"could not parse a Launchpad bug number from {spec!r}")
    return matches[-1]
+ """ + payloads: list[dict[str, Any]] = [] + seen: set[str] = set() + for task in tasks: + if task.number in seen: + continue + seen.add(task.number) + try: + payloads.append(task.to_agent_payload()) + except Exception: + logger.warning("Skipping bug %s: failed to build payload", task.number, exc_info=True) + return payloads + + +async def run_agent_on_payloads( + config: StarTriageConfig, + payloads: list[dict[str, Any]], + *, + provider: Provider | None = None, + preferred_dir: Path | None = None, +) -> Path | None: + """Run the agent over ``payloads`` and write the dated report. + + Returns the report path, or ``None`` when there is nothing to triage. When + ``provider`` is omitted it is built from ``config`` (validating credentials, + which may raise :class:`~startriage.config.AIConfigError`). + """ + if not payloads: + logger.info("No bugs to triage with the AI agent.") + return None + + if provider is None: + provider = build_provider(config.ai) + + system_prompt = load_system_prompt() + outcomes = await triage_bugs(provider, payloads, system_prompt) + report = render_report(outcomes) + path = write_report(report, preferred_dir=preferred_dir) + logger.info("AI triage report written to %s", path) + return path diff --git a/startriage/cli.py b/startriage/cli.py index a5a9923..2485127 100644 --- a/startriage/cli.py +++ b/startriage/cli.py @@ -5,14 +5,22 @@ import argparse import asyncio import sys +from collections.abc import Sequence from datetime import datetime, timedelta, timezone from pathlib import Path -from .config import DEFAULT_USER_CONFIG, StarTriageConfig, load_config, resolve_team_name, update_user_config +from .config import ( + DEFAULT_USER_CONFIG, + AIConfigError, + StarTriageConfig, + load_config, + resolve_team_name, + update_user_config, +) from .dates import parse_interval, triage_task_date_range from .enums import AIProvider, UpdateFilter from .log import log_setup -from .output import OutputConfig, OutputFormat +from .output import 
OutputConfig, OutputFormat, TriageResult from .savebugs import BugPersistor, SaveConfig from .source import TaskFilterOptions from .sources.github.auth import _run_github_login @@ -167,6 +175,11 @@ def _build_parser() -> argparse.ArgumentParser: metavar="DAYS", help="Minimum days of being stuck in proposed to be included in triage", ) + triage_p.add_argument( + "--ai", + action="store_true", + help="Also run AI triage on every bug found, writing autotriage-YYYY-MM-DD.md", + ) triage_p.set_defaults(func=_run_triage) # --- todo --- @@ -183,6 +196,19 @@ def _build_parser() -> argparse.ArgumentParser: todo_p.add_argument("-C", "--compare", metavar="PATH", help="Set path to saved file to compare bugs to") todo_p.set_defaults(func=_run_todo) + # --- ai-triage --- + ai_triage_p = sp.add_parser( + "ai-triage", + help="AI-triage one or more Launchpad bugs", + ) + ai_triage_p.add_argument( + "bug", + nargs="+", + metavar="BUG", + help="Launchpad bug to triage: full URL, NNNNNN, or #NNNNNN", + ) + ai_triage_p.set_defaults(func=_run_ai_triage) + # --- config --- config_p = sp.add_parser("config", help="Manage configuration") config_sp = config_p.add_subparsers(required=True) @@ -325,6 +351,14 @@ async def _run() -> None: async def _run_triage(args: argparse.Namespace, config: StarTriageConfig) -> None: + provider = None + if args.ai: + # Validate AI credentials up-front so a misconfig fails before the (slow) + # normal triage run rather than after it. 
async def _ai_triage_results(
    config: StarTriageConfig,
    results: Sequence[tuple[str, TriageResult]],
    provider,
) -> None:
    """Feed the Launchpad tasks from a finished triage run to the AI agent.

    Only the first ``LaunchpadTriage`` result in ``results`` is used; if there
    is none, the agent is handed an empty task list. The report path is printed
    when a report was written.
    """
    from .ai import payloads_from_tasks, run_agent_on_payloads
    from .sources.launchpad.triage import LaunchpadTriage

    # First Launchpad result wins; other sources carry no Launchpad tasks.
    launchpad = next(
        (candidate for _name, candidate in results if isinstance(candidate, LaunchpadTriage)),
        None,
    )
    tasks: list = [] if launchpad is None else list(launchpad.tasks.tasks)

    # Building the payloads is blocking work, so it runs off the event loop.
    payloads = await asyncio.to_thread(payloads_from_tasks, tasks)
    report_path = await run_agent_on_payloads(config, payloads, provider=provider)
    if report_path is None:
        return
    print(f"AI triage report written to {report_path}")
payloads: + print("No valid bugs to triage.", file=sys.stderr) + return + + path = await run_agent_on_payloads(config, payloads, provider=provider) + if path is not None: + print(f"AI triage report written to {path}") + + async def _set_config_settings(args: argparse.Namespace, _config: StarTriageConfig) -> None: updates: dict[str, dict] = {} diff --git a/startriage/triage.py b/startriage/triage.py index 74b88d3..c489fec 100644 --- a/startriage/triage.py +++ b/startriage/triage.py @@ -48,8 +48,12 @@ async def run_triage( config: StarTriageConfig, opts: TaskFilterOptions, output_cfg: OutputConfig, -) -> None: - """Daily triage: fetch all sources concurrently, print sections in order as they complete.""" +) -> list[tuple[str, TriageResult]]: + """Daily triage: fetch all sources concurrently, print sections in order as they complete. + + Returns the ``(source_name, result)`` pairs that were fetched successfully so + callers (e.g. ``triage --ai``) can reuse them without re-fetching. + """ range = triage_task_note = "" @@ -121,6 +125,8 @@ async def run_triage( logging.info("Markdown written to %s", output_cfg.markdown_path) + return results + async def run_todo( config: StarTriageConfig, diff --git a/tests/test_ai_run.py b/tests/test_ai_run.py new file mode 100644 index 0000000..09aafb3 --- /dev/null +++ b/tests/test_ai_run.py @@ -0,0 +1,138 @@ +"""Tests for AI orchestration (run.py) and CLI wiring — all offline.""" + +from __future__ import annotations + +from datetime import date + +import pytest + +from startriage.ai import ( + FakeProvider, + parse_bug_number, + payloads_from_tasks, + run_agent_on_payloads, +) +from startriage.cli import _build_parser +from startriage.config import StarTriageConfig + +_CANNED = """Here is my analysis. 
+ +```json +{ + "bug": "123", + "package": "pkg", + "short_title": "boom on start", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It broke.", + "thought_process": "Looked at logs.", + "proposed_fix": {"kind": "none", "value": ""}, + "references": [], + "suggested_improvements": "" +} +``` +""" + + +# --- parse_bug_number ------------------------------------------------------ + + +@pytest.mark.parametrize( + ("spec", "expected"), + [ + ("123456", "123456"), + ("#123456", "123456"), + ("https://bugs.launchpad.net/ubuntu/+bug/123456", "123456"), + ("https://bugs.launchpad.net/ubuntu/+source/python3.12/+bug/987", "987"), + (" #42 ", "42"), + ], +) +def test_parse_bug_number(spec, expected): + assert parse_bug_number(spec) == expected + + +def test_parse_bug_number_invalid(): + with pytest.raises(ValueError): + parse_bug_number("not-a-bug") + + +# --- payloads_from_tasks --------------------------------------------------- + + +class _FakeTask: + def __init__(self, number: str, payload=None, *, raises: bool = False): + self.number = number + self._payload = payload if payload is not None else {"number": number} + self._raises = raises + + def to_agent_payload(self): + if self._raises: + raise RuntimeError("boom") + return self._payload + + +def test_payloads_from_tasks_dedupes_by_number(): + tasks = [_FakeTask("1"), _FakeTask("1"), _FakeTask("2")] + payloads = payloads_from_tasks(tasks) # type: ignore[arg-type] + assert [p["number"] for p in payloads] == ["1", "2"] + + +def test_payloads_from_tasks_skips_failures(): + tasks = [_FakeTask("1"), _FakeTask("2", raises=True), _FakeTask("3")] + payloads = payloads_from_tasks(tasks) # type: ignore[arg-type] + assert [p["number"] for p in payloads] == ["1", "3"] + + +# --- run_agent_on_payloads ------------------------------------------------- + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_writes_report(tmp_path): + provider = FakeProvider([_CANNED]) + config = StarTriageConfig() + payloads = 
[{"number": "123", "title": "boom"}] + + path = await run_agent_on_payloads(config, payloads, provider=provider, preferred_dir=tmp_path) + + assert path == tmp_path / f"autotriage-{date.today().isoformat()}.md" + content = path.read_text() + assert "## LP #123 — pkg — boom on start" in content + assert "**Suggested status:** Triaged" in content + # The agent was asked exactly once, with the payload as the user message. + assert len(provider.calls) == 1 + assert '"number": "123"' in provider.calls[0][1] + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_empty_returns_none(tmp_path): + provider = FakeProvider([]) + path = await run_agent_on_payloads(StarTriageConfig(), [], provider=provider, preferred_dir=tmp_path) + assert path is None + assert provider.calls == [] + + +@pytest.mark.asyncio +async def test_run_agent_on_payloads_records_bad_agent_output(tmp_path): + provider = FakeProvider(["no json here"]) + path = await run_agent_on_payloads( + StarTriageConfig(), + [{"number": "999"}], + provider=provider, + preferred_dir=tmp_path, + ) + assert path is not None + assert "## LP #999 — triage failed" in path.read_text() + + +# --- CLI parser wiring ----------------------------------------------------- + + +def test_parser_ai_triage_accepts_multiple_bugs(): + args = _build_parser().parse_args(["ai-triage", "123", "#456", "https://x/+bug/789"]) + assert args.bug == ["123", "#456", "https://x/+bug/789"] + assert args.func.__name__ == "_run_ai_triage" + + +def test_parser_triage_ai_flag_defaults_false(): + assert _build_parser().parse_args(["triage"]).ai is False + assert _build_parser().parse_args(["triage", "--ai"]).ai is True From ac1c44a99414c5d9dd4da934b78cd1be35a4d47d Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 23:41:04 -0300 Subject: [PATCH 07/11] Package the Copilot runtime and ubuntu-dev-tools --- README.md | 33 +++++++++++++++++++++++++++++++ pyproject.toml | 5 +++++ snapcraft.yaml | 14 ++++++++++++++ 
tests/test_packaging.py | 43 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 tests/test_packaging.py diff --git a/README.md b/README.md index e3fb590..08ed930 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,39 @@ startriage todo --subscribed Run `startriage triage --help` for the full option reference, including the bug flags legend. +## AI Triage (experimental) + +Run an AI agent over Launchpad bugs to produce a dated `autotriage-YYYY-MM-DD.md` +report with a suggested status, tags, analysis, and (where applicable) a proposed +fix. The agent never edits bugs and never applies patches — it only writes the +report. + +```bash +# Triage one or more specific bugs (URL, NNNNNN, or #NNNNNN) +startriage ai-triage 2101234 '#2105678' + +# Run the normal daily triage, then AI-triage every bug found +startriage triage --ai +``` + +Configure a provider first (credentials are written to the 0600 config, never +echoed): + +```bash +# Default provider: GitHub Copilot (needs a Copilot-enabled account) +startriage config set --ai-provider copilot --ai-github-token github_pat_... + +# Or bring your own key via an OpenAI-compatible provider (e.g. OpenRouter) +startriage config set --ai-provider openrouter \ + --ai-model anthropic/claude-opus-4.1 \ + --ai-openrouter-key sk-or-... +``` + +The Copilot token may also come from `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / +`GITHUB_TOKEN`, and the OpenRouter key from `OPENROUTER_API_KEY`. The snap bundles +the Copilot runtime and `ubuntu-dev-tools`, so source analysis works inside strict +confinement; from a git checkout install the extra with `uv sync --extra ai`. + ## Configuration adjust [the defaults](startriage/data/defaults.toml) with your user configuration file: diff --git a/pyproject.toml b/pyproject.toml index 4ac126f..2a08dbb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,11 @@ dependencies = [ "platformdirs", ] +[project.optional-dependencies] +# Agentic AI triage. 
The SDK bundles the Copilot CLI runtime it spawns; kept +# optional so non-AI installs stay lean. The snap ships it (see snapcraft.yaml). +ai = ["github-copilot-sdk"] + [project.scripts] startriage = "startriage.__main__:main" diff --git a/snapcraft.yaml b/snapcraft.yaml index 73cda3a..4dc539c 100644 --- a/snapcraft.yaml +++ b/snapcraft.yaml @@ -29,6 +29,13 @@ grade: stable apps: startriage: command: bin/startriage + environment: + # The Copilot runtime defaults COPILOT_HOME to ~/.copilot, a hidden path the + # `home` plug cannot write to. Point it at SNAP_USER_DATA, always writable. + COPILOT_HOME: $SNAP_USER_DATA/.copilot + # Expose staged ubuntu-dev-tools helpers (pull-lp-source, debdiff, …) on PATH + # so the agent's shell tool can pull and diff package source. + PATH: $SNAP/usr/bin:$SNAP/bin:$PATH plugs: - network - network-bind @@ -39,3 +46,10 @@ parts: plugin: python source: . source-type: git + # github-copilot-sdk bundles the Copilot CLI runtime binary it spawns, so no + # separate Node part is needed; pip ships the runtime inside the snap. + python-packages: + - github-copilot-sdk + stage-packages: + # pull-lp-source / dpkg-source / debdiff for the agent's source analysis. + - ubuntu-dev-tools diff --git a/tests/test_packaging.py b/tests/test_packaging.py new file mode 100644 index 0000000..7f125f8 --- /dev/null +++ b/tests/test_packaging.py @@ -0,0 +1,43 @@ +"""Guard the snap/pyproject plumbing that ships the AI triage feature. + +These are offline structural checks — they do not build the snap — so a future +edit cannot silently drop the Copilot runtime, ubuntu-dev-tools, the writable +COPILOT_HOME, or the required plugs. 
+""" + +from __future__ import annotations + +import tomllib +from pathlib import Path + +import yaml + +_ROOT = Path(__file__).resolve().parent.parent +_SNAPCRAFT = _ROOT / "snapcraft.yaml" +_PYPROJECT = _ROOT / "pyproject.toml" + + +def _snapcraft() -> dict: + return yaml.safe_load(_SNAPCRAFT.read_text()) + + +def test_app_keeps_network_and_home_plugs(): + plugs = _snapcraft()["apps"]["startriage"]["plugs"] + assert {"network", "network-bind", "home"} <= set(plugs) + + +def test_copilot_home_points_at_writable_dir(): + env = _snapcraft()["apps"]["startriage"]["environment"] + # ~/.copilot is hidden and blocked by the home plug; must be under SNAP_USER_DATA. + assert "SNAP_USER_DATA" in env["COPILOT_HOME"] + + +def test_part_ships_copilot_sdk_and_ubuntu_dev_tools(): + part = _snapcraft()["parts"]["startriage"] + assert "github-copilot-sdk" in part["python-packages"] + assert "ubuntu-dev-tools" in part["stage-packages"] + + +def test_pyproject_exposes_optional_ai_extra(): + data = tomllib.loads(_PYPROJECT.read_text()) + assert data["project"]["optional-dependencies"]["ai"] == ["github-copilot-sdk"] From 3f209406c3b1aa5ea9a9983d532356e0064aa931 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Mon, 15 Jun 2026 23:44:22 -0300 Subject: [PATCH 08/11] Add step logging for AI triage observability --- startriage/ai/agent.py | 33 ++++++++++++++++++++-- startriage/ai/provider.py | 14 ++++++++++ tests/test_ai_logging.py | 58 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 tests/test_ai_logging.py diff --git a/startriage/ai/agent.py b/startriage/ai/agent.py index a9912a2..276da55 100644 --- a/startriage/ai/agent.py +++ b/startriage/ai/agent.py @@ -9,12 +9,15 @@ from __future__ import annotations import json +import logging from dataclasses import dataclass from importlib.resources import files from .contract import AgentResult, AgentResultError, parse_agent_result from .provider import Provider +logger = 
def _log_outcome(outcome: BugOutcome) -> None:
    """Log one bug's outcome.

    A failure is logged as a WARNING. A success logs the suggested status and
    tags at INFO, then the proposed-fix kind and (when non-empty) the thought
    process at DEBUG.
    """
    if not outcome.ok or outcome.result is None:
        logger.warning("Bug %s failed: %s", outcome.bug, outcome.error)
        return

    triaged = outcome.result
    tag_text = ", ".join(triaged.tags) or "(none)"
    logger.info(
        "Bug %s → status=%s, tags=%s",
        outcome.bug,
        triaged.status.value,
        tag_text,
    )
    logger.debug("Bug %s proposed fix: %s", outcome.bug, triaged.proposed_fix.kind.value)
    if triaged.thought_process:
        logger.debug("Bug %s thought process: %s", outcome.bug, triaged.thought_process)
return BugOutcome(bug=bug, result=None, error=f"provider error: {exc}", raw="") + logger.debug("Bug %s: received %d-char agent response", bug, len(raw)) try: result = parse_agent_result(raw) except AgentResultError as exc: @@ -67,7 +89,14 @@ async def triage_bugs( ) -> list[BugOutcome]: """Triage ``payloads`` sequentially, recording per-bug failures and continuing.""" prompt = system_prompt if system_prompt is not None else load_system_prompt() + total = len(payloads) outcomes: list[BugOutcome] = [] - for payload in payloads: - outcomes.append(await triage_bug(provider, payload, prompt)) + for index, payload in enumerate(payloads, start=1): + bug = str(payload.get("number", "")) + logger.info("Triaging bug %s (%d/%d)…", bug, index, total) + outcome = await triage_bug(provider, payload, prompt) + _log_outcome(outcome) + outcomes.append(outcome) + succeeded = sum(o.ok for o in outcomes) + logger.info("AI triage complete: %d succeeded, %d failed", succeeded, total - succeeded) return outcomes diff --git a/startriage/ai/provider.py b/startriage/ai/provider.py index e79c695..426b01e 100644 --- a/startriage/ai/provider.py +++ b/startriage/ai/provider.py @@ -14,12 +14,15 @@ from __future__ import annotations +import logging from abc import ABC, abstractmethod from typing import Any from ..config import AIConfig from ..enums import AIProvider +logger = logging.getLogger(__name__) + class Provider(ABC): """A backend capable of running one agent session and returning its final text.""" @@ -66,6 +69,12 @@ def build_session_kwargs(ai_config: AIConfig) -> dict[str, Any]: return {} +def _log_session_event(event: Any) -> None: + """Log a Copilot session step event at DEBUG (subscribed only under -vv).""" + event_type = getattr(event, "type", None) or type(event).__name__ + logger.debug("Copilot session event: %s", event_type) + + class CopilotProvider(Provider): """Real provider backed by the Copilot Python SDK (lazily imported). 
@@ -86,6 +95,7 @@ async def run(self, system_prompt: str, user_message: str) -> str: from copilot import CopilotClient # ty: ignore[unresolved-import] from copilot.session import PermissionHandler # ty: ignore[unresolved-import] + logger.debug("Starting Copilot session (model=%s)", self.model) async with CopilotClient(**build_client_kwargs(self._ai_config)) as client: async with await client.create_session( on_permission_request=PermissionHandler.approve_all, @@ -95,6 +105,10 @@ async def run(self, system_prompt: str, user_message: str) -> str: system_message={"mode": "append", "content": system_prompt}, **build_session_kwargs(self._ai_config), ) as session: + # At -vv, stream the agent's step events (tool calls, reasoning) + # so unattended runs are auditable; cheap no-op otherwise. + if logger.isEnabledFor(logging.DEBUG): + session.on(_log_session_event) message = await session.send_and_wait(user_message) return (message.data.content or "") if message else "" diff --git a/tests/test_ai_logging.py b/tests/test_ai_logging.py new file mode 100644 index 0000000..ce1a5ee --- /dev/null +++ b/tests/test_ai_logging.py @@ -0,0 +1,58 @@ +"""Tests for AI step logging (observability under -v / -vv).""" + +from __future__ import annotations + +import logging + +import pytest + +from startriage.ai import FakeProvider, triage_bugs + +_OK = """```json +{ + "bug": "123", + "package": "pkg", + "short_title": "boom", + "status": "Triaged", + "tags": ["server-todo"], + "analysis": "It broke.", + "thought_process": "Checked the logs.", + "proposed_fix": {"kind": "none", "value": ""}, + "references": [], + "suggested_improvements": "" +} +```""" + + +@pytest.mark.asyncio +async def test_triage_bugs_logs_progress_and_decision(caplog): + provider = FakeProvider([_OK]) + with caplog.at_level(logging.INFO, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "123"}], system_prompt="sys") + + messages = [r.getMessage() for r in caplog.records] + assert any("Triaging bug 
123 (1/1)" in m for m in messages) + assert any("Bug 123 → status=Triaged" in m for m in messages) + assert any("1 succeeded, 0 failed" in m for m in messages) + + +@pytest.mark.asyncio +async def test_triage_bugs_logs_failure_as_warning(caplog): + provider = FakeProvider(["no json here"]) + with caplog.at_level(logging.INFO, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "999"}], system_prompt="sys") + + warnings = [r.getMessage() for r in caplog.records if r.levelno == logging.WARNING] + assert any("Bug 999 failed" in m for m in warnings) + summaries = [r.getMessage() for r in caplog.records if "complete" in r.getMessage()] + assert any("0 succeeded, 1 failed" in m for m in summaries) + + +@pytest.mark.asyncio +async def test_triage_bugs_debug_logs_thought_process(caplog): + provider = FakeProvider([_OK]) + with caplog.at_level(logging.DEBUG, logger="startriage.ai.agent"): + await triage_bugs(provider, [{"number": "123"}], system_prompt="sys") + + debug = [r.getMessage() for r in caplog.records if r.levelno == logging.DEBUG] + assert any("thought process: Checked the logs." 
in m for m in debug) From 4f1acf2b33afdff676c3baeb68d8f565f65949b6 Mon Sep 17 00:00:00 2001 From: Renan Rodrigo Date: Tue, 16 Jun 2026 00:23:05 -0300 Subject: [PATCH 09/11] Remove timeouts and add a spinner to follow AI triage execution --- startriage/ai/agent.py | 11 ++++++++++- startriage/ai/provider.py | 5 ++++- startriage/ai/run.py | 29 ++++++++++++++++++++++++++++- startriage/spinner.py | 17 ++++++++++++++++- tests/test_ai_logging.py | 15 +++++++++++++++ tests/test_spinner.py | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 tests/test_spinner.py diff --git a/startriage/ai/agent.py b/startriage/ai/agent.py index 276da55..65cc68c 100644 --- a/startriage/ai/agent.py +++ b/startriage/ai/agent.py @@ -10,6 +10,7 @@ import json import logging +from collections.abc import Callable from dataclasses import dataclass from importlib.resources import files @@ -86,13 +87,21 @@ async def triage_bugs( provider: Provider, payloads: list[dict], system_prompt: str | None = None, + *, + on_progress: Callable[[int, int, str], None] | None = None, ) -> list[BugOutcome]: - """Triage ``payloads`` sequentially, recording per-bug failures and continuing.""" + """Triage ``payloads`` sequentially, recording per-bug failures and continuing. + + ``on_progress`` (when given) is called as ``(index, total, bug)`` just before + each bug is sent to the agent, so a caller can drive a spinner/progress line. 
def _make_spinner(total: int) -> Spinner | None:
    """Build the status spinner for an AI run, or ``None`` if it should stay off.

    No spinner is created when stderr is not a TTY, or when the ``startriage``
    logger is enabled for INFO: per-bug progress is already logged at that
    level, and a redrawing spinner would interleave with the log lines.
    """
    # Short-circuit keeps the original order: the TTY check runs first.
    if not sys.stderr.isatty() or logging.getLogger("startriage").isEnabledFor(logging.INFO):
        return None

    noun = "bugs" if total != 1 else "bug"
    initial_status = f"Preparing to triage {total} {noun}…"
    return Spinner(set(), status=initial_status)
+ + The next animation frame (within ``interval``) picks it up; no redraw is + forced here so callers can update it cheaply from a hot loop. + """ + self._status = text + def suspend(self) -> None: """Pause spinner writes (call before awaiting section output).""" self._draw.clear() @@ -63,7 +73,12 @@ async def _run(self) -> None: frame = self._FRAMES[i % len(self._FRAMES)] sources = sorted(self._pending) - msg = f"Fetching: {', '.join(sources)}…" if sources else "Processing…" + if self._status is not None: + msg = self._status + elif sources: + msg = f"Fetching: {', '.join(sources)}…" + else: + msg = "Processing…" line = f"{frame} {msg}" self._write(f"\r{line}") diff --git a/tests/test_ai_logging.py b/tests/test_ai_logging.py index ce1a5ee..95d5c73 100644 --- a/tests/test_ai_logging.py +++ b/tests/test_ai_logging.py @@ -56,3 +56,18 @@ async def test_triage_bugs_debug_logs_thought_process(caplog): debug = [r.getMessage() for r in caplog.records if r.levelno == logging.DEBUG] assert any("thought process: Checked the logs." 
@pytest.mark.asyncio
async def test_triage_bugs_reports_progress():
    """``on_progress`` receives ``(index, total, bug)`` once per payload, in order."""
    progress_calls: list[tuple[int, int, str]] = []

    def record(index: int, total: int, bug: str) -> None:
        progress_calls.append((index, total, bug))

    # One canned reply per payload so both bugs are triaged.
    fake_provider = FakeProvider([_OK] * 2)
    payloads = [{"number": number} for number in ("100", "200")]

    await triage_bugs(
        fake_provider,
        payloads,
        system_prompt="sys",
        on_progress=record,
    )

    # Index is 1-based and the total stays constant across calls.
    assert progress_calls == [(1, 2, "100"), (2, 2, "200")]
def append_report(path: Path, content: str) -> Path:
    """Add the AI report ``content`` to the end of the markdown file at ``path``.

    What gets appended is, in order: a blank-line gap, :data:`AI_APPEND_NOTICE`
    (a horizontal rule followed by a "review critically" notice) and then
    ``content``. The separator keeps the AI-generated section visibly apart
    from the human-written triage report, so it is not mistaken for part of it.

    Returns ``path`` unchanged, for the caller's convenience.
    """
    # Append mode: existing text in the file is never rewritten.
    with path.open(mode="a", encoding="utf-8") as stream:
        stream.write("".join(("\n\n", AI_APPEND_NOTICE, content)))
    return path
""" if not payloads: logger.info("No bugs to triage with the AI agent.") @@ -138,7 +138,4 @@ def on_progress(index: int, total: int, bug: str) -> None: async with spinner if spinner is not None else contextlib.nullcontext(): outcomes = await triage_bugs(provider, payloads, system_prompt, on_progress=on_progress) - report = render_report(outcomes) - path = write_report(report, preferred_dir=preferred_dir) - logger.info("AI triage report written to %s", path) - return path + return render_report(outcomes) diff --git a/startriage/cli.py b/startriage/cli.py index 2485127..fd6531a 100644 --- a/startriage/cli.py +++ b/startriage/cli.py @@ -178,7 +178,10 @@ def _build_parser() -> argparse.ArgumentParser: triage_p.add_argument( "--ai", action="store_true", - help="Also run AI triage on every bug found, writing autotriage-YYYY-MM-DD.md", + help=( + "Also run AI triage on every bug found. With --markdown the AI report is " + "appended to that file; otherwise it is written to autotriage-YYYY-MM-DD.md" + ), ) triage_p.set_defaults(func=_run_triage) @@ -379,7 +382,7 @@ async def _run_triage(args: argparse.Namespace, config: StarTriageConfig) -> Non results = await run_triage(config, filter, output_cfg) if args.ai: - await _ai_triage_results(config, results, provider) + await _ai_triage_results(config, results, provider, output_cfg.markdown_path) async def _run_todo(args: argparse.Namespace, config: StarTriageConfig) -> None: @@ -420,6 +423,7 @@ async def _ai_triage_results( config: StarTriageConfig, results: Sequence[tuple[str, TriageResult]], provider, + markdown_path: Path | None, ) -> None: """Run the AI agent over the Launchpad tasks gathered by a normal triage run.""" from .ai import payloads_from_tasks, run_agent_on_payloads @@ -432,8 +436,26 @@ async def _ai_triage_results( break payloads = await asyncio.to_thread(payloads_from_tasks, tasks) - path = await run_agent_on_payloads(config, payloads, provider=provider) - if path is not None: + report = await 
def _emit_ai_report(report: str, markdown_path: Path | None) -> None:
    """Persist the AI ``report`` produced by a ``triage --ai`` run.

    * ``markdown_path`` given (``--markdown``): the report is appended to that
      file via ``append_report``, which puts a notice in front of it.
    * ``markdown_path`` is ``None``: the report goes to its own file via
      ``write_report`` (the dated ``autotriage-YYYY-MM-DD.md`` report).

    In both cases a one-line confirmation naming the file is printed to stdout.
    """
    # Imported lazily, inside the function, exactly as the other AI helpers do.
    from .ai import append_report, write_report

    if markdown_path is None:
        path = write_report(report)
        print(f"AI triage report written to {path}")
        return

    append_report(markdown_path, report)
    print(f"AI triage appended to {markdown_path}")
@pytest.mark.asyncio
async def test_run_agent_on_payloads_returns_markdown():
    """The runner hands back the rendered markdown and queries the agent once."""
    fake_provider = FakeProvider([_CANNED])

    markdown = await run_agent_on_payloads(
        StarTriageConfig(),
        [{"number": "123", "title": "boom"}],
        provider=fake_provider,
    )

    assert markdown is not None
    expected_fragments = (
        "## LP #123 — pkg — boom on start",
        "**Suggested status:** Triaged",
    )
    for fragment in expected_fragments:
        assert fragment in markdown

    # Exactly one agent call, and its user message (second element of the
    # recorded call) carries the serialized payload.
    assert len(fake_provider.calls) == 1
    assert '"number": "123"' in fake_provider.calls[0][1]
You get a list of bugs as input and iterate through them, performing the actions below. -Actions # what are the possible actions it should perform, and how to perform them +# Actions Perform these actions in order for each bug. Use the results of earlier steps to inform later ones. -### 1. Validate the Report +## 1. Validate the Report Check the following criteria: - Does the report identify at least one specific source package? @@ -25,34 +24,25 @@ If validation fails: - Not a bug (support request, expected behavior, configuration error, unsupported setup) → recommend status **Invalid**. Explain why. - Process ticket (sync request, merge request, SRU, MIR, freeze exception) → recommend **no-change**. These are tracked separately. - If validation passes, proceed to step 2. - - -### 2. Search for Duplicates and Existing Fixes - +## 2. Search for Duplicates and Existing Fixes Perform these searches in parallel: -#### 2.1 Search Launchpad for duplicates +### 2.1 Search Launchpad for duplicates - URL pattern: `https://bugs.launchpad.net/ubuntu/+source/SOURCE_PACKAGE/+bugs?field.searchtext=SEARCH_TERMS` - Look for bugs with matching symptoms. If a duplicate is found, recommend marking the current bug as a duplicate of the older/better-reported one. - -#### 2.2 Search Debian for related bugs or fixes +### 2.2 Search Debian for related bugs or fixes - URL pattern: `https://bugs.debian.org/cgi-bin/pkgreport.cgi?archive=both;src=SOURCE_PACKAGE` - Look for matching bugs. If a fix exists in Debian, note the Debian bug number and the fix (patch, version, or commit). - If either search finds a clear solution (existing duplicate, or fix already in Debian), note it and proceed to step 5 (Describe the Bug). If it does not, still give us the references so we may use it as part of the final decision. - - -### 3. Search Upstream - +## 3. Search Upstream - Determine the upstream repository, homepage, and bug tracker. 
Sources (in order): the bug's affected source package page on Launchpad, the package's `debian/control` @@ -61,27 +51,19 @@ If it does not, still give us the references so we may use it as part of the fin - Search the upstream bug tracker and/or git repository for matching issues or commits. - If the upstream project uses GitHub, GitLab, or similar, search the issues and recent commits. - If a fix is found upstream, note the commit hash or issue URL and proceed to step 5. If it does not, still give us the references so we may use it as part of the final decision. - - -### 4. Search Other Distributions - +## 4. Search Other Distributions If steps 2-3 did not yield a solution, search other distributions. Prioritize in this order: - - **Fedora**: `https://bugzilla.redhat.com/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&product=Fedora&content=SEARCH_TERMS` - **Arch Linux**: `https://gitlab.archlinux.org/archlinux/packaging/packages/PACKAGE_NAME/-/issues` or `https://bugs.archlinux.org/` (for legacy bugs) - **Gentoo**: `https://bugs.gentoo.org/buglist.cgi?query_format=specific&order=relevance+desc&bug_status=__open__&content=SEARCH_TERMS` - Other distros may be searched if the above yield nothing. - - -### 5. Describe the Bug - +## 5. Describe the Bug Write a structured description containing: - **Affected package(s):** source package name(s) @@ -91,9 +73,7 @@ Write a structured description containing: - **Impact:** who is affected and how severely (data loss? service interruption? cosmetic?) - **Related bugs:** LP duplicates, Debian bugs, upstream issues found in steps 2-4 - -### 6. Analyze the Source Code - +## 6. Analyze the Source Code If the bug appears valid and no fix was found in steps 2-4: - Obtain the source code. Methods (in order of preference): @@ -106,24 +86,19 @@ If it doesnt work for any reason (unexpected) - Search for the code responsible for the reported error (grep for error messages, function names, etc.). 
- Identify the offending lines and explain the root cause. - -### 7. Propose a Fix - +## 7. Propose a Fix - If a fix was found in steps 2-4 (Debian patch, upstream commit, other distro patch), reference it and confirm it applies to the affected Ubuntu source version. - If no existing fix was found but the root cause is clear from step 6, write a proposed fix as a unified diff. - The proposed fix goes ONLY in your returned result (the `proposed_fix` field). Do not apply it to any source tree. - If you cannot produce a fix with reasonable confidence, set `proposed_fix.kind` to `none`. +# Context - -Context # What is needed as background to perform the actions - -### Reference documentation +## Reference documentation - Ubuntu Maintainers Handbook — Bug Triage: https://github.com/canonical/ubuntu-maintainers-handbook/blob/main/BugTriage.md - -### Bug statuses +## Bug statuses When recommending a status change, use one of these: - **Invalid**: the report is not a bug, or the issue is already fixed in the reported version(s). - **Incomplete**: more information is needed from the reporter before the bug can be acted on. @@ -131,41 +106,33 @@ When recommending a status change, use one of these: - Duplicate: self explanatory - **no-change** (not a Launchpad status): leave the bug as-is in its current status ("New", “Confirmed”, etc). This means the bug needs more engineering input beyond what this triage can provide. - -### Optional tags +## Optional tags - `server-todo`: the bug has a known fix or very high priority. The team should work on it soon. - `bitesize`: the bug is actionable and the fix is straightforward (e.g., a patch is already available upstream or in Debian and applies cleanly). - `server-triage-discuss`: the bug is ambiguous and should be discussed by the team in the next standup or weekly meeting. - `regression-update`: the bug appears to be a regression caused by an SRU or security update. 
- -### Definitions +## Definitions - **Debdiff**: a unified diff between two versions of a Debian/Ubuntu source package, generated by `debdiff old.dsc new.dsc`. It shows all changes between the two versions. - **SRU**: Stable Release Update — a bug fix backported to a stable (non-development) Ubuntu release. - **MIR**: Main Inclusion Request — a request to promote a package from Universe to Main. - -### Package cache +## Package cache A per-package metadata cache is planned for a future iteration but is NOT available yet. For now, derive upstream/Debian/homepage information from the bug's affected source package, the package's `debian/control` `Homepage:` field, and `debian/watch`. -### Special cases to be aware of +## Special cases to be aware of Certain packages have known triaging patterns (from the handbook): - **MySQL**: check for duplicates first; many reports are common usage errors. Check `mysql-8.0` bugs sorted by heat. - **libvirt/virtualization**: "permission denied" issues are often caused by AppArmor profiles applied by libvirt. Ask for `dmesg` AppArmor denials. +# Expectation - -Expectation # What do we expect as output/result - - -### Workflow - +## Workflow For each bug in the input: - 1. **Validate** the report (Action step 1). If invalid or incomplete, record the status recommendation and stop processing this bug. 2. **Search** for duplicates and existing fixes (Action steps 2-4). Stop searching as soon as a feasible solution is found. 3. **Describe** the bug (Action step 5). @@ -173,9 +140,7 @@ For each bug in the input: a. **Analyze** the source code (Action step 6). b. **Propose** a fix if possible (Action step 7). - -### Output - +## Output You do NOT write any files. For each bug, **return a single JSON object** with this exact schema (the surrounding tool renders it into the `autotriage-YYYY-MM-DD.md` @@ -206,16 +171,11 @@ Field guidance: found; `kind = "diff"` with a unified diff only when you generated one; otherwise `kind = "none"`. 
- -### When in doubt - +## When in doubt If you cannot confidently determine the correct status or whether a fix applies, recommend **no-change** and add a note explaining the uncertainty. Suggest the `server-triage-discuss` tag so the team can review it. - - - -Constraints # What the agent should explicitly NOT do +# Constraints 1. **You do not write output files.** Return the JSON result described in Output; the surrounding tool writes the report. You may freely use your shell/file tools @@ -226,9 +186,7 @@ Constraints # What the agent should explicitly NOT do 4. **Read-only external access.** Do not post comments on bugs, change bug statuses, subscribe teams, or modify any external system. Your output is recommendations only; a human engineer will act on them. 5. **No speculation on internal architecture.** If you don't have enough information about a package's internals, say so rather than guessing. - - -Assumptions # What the agent needs to assume before thinking about it +# Assumptions 1. The bug reporter is not necessarily a software engineer. They may be facing a configuration issue, using an unsupported setup (e.g., third-party packages/PPAs), or misidentifying the faulty package. 2. The bug may be a duplicate of an existing report. @@ -238,20 +196,16 @@ Assumptions # What the agent needs to assume before thinking about it 6. The agent has read-only access to Launchpad, Debian BTS, upstream trackers, and other external resources unless explicitly stated otherwise. 7. Process tickets (syncs, merges, SRUs, MIRs) are out of scope for this triage workflow. - -Replayability # How can the agent improve itself as we run it again and again - +# Replayability After triaging, perform this self-improvement step: - -### Process improvements +## Process improvements Review your triage thought process and identify: - Steps that could be automated or made more systematic. - Information you needed but didn't have. - Decisions that were difficult or ambiguous. 
- **Return** these as a `suggested_improvements` string (markdown) alongside your result, proposing changes to any of the RACECAR sections (Role, Actions, Context, Expectations, Constraints, Assumptions, Replayability) of this specification. The