From 14b99201066d6eabe6f62e9b1fa1de4b977e9051 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:05:16 -0400 Subject: [PATCH 01/10] feat(parser): migrate codex provider Codex sessions have an append-only JSONL transcript plus a session_index.jsonl title sidecar. Moving Codex behind a concrete provider keeps that composite source identity and incremental append capability explicit at the provider boundary. The provider preserves dated and archived discovery, live-over-archived lookup, shallow index watch planning, index-event classification, index-aware mtimes, source hashing, full parse output, and append parsing with full-parse fallback signals. fix(parser): preserve codex provider sidecar semantics Codex index changes are part of source freshness, so the provider cannot treat unchanged transcript size as no new data when the index mtime drove the fingerprint. The provider also needs to keep legacy live-over-archived UUID behavior and classify removed transcript paths syntactically. Index events now conservatively refresh sibling Codex sources because this provider layer has no DB state for title diffing; the sync engine can still apply its DB-aware filtering before provider dispatch is fully authoritative. Validation: go test -tags "fts5" ./internal/parser -run TestCodexProvider -count=1; go vet ./...; git diff --check. go test -tags "fts5" ./internal/parser -count=1 currently fails on TestProviderMigrationModes because inherited lower provider branches such as claude still need their branch-local shadow opt-ins. fix(parser): make codex provider sidecars authoritative The Codex provider could not safely infer sidecar-only freshness from a single max mtime. Rather than advertise append-only parsing with incomplete sidecar state, keep provider-authoritative Codex parses on the full-parse path until the facade can model sidecar dirtiness explicitly. 
Also route persisted path lookup and changed-path classification through the same UUID canonicalization as discovery so archived duplicates do not win over live dated transcripts. Validation: go test -tags "fts5" ./internal/parser -run 'Test(CodexProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare codex shadow parity Codex is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseCodexSession. The fixture uses the real sessions/YYYY/MM/DD layout plus sibling session_index.jsonl, proving the provider preserves title sidecar behavior, parser output, and data-version planning. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesCodexLegacyParser|TestCodexProvider|TestParseCodex|TestProviderMigrationModes' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check fix(parser): accept codex legacy-shaped sources Provider-authoritative Codex sync still has to rediscover sessions that were stored by the legacy parser even when their rollout filename does not expose a UUID-shaped session id. Without that compatibility path, the later dispatch migration can drop or fail to reprocess valid Codex transcripts that ParseCodexSession can read from session metadata. Keep the UUID-aware source contract as the preferred path and fall back to root-scoped JSONL sources only when Codex path metadata does not apply, so normal duplicate canonicalization remains unchanged while legacy-shaped fixtures stay reachable. 
Validation: go test ./internal/parser -count=1; go fmt ./...; go vet ./...; go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestCodexProvider|TestSyncEngineCodex|TestSyncSingleSessionHashCodex|TestSyncEngineSkipCache' -count=1; git diff --check refactor(parser): fold codex into provider Make the Codex provider own its source discovery, lookup, and parse behavior instead of shimming the package-level free functions. Delete DiscoverCodexSessions, FindCodexSourceFile, ParseCodexSession, and ParseCodexSessionFrom: discovery and find-source bodies move onto the codex source set (discoverSessionPaths, findSourceFile), and parse moves onto the provider (parseSession, parseSessionFrom). Drop the Codex AgentDef DiscoverFunc/FindSourceFunc hooks and make Codex provider-authoritative; ShallowWatchRootsFunc and the exec-source helpers (IsCodexExecSessionFile, ResolveCodexShallowWatchRoots, the one-time codex_exec skip migration) stay since only the four parser entrypoints must go. A provider has no database handle, so the engine reproduces the DB-aware and mtime-aware bookkeeping the legacy single-session JSONL path performed, scoped to Codex to preserve behavior exactly: - shouldSkipProviderSourceByDB folds the session_index.jsonl sidecar into a DB-stored fingerprint skip, so an unchanged transcript is not reparsed when only the shared index mtime advanced and this session's title did not change, and a resync still skips after the in-memory skip cache is cleared. - The provider Parse force-replaces stored rows because Codex emits a full parse (it does not advertise incremental append); a late token_count line appended to an existing turn rewrites the stored message instead of being dropped by an append-only write. 
- Index events keep flowing through the engine's DB-aware classifyCodexIndexPath rather than the provider's broad index fan-out: the engine fans out only to sessions whose stored title changed and pins the chosen on-disk copy (SourceRefForPath) so the provider's live-over-archived canonicalization cannot resurrect a stale duplicate over the stored copy. - SyncAllSince re-expands a UUID's live and archived duplicates (AllSourcePathsForUUID) before the mtime cutoff filter, restoring the legacy discover-then-filter order so a changed archived copy newer than the cutoff is not lost behind an older live copy. Route parse-diff, the token-use disk probe, and the SSH remote resolve script through provider Discover/FindSource for provider-authoritative agents that no longer carry a DiscoverFunc, so Codex sources stay discoverable, resolvable on disk, and transferable (including the session_index.jsonl sidecar). Replace the deleted shadow-baseline test with provider-API coverage (provider Discover/Parse through ObserveProviderSource) plus a guard that the four legacy entrypoints stay gone, route the package and engine tests through the provider methods, and remove codex_provider.go from the pending shim scan list. This also fixes the previously known-failing TestSyncPathsCodexIndexEventRefreshesStoredDuplicate, since the index event now honors the stored archived copy. test(sync): host shared shadow source helper at codex fold The per-provider shadow/parse tests share writeProviderShadowSourceFile to write source fixtures. The Codex fold is the lowest branch that calls it, so the canonical definition lives here; later provider folds inherit it instead of redeclaring their own copies. test(sync): remove unused codex stat assignments The pre-commit lint hook rejects two Codex appended-fixture tests because they assign os.Stat results back to info without using the value. The tests already assert the append and close operations that matter for setup. 
Removing the unused assignments keeps staticcheck clean for the Codex provider migration branch. fix(parser): pin codex duplicate sources Codex discovery and raw-ID lookup should still prefer the live dated transcript, but exact filesystem events and DB-stored source hints are different: the caller has already selected a concrete source path. Canonicalizing those paths back to a stale live duplicate can overwrite an updated archived transcript. Changed-path classification now returns the source pinned to the event path, and non-fresh stored path/fingerprint lookup returns the exact source so SyncSingleSession preserves the archived path already recorded in the database. Validation: go test -tags "fts5" ./internal/parser -run 'TestCodexProvider(FindSourcePinsExactArchivedDuplicate|ChangedPathPinsArchivedDuplicate|SourceMethods|DiscoverDedupesLiveAndArchivedByUUID)' -count=1; go test -tags "fts5" ./internal/sync -run 'TestSync(PathsCodexArchivedDuplicateEventPinsChangedFile|SingleSessionCodexPreservesStoredArchivedDuplicate|PathsCodexIndexEventRefreshesStoredDuplicate|AllSinceCodexKeepsChangedArchivedDuplicate)' -count=1; go test -tags "fts5" ./internal/parser -run 'TestCodexProvider|TestParseCodex|TestDiscoverCodex' -count=1; go test -tags "fts5" ./internal/sync -run 'Test.*Codex.*' -count=1; go vet ./...; git diff --check fix(sync): keep codex freshness skips out of cache Codex provider DB-fresh skips are successful freshness decisions, not parse failures or intentional no-session skips. Recording them in the persistent skip cache can hide a later parser data-version bump because the cache check runs before the DB freshness check. Keep DB-fresh provider skips non-cacheable and make existing skip-cache entries fall through when a stored row at that path has a stale data version. 
The same bypass helper still preserves the existing stale-project self-healing behavior. Validation: go test -tags "fts5" ./internal/sync -run 'TestProcessFile(SkipCacheReparsesStaleCodex(Project|DataVersion)|CodexDBFreshSkipIsNotCached)|Test.*Codex.*' -count=1; go test -tags "fts5" ./internal/parser -run 'TestCodexProvider|TestParseCodex|TestDiscoverCodex' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): surface codex provider discovery failures Provider-backed parse-diff should not report a clean or incomplete diff when provider discovery failed. Returning that error keeps requested provider-authoritative agents honest and matches the expectation that parse-diff is a verification surface, not a best-effort sync. Also pin coverage for stale Codex index entries whose transcripts no longer resolve, so the existing empty-candidate guard cannot regress into an invalid empty work item. Validation: go test -tags "fts5" ./internal/sync -run 'Test(ParseDiffProviderDiscoveryErrorFails|ClassifyCodexIndexPathSkipsMissingTranscript|ProcessFile(SkipCacheReparsesStaleCodex(Project|DataVersion)|CodexDBFreshSkipIsNotCached))' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): drop duplicate shadowCallerProvider Discover in codex test --- cmd/agentsview/token_use.go | 67 +- internal/parser/codex.go | 40 +- internal/parser/codex_parser_test.go | 123 +++- internal/parser/codex_provider.go | 627 ++++++++++++++++++ internal/parser/codex_provider_test.go | 368 ++++++++++ internal/parser/discovery.go | 60 +- internal/parser/discovery_test.go | 4 +- internal/parser/parser_test.go | 16 +- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 1 - internal/parser/provider_test.go | 22 +- internal/parser/skill_inference_test.go | 4 +- internal/parser/types.go | 2 - 
internal/sync/engine.go | 401 +++++++++-- internal/sync/engine_integration_test.go | 113 +++- internal/sync/engine_test.go | 191 +++++- internal/sync/parsediff.go | 21 +- internal/sync/provider_shadow_caller_test.go | 36 + internal/sync/provider_shadow_codex_test.go | 83 +++ internal/sync/provider_shadow_support_test.go | 19 + 21 files changed, 1992 insertions(+), 210 deletions(-) create mode 100644 internal/parser/codex_provider.go create mode 100644 internal/parser/codex_provider_test.go create mode 100644 internal/sync/provider_shadow_codex_test.go create mode 100644 internal/sync/provider_shadow_support_test.go diff --git a/cmd/agentsview/token_use.go b/cmd/agentsview/token_use.go index bccd5e55d..8d3799603 100644 --- a/cmd/agentsview/token_use.go +++ b/cmd/agentsview/token_use.go @@ -86,11 +86,11 @@ func resolveRawSessionID( // Canonical disk probe: if the input starts with a known // agent prefix, trust that interpretation first and strip - // before calling FindSourceFunc (which rejects IDs with + // before resolving the source (which rejects IDs with // colons via IsValidSessionID). for _, def := range parser.Registry { if def.IDPrefix == "" || !def.FileBased || - def.FindSourceFunc == nil { + !agentHasDiskSourceLookup(def) { continue } if !strings.HasPrefix(input, def.IDPrefix) { @@ -98,7 +98,7 @@ func resolveRawSessionID( } bareID := strings.TrimPrefix(input, def.IDPrefix) for _, dir := range agentDirs[def.Type] { - if def.FindSourceFunc(dir, bareID) != "" { + if findAgentSourceFile(def, dir, bareID) != "" { return input, true } } @@ -110,11 +110,11 @@ func resolveRawSessionID( // colon-bearing raw IDs (Kimi, OpenClaw, Kiro IDE) may // match. 
for _, def := range parser.Registry { - if !def.FileBased || def.FindSourceFunc == nil { + if !def.FileBased || !agentHasDiskSourceLookup(def) { continue } for _, dir := range agentDirs[def.Type] { - if def.FindSourceFunc(dir, input) != "" { + if findAgentSourceFile(def, dir, input) != "" { return def.IDPrefix + input, true } } @@ -123,6 +123,63 @@ func resolveRawSessionID( return input, false } +// agentHasDiskSourceLookup reports whether a session source can be located on +// disk by raw ID for the agent: via the legacy AgentDef FindSourceFunc hook, or +// via a provider-authoritative provider's FindSource for agents whose lookup was +// folded onto the provider (e.g. Codex). +func agentHasDiskSourceLookup(def parser.AgentDef) bool { + if def.FindSourceFunc != nil { + return true + } + if parser.ProviderMigrationModes()[def.Type] != + parser.ProviderMigrationProviderAuthoritative { + return false + } + _, ok := parser.ProviderFactoryByType(def.Type) + return ok +} + +// findAgentSourceFile resolves a raw agent session ID to an on-disk source path +// under dir, using the legacy FindSourceFunc when present and otherwise the +// provider's FindSource (RawSessionID lookup). Returns "" when no source +// resolves or the agent has no on-disk lookup. +func findAgentSourceFile(def parser.AgentDef, dir, rawID string) string { + if def.FindSourceFunc != nil { + return def.FindSourceFunc(dir, rawID) + } + factory, ok := parser.ProviderFactoryByType(def.Type) + if !ok { + return "" + } + provider := factory.NewProvider(parser.ProviderConfig{Roots: []string{dir}}) + source, found, err := provider.FindSource( + context.Background(), + parser.FindSourceRequest{RawSessionID: rawID}, + ) + if err != nil || !found { + return "" + } + if path, ok := providerSourcePath(source); ok { + return path + } + return "" +} + +// providerSourcePath extracts the on-disk path a provider SourceRef points to, +// preferring the display path and falling back to the fingerprint key or key. 
+func providerSourcePath(source parser.SourceRef) (string, bool) { + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if candidate != "" { + return candidate, true + } + } + return "", false +} + // usageExitCode classifies a SessionUsage into an exit code: 2 when // the session is not in the DB, 0 when token data OR cost is present, // 3 when the session exists but has neither. Cost-only sessions diff --git a/internal/parser/codex.go b/internal/parser/codex.go index af763aada..d104b73ef 100644 --- a/internal/parser/codex.go +++ b/internal/parser/codex.go @@ -1305,12 +1305,31 @@ func IsCodexExecSessionFile(path string) bool { return false } -// ParseCodexSession parses a Codex JSONL session file. -// The includeExec parameter is retained for backward -// compatibility; exec-originated sessions are now always -// parsed and imported. +// ParseCodexSession and ParseCodexSessionFrom are the exported seam used by the +// S3 sync path (internal/sync), which buffers an s3:// Codex object to a temp +// file and parses it through the legacy processCodex. The Codex provider owns +// these bodies as receiver methods that use no receiver state, so the wrappers +// invoke them on a zero-value provider. They are removed once S3 support folds +// into the JSONL source sets. func ParseCodexSession( path, machine string, includeExec bool, +) (*ParsedSession, []ParsedMessage, error) { + return (&codexProvider{}).parseSession(path, machine, includeExec) +} + +func ParseCodexSessionFrom( + path string, offset int64, startOrdinal int, includeExec bool, +) ([]ParsedMessage, time.Time, int64, error) { + return (&codexProvider{}).parseSessionFrom(path, offset, startOrdinal, includeExec) +} + +// parseSession parses a Codex JSONL session file into a session and its +// messages. The includeExec parameter is retained for backward compatibility; +// exec-originated sessions are now always parsed and imported. 
This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. +func (p *codexProvider) parseSession( + path, machine string, includeExec bool, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) if err != nil { @@ -1675,12 +1694,13 @@ func CodexTranscriptConsumedSize(path string) (int64, error) { return readJSONLFrom(path, 0, func(line string) {}) } -// ParseCodexSessionFrom parses only new lines from a Codex -// JSONL file starting at the given byte offset. Returns only -// the newly parsed messages (with ordinals starting at -// startOrdinal) and the latest timestamp seen. Used for -// incremental re-parsing of large append-only session files. -func ParseCodexSessionFrom( +// parseSessionFrom parses only new lines from a Codex JSONL file starting at +// the given byte offset. Returns only the newly parsed messages (with ordinals +// starting at startOrdinal) and the latest timestamp seen. Used for incremental +// re-parsing of large append-only session files. This is the provider-owned +// incremental parse entrypoint; the package-level free function was folded onto +// the provider. 
+func (p *codexProvider) parseSessionFrom( path string, offset int64, startOrdinal int, diff --git a/internal/parser/codex_parser_test.go b/internal/parser/codex_parser_test.go index cc2acf282..d01fff3df 100644 --- a/internal/parser/codex_parser_test.go +++ b/internal/parser/codex_parser_test.go @@ -19,11 +19,74 @@ func runCodexParserTest(t *testing.T, fileName, content string, includeExec bool fileName = "test.jsonl" } path := createTestFile(t, fileName, content) - sess, msgs, err := ParseCodexSession(path, "local", includeExec) + sess, msgs, err := parseCodexTestSession(t, path, "local", includeExec) require.NoError(t, err) return sess, msgs } +// newCodexTestProvider builds a concrete codexProvider so package tests can +// exercise the folded parse, discovery, and source-lookup behavior directly +// through provider methods now that the package-level ParseCodexSession, +// ParseCodexSessionFrom, DiscoverCodexSessions, and FindCodexSourceFile free +// functions are gone. +func newCodexTestProvider(t *testing.T, roots ...string) *codexProvider { + t.Helper() + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*codexProvider) + require.True(t, ok) + return cp +} + +// parseCodexTestSession parses a Codex session through the provider-owned +// parseSession method, replacing the removed package-level ParseCodexSession. +func parseCodexTestSession( + t *testing.T, path, machine string, includeExec bool, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newCodexTestProvider(t).parseSession(path, machine, includeExec) +} + +// parseCodexTestSessionFrom parses appended Codex lines through the +// provider-owned parseSessionFrom method, replacing the removed package-level +// ParseCodexSessionFrom. 
+func parseCodexTestSessionFrom( + t *testing.T, path string, offset int64, startOrdinal int, includeExec bool, +) ([]ParsedMessage, time.Time, int64, error) { + t.Helper() + return newCodexTestProvider(t).parseSessionFrom(path, offset, startOrdinal, includeExec) +} + +// discoverCodexTestSessions discovers Codex session paths under root through +// the provider source set, returning the legacy DiscoveredFile shape the tests +// assert against, replacing the removed DiscoverCodexSessions. +func discoverCodexTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCodexTestProvider(t, root) + paths := provider.sources.discoverSessionPaths(root) + if len(paths) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(paths)) + for _, path := range paths { + files = append(files, DiscoveredFile{ + Path: path, + Agent: AgentCodex, + }) + } + return files +} + +// findCodexTestSourceFile resolves a Codex session UUID to a transcript path +// through the provider source set, replacing the removed FindCodexSourceFile. 
+func findCodexTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newCodexTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func assertToolResultEvents( t *testing.T, got []ParsedToolResultEvent, @@ -65,7 +128,7 @@ func TestParseCodexSession_UsesThreadNameFromSessionIndex(t *testing.T) { index := `{"id":"abc-123","thread_name":"Renamed from Codex","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, msgs, err := ParseCodexSession(sessionPath, "local", false) + sess, msgs, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "Renamed from Codex", sess.SessionName) @@ -86,7 +149,7 @@ func TestParseCodexSession_LeavesSessionNameEmptyWithoutThreadName(t *testing.T) index := `{"id":"abc-123","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, _, err := ParseCodexSession(sessionPath, "local", false) + sess, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Empty(t, sess.SessionName) @@ -106,7 +169,7 @@ func TestParseCodexSession_UsesThreadNameFromArchivedSessions(t *testing.T) { index := `{"id":"abc-123","thread_name":"Archived title","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, _, err := ParseCodexSession(sessionPath, "local", false) + sess, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "Archived title", sess.SessionName) @@ -125,7 +188,7 @@ func TestParseCodexSession_MtimeIncludesSessionIndex(t *testing.T) { index := `{"id":"abc-123","thread_name":"Original","updated_at":"2026-06-11T17:34:20Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - 
sess1, _, err := ParseCodexSession(sessionPath, "local", false) + sess1, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) mtime1 := sess1.File.Mtime @@ -135,7 +198,7 @@ func TestParseCodexSession_MtimeIncludesSessionIndex(t *testing.T) { require.NoError(t, os.WriteFile(indexPath, []byte(renamed), 0o644)) require.NoError(t, os.Chtimes(indexPath, future, future)) - sess2, _, err := ParseCodexSession(sessionPath, "local", false) + sess2, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) assert.Greater(t, sess2.File.Mtime, mtime1, "mtime must advance when session_index.jsonl is updated") assert.Equal(t, "Renamed", sess2.SessionName) @@ -1345,7 +1408,7 @@ func TestParseCodexSessionFrom_ForkReplaySpansOffset(t *testing.T) { ) path := createTestFile(t, "fork-incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "codex:"+forkID, sess.ID) @@ -1371,7 +1434,7 @@ func TestParseCodexSessionFrom_ForkReplaySpansOffset(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, 0, false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 0, false) require.NoError(t, err) // Only the genuine turn survives; the replayed assistant answer @@ -1732,7 +1795,7 @@ func TestParseCodexSessionFrom_Incremental(t *testing.T) { path := createTestFile(t, "incremental.jsonl", initial) // Full parse to get baseline. 
- sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "codex:inc-1", sess.ID) @@ -1762,7 +1825,7 @@ func TestParseCodexSessionFrom_Incremental(t *testing.T) { require.NoError(t, f.Close()) // Incremental parse from the offset. - newMsgs, endedAt, _, err := ParseCodexSessionFrom( + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 1, false, ) require.NoError(t, err) @@ -1814,7 +1877,7 @@ func TestParseCodexSessionFrom_LateTokenCountRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -1849,7 +1912,7 @@ func TestParseCodexSessionFrom_FunctionCallOutputRequiresFullParse(t *testing.T) require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -1879,7 +1942,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "looking", tsEarlyS5), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 1, sess.UserMessageCount) require.Len(t, msgs, 2) @@ -1893,7 +1956,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "No issues found.", tsLateS5), )) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 
len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 2) assert.Equal(t, RoleUser, newMsgs[0].Role) @@ -1908,7 +1971,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "looking", tsEarlyS5), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 1, sess.UserMessageCount) require.Len(t, msgs, 2) @@ -1923,7 +1986,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "No issues found.", tsLateS5), )) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) assert.Equal(t, RoleAssistant, newMsgs[0].Role) @@ -1940,7 +2003,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("user", "something else entirely", "2024-01-01T10:00:03Z"), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 2, sess.UserMessageCount) @@ -1950,7 +2013,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { appendLines(t, path, testjsonl.CodexMsgJSON("user", prompt, tsLate)) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) assert.Equal(t, RoleUser, newMsgs[0].Role) @@ -1989,7 +2052,7 @@ func TestParseCodexSessionFrom_SkipsSessionMeta(t *testing.T) { f.WriteString(extra) f.Close() - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := 
parseCodexTestSessionFrom(t, path, offset, 5, false, ) require.NoError(t, err) @@ -2013,7 +2076,7 @@ func TestParseCodexSessionFrom_NoNewData(t *testing.T) { offset := info.Size() // Parse from end of file — no new data. - newMsgs, endedAt, _, err := ParseCodexSessionFrom( + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 10, false, ) require.NoError(t, err) @@ -2046,7 +2109,7 @@ func TestParseCodexSessionFrom_SubagentOutputRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2077,7 +2140,7 @@ func TestParseCodexSessionFrom_CollabAgentSpawnEndRequiresFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2115,7 +2178,7 @@ func TestParseCodexSessionFrom_WaitCallRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 4, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 4, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2153,7 +2216,7 @@ func TestParseCodexSessionFrom_WaitAgentCallRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 4, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 4, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2179,7 +2242,7 @@ func TestParseCodexSessionFrom_SystemMessageDoesNotRequireFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, endedAt, _, err := 
ParseCodexSessionFrom(path, offset, 1, false) + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 1, false) require.NoError(t, err) assert.Equal(t, 0, len(newMsgs)) assert.False(t, endedAt.IsZero()) @@ -2210,7 +2273,7 @@ func TestParseCodexSessionFrom_RunningNotificationRequiresFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 1, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 1, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2236,7 +2299,7 @@ func TestParseCodexSessionFrom_NonSubagentFunctionOutputRequiresFullParse(t *tes require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 1, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 1, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -2277,7 +2340,7 @@ func TestParseCodexSessionFrom_SeedsModelFromTurnContext( require.NoError(t, err) require.NoError(t, f2.Close()) - newMsgs2, _, _, err := ParseCodexSessionFrom( + newMsgs2, _, _, err := parseCodexTestSessionFrom(t, path, offset, 2, false, ) require.NoError(t, err) @@ -2324,7 +2387,7 @@ func TestParseCodexSessionFrom_SeedsBoundaryAfterTurnContext( require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 0, false, ) require.NoError(t, err) @@ -2373,7 +2436,7 @@ func TestParseCodexSessionFrom_EmptyModelReset( require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 2, false, ) require.NoError(t, err) diff --git a/internal/parser/codex_provider.go b/internal/parser/codex_provider.go new file mode 100644 index 000000000..cd63eca70 --- /dev/null +++ b/internal/parser/codex_provider.go @@ -0,0 +1,627 @@ 
+package parser
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+)
+
+// Compile-time assertion that *codexProvider satisfies Provider.
+var _ Provider = (*codexProvider)(nil)
+
+// codexProviderFactory builds Codex providers from the agent definition it
+// holds.
+type codexProviderFactory struct {
+	def AgentDef
+}
+
+// newCodexProviderFactory returns a ProviderFactory that holds
+// cloneAgentDef(def) rather than def itself.
+func newCodexProviderFactory(def AgentDef) ProviderFactory {
+	return codexProviderFactory{def: cloneAgentDef(def)}
+}
+
+// Definition returns cloneAgentDef applied to the factory's definition.
+func (f codexProviderFactory) Definition() AgentDef {
+	return cloneAgentDef(f.def)
+}
+
+// Capabilities returns the capability set from codexProviderCapabilities; it
+// does not depend on the factory's definition.
+func (f codexProviderFactory) Capabilities() Capabilities {
+	return codexProviderCapabilities()
+}
+
+// NewProvider builds a codexProvider from cfg.Clone(). The provider's source
+// set is constructed from the cloned config's Roots.
+func (f codexProviderFactory) NewProvider(cfg ProviderConfig) Provider {
+	cfg = cfg.Clone()
+	return &codexProvider{
+		ProviderBase: ProviderBase{
+			Def:    cloneAgentDef(f.def),
+			Caps:   codexProviderCapabilities(),
+			Config: cfg,
+		},
+		sources: newCodexSourceSet(cfg.Roots),
+	}
+}
+
+// codexProvider is the Provider implementation for Codex. It embeds
+// ProviderBase and forwards source discovery, watching, lookup, and
+// fingerprinting to its codexSourceSet.
+type codexProvider struct {
+	ProviderBase
+	sources codexSourceSet
+}
+
+// Discover forwards to the source set's Discover.
+func (p *codexProvider) Discover(ctx context.Context) ([]SourceRef, error) {
+	return p.sources.Discover(ctx)
+}
+
+// WatchPlan forwards to the source set's WatchPlan.
+func (p *codexProvider) WatchPlan(ctx context.Context) (WatchPlan, error) {
+	return p.sources.WatchPlan(ctx)
+}
+
+// SourcesForChangedPath forwards to the source set's SourcesForChangedPath.
+func (p *codexProvider) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	return p.sources.SourcesForChangedPath(ctx, req)
+}
+
+// FindSource rewrites req with ProviderFindRequestWithRawSessionID(p.Def, req)
+// and then forwards to the source set's FindSource.
+// NOTE(review): presumably the rewrite derives RawSessionID from
+// FullSessionID so full-ID lookups reach the raw-ID path — confirm against
+// the helper.
+func (p *codexProvider) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	req = ProviderFindRequestWithRawSessionID(p.Def, req)
+	return p.sources.FindSource(ctx, req)
+}
+
+// AllSourcePathsForUUID returns every on-disk Codex transcript path under the
+// provider's roots whose filename carries the given session UUID, without the
+// live-over-archived deduplication Discover applies. A UUID can exist as both a
+// live dated copy and a flat archived copy under the same root; the sync engine
+// uses the full set so an mtime cutoff can judge each copy independently.
+func (p *codexProvider) AllSourcePathsForUUID(uuid string) []string { + if uuid == "" { + return nil + } + seen := make(map[string]struct{}) + var paths []string + for _, root := range p.sources.roots { + for _, path := range p.sources.discoverSessionPaths(root) { + if CodexSessionUUIDFromFilename(filepath.Base(path)) != uuid { + continue + } + clean := filepath.Clean(path) + if _, ok := seen[clean]; ok { + continue + } + seen[clean] = struct{}{} + paths = append(paths, path) + } + } + return paths +} + +// SourceRefForPath builds a SourceRef pinned to the exact transcript path, +// without live-over-archived canonicalization. Discovery, raw-ID lookup, and +// fresh stored-source lookup still prefer the live dated transcript, but +// changed-path events and non-fresh stored paths preserve the already-selected +// on-disk copy. The sync engine uses this when its DB-aware or mtime-aware +// logic has chosen a duplicated Codex UUID source (e.g. a stored archived copy +// or the cutoff-newer copy), so that choice is honored instead of being flipped +// back to the preferred dated layout. Returns false when the path is not a +// recognizable Codex source. 
+func (p *codexProvider) SourceRefForPath(path string) (SourceRef, bool) {
+	for _, root := range p.sources.roots {
+		// UUID-keyed ref first; both attempts require an existing regular file.
+		if source, ok := p.sources.sourceRef(root, path, true); ok {
+			return source, true
+		}
+		// Otherwise accept any .jsonl under the root, keyed by its path.
+		if source, ok := p.sources.directPathSource(root, path, true); ok {
+			return source, true
+		}
+	}
+	return SourceRef{}, false
+}
+
+// Fingerprint forwards to the source set's Fingerprint.
+func (p *codexProvider) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	return p.sources.Fingerprint(ctx, source)
+}
+
+// Parse runs a full parse of the transcript behind req.Source.
+//
+// It returns the context's error if ctx is already done, and an error if no
+// path can be recovered from the source. The machine name is req.Machine,
+// falling back to the provider config's Machine. When parseSession yields no
+// session, the outcome is complete with SkipReason set to SkipNoSession.
+// Otherwise the outcome carries a single result at DataVersionCurrent with
+// ForceReplace set; a non-empty req.Fingerprint.Hash overwrites the parsed
+// session's file hash.
+func (p *codexProvider) Parse(
+	ctx context.Context,
+	req ParseRequest,
+) (ParseOutcome, error) {
+	if err := ctx.Err(); err != nil {
+		return ParseOutcome{}, err
+	}
+	path, ok := p.sources.pathFromSource(req.Source)
+	if !ok {
+		return ParseOutcome{}, fmt.Errorf("codex source path unavailable")
+	}
+	machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine)
+	sess, msgs, err := p.parseSession(path, machine, false)
+	if err != nil {
+		return ParseOutcome{}, err
+	}
+	if sess == nil {
+		return ParseOutcome{
+			ResultSetComplete: true,
+			SkipReason:        SkipNoSession,
+		}, nil
+	}
+	// Prefer the caller-supplied fingerprint hash over the parsed one.
+	if req.Fingerprint.Hash != "" {
+		sess.File.Hash = req.Fingerprint.Hash
+	}
+	return ParseOutcome{
+		Results: []ParseResultOutcome{{
+			Result: ParseResult{
+				Session:  *sess,
+				Messages: msgs,
+			},
+			DataVersion: DataVersionCurrent,
+		}},
+		ResultSetComplete: true,
+		// Codex transcripts are append-only and the provider always emits a full
+		// parse (it does not advertise incremental append). A full parse is the
+		// authoritative message set, so force-replace the stored rows; this
+		// preserves the legacy behavior where a late token_count line appended to
+		// an existing turn rewrites the stored message instead of being dropped by
+		// an append-only write.
+		ForceReplace: true,
+	}, nil
+}
+
+// ParseIncremental never parses: it always reports IncrementalUnsupported with
+// an empty outcome, returning the context's error when ctx is already done.
+func (p *codexProvider) ParseIncremental(
+	ctx context.Context,
+	_ IncrementalRequest,
+) (IncrementalOutcome, IncrementalStatus, error) {
+	if err := ctx.Err(); err != nil {
+		return IncrementalOutcome{}, IncrementalUnsupported, err
+	}
+	return IncrementalOutcome{}, IncrementalUnsupported, nil
+}
+
+// codexSource is the payload stored in SourceRef.Opaque for Codex sources.
+// UUID and Layout are populated for UUID-keyed refs and left at their zero
+// values for direct-path refs.
+type codexSource struct {
+	Root   string
+	Path   string
+	UUID   string
+	Layout CodexLayout
+}
+
+// codexSourceSet resolves Codex sources under a fixed list of root
+// directories.
+type codexSourceSet struct {
+	roots []string
+}
+
+// newCodexSourceSet returns a source set whose roots are
+// cleanJSONLRoots(roots).
+func newCodexSourceSet(roots []string) codexSourceSet {
+	return codexSourceSet{roots: cleanJSONLRoots(roots)}
+}
+
+// Discover returns the deduplicated sources found under every root.
+func (s codexSourceSet) Discover(ctx context.Context) ([]SourceRef, error) {
+	return s.discover(ctx, func(string) bool { return true })
+}
+
+// discover scans the roots accepted by includeRoot and returns one SourceRef
+// per source key, ordered by sortJSONLSources. When two paths map to the same
+// key, the one preferCodexSource favours is kept. The context is checked once
+// per root and its error is returned if it is done.
+func (s codexSourceSet) discover(
+	ctx context.Context,
+	includeRoot func(string) bool,
+) ([]SourceRef, error) {
+	var sources []SourceRef
+	byKey := make(map[string]SourceRef)
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		if !includeRoot(root) {
+			continue
+		}
+		for _, path := range s.discoverSessionPaths(root) {
+			// UUID-keyed ref first, then the path-keyed fallback.
+			source, ok := s.sourceRef(root, path, true)
+			if !ok {
+				source, ok = s.directPathSource(root, path, true)
+			}
+			if !ok {
+				continue
+			}
+			// Keep the existing entry unless the new one is preferred.
+			if current, ok := byKey[source.Key]; ok &&
+				!preferCodexSource(source, current) {
+				continue
+			}
+			byKey[source.Key] = source
+		}
+	}
+	// Map iteration order is random; the sort below makes the result stable.
+	for _, source := range byKey {
+		sources = append(sources, source)
+	}
+	sortJSONLSources(sources)
+	return sources, nil
+}
+
+// discoverSessionPaths finds all Codex JSONL session file paths under
+// sessionsDir, covering both the standard year/month/day layout and a flat
+// archived directory. Paths are returned sorted for deterministic discovery,
+// matching the behavior the package-level entrypoint provided before the fold.
+func (s codexSourceSet) discoverSessionPaths(sessionsDir string) []string { + var paths []string + + entries, err := os.ReadDir(sessionsDir) + if err != nil { + return nil + } + for _, entry := range entries { + if entry.IsDir() { + continue + } + if !isCodexSessionFilename(entry.Name()) { + continue + } + paths = append(paths, filepath.Join(sessionsDir, entry.Name())) + } + + walkCodexDayDirs(sessionsDir, func(dayPath string) bool { + dayEntries, err := os.ReadDir(dayPath) + if err != nil { + return true + } + for _, sf := range dayEntries { + if sf.IsDir() { + continue + } + if !isCodexSessionFilename(sf.Name()) { + continue + } + paths = append(paths, filepath.Join(dayPath, sf.Name())) + } + return true + }) + + slices.Sort(paths) + return paths +} + +// findSourceFile resolves a Codex session file by UUID under sessionsDir. +// It prefers the standard year/month/day live path when present, then falls +// back to a flat archived directory entry, matching the lookup precedence the +// package-level entrypoint provided before the fold. 
+func (s codexSourceSet) findSourceFile(sessionsDir, sessionID string) string {
+	if !IsValidSessionID(sessionID) {
+		return ""
+	}
+
+	// First pass: flat files directly under sessionsDir. A read error here is
+	// tolerated so the dated layout is still searched.
+	var archived string
+	entries, err := os.ReadDir(sessionsDir)
+	if err == nil {
+		for _, f := range entries {
+			if f.IsDir() {
+				continue
+			}
+			name := f.Name()
+			if !isCodexSessionFilename(name) {
+				continue
+			}
+			if extractUUIDFromRollout(name) == sessionID {
+				archived = filepath.Join(sessionsDir, name)
+				break
+			}
+		}
+	}
+
+	// Second pass: day directories. Returning false stops the walk once a
+	// match has been recorded.
+	var live string
+	walkCodexDayDirs(sessionsDir, func(dayPath string) bool {
+		if live != "" {
+			return false
+		}
+		dayEntries, err := os.ReadDir(dayPath)
+		if err != nil {
+			return true
+		}
+		for _, f := range dayEntries {
+			if f.IsDir() {
+				continue
+			}
+			name := f.Name()
+			if !isCodexSessionFilename(name) {
+				continue
+			}
+			if extractUUIDFromRollout(name) == sessionID {
+				live = filepath.Join(dayPath, name)
+				return false
+			}
+		}
+		return true
+	})
+	// The dated match wins over the flat one when both exist.
+	if live != "" {
+		return live
+	}
+	return archived
+}
+
+// WatchPlan returns the directories to watch. Each root gets one recursive
+// watch limited to "*.jsonl" files. Each distinct cleaned path returned by
+// ResolveCodexShallowWatchRoots gets one non-recursive watch limited to
+// CodexSessionIndexFilename. The context argument is not used.
+func (s codexSourceSet) WatchPlan(context.Context) (WatchPlan, error) {
+	roots := make([]WatchRoot, 0, len(s.roots)*2)
+	seenShallow := make(map[string]struct{})
+	for _, root := range s.roots {
+		roots = append(roots, WatchRoot{
+			Path:         root,
+			Recursive:    true,
+			IncludeGlobs: []string{"*.jsonl"},
+			DebounceKey:  string(AgentCodex) + ":sessions:" + root,
+		})
+		for _, shallow := range ResolveCodexShallowWatchRoots(root) {
+			shallow = filepath.Clean(shallow)
+			// Several roots can resolve to the same index directory.
+			if _, ok := seenShallow[shallow]; ok {
+				continue
+			}
+			seenShallow[shallow] = struct{}{}
+			roots = append(roots, WatchRoot{
+				Path:         shallow,
+				Recursive:    false,
+				IncludeGlobs: []string{CodexSessionIndexFilename},
+				DebounceKey:  string(AgentCodex) + ":index:" + shallow,
+			})
+		}
+	}
+	return WatchPlan{Roots: roots}, nil
+}
+
+// SourcesForChangedPath maps a filesystem event path to the sources it
+// affects. A path whose base name is CodexSessionIndexFilename is handed to
+// sourcesForIndexPath. Any other path yields at most one source: the first
+// root that recognizes it as an existing regular file, or, when
+// jsonlMissingPathFallbackAllowed(req) holds, the first root that recognizes
+// it without the regular-file check. It returns nil, nil when no root
+// matches, and the context's error when ctx is already done.
+func (s codexSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	if filepath.Base(req.Path) == CodexSessionIndexFilename {
+		return s.sourcesForIndexPath(ctx, req.Path)
+	}
+	for _, root := range s.roots {
+		source, ok := s.sourceRef(root, req.Path, true)
+		if !ok {
+			source, ok = s.directPathSource(root, req.Path, true)
+		}
+		if ok {
+			return []SourceRef{source}, nil
+		}
+		if !jsonlMissingPathFallbackAllowed(req) {
+			continue
+		}
+		// Retry without requiring the file to exist, so a path that is no
+		// longer on disk can still be classified.
+		source, ok = s.sourceRef(root, req.Path, false)
+		if !ok {
+			source, ok = s.directPathSource(root, req.Path, false)
+		}
+		if ok {
+			return []SourceRef{source}, nil
+		}
+	}
+	return nil, nil
+}
+
+// FindSource locates the source for a lookup request.
+//
+// It first tries req.StoredFilePath and then req.FingerprintKey against every
+// root. A match is returned as-is unless req.RequireFreshSource is set and
+// req.PreferStoredSource is not, in which case it goes through
+// canonicalSource. If neither path matches and req.RawSessionID is non-empty,
+// each root is searched with findSourceFile and the first hit is returned
+// through canonicalSource. The bool result is false when nothing is found.
+func (s codexSourceSet) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceRef{}, false, err
+	}
+	for _, path := range []string{req.StoredFilePath, req.FingerprintKey} {
+		if path == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			if source, ok := s.sourceRef(root, path, true); ok {
+				if !req.RequireFreshSource || req.PreferStoredSource {
+					return source, true, nil
+				}
+				return s.canonicalSource(ctx, source)
+			}
+			if source, ok := s.directPathSource(root, path, true); ok {
+				if !req.RequireFreshSource || req.PreferStoredSource {
+					return source, true, nil
+				}
+				return s.canonicalSource(ctx, source)
+			}
+		}
+	}
+	if req.RawSessionID == "" {
+		return SourceRef{}, false, nil
+	}
+	for _, root := range s.roots {
+		path := s.findSourceFile(root, req.RawSessionID)
+		if path == "" {
+			continue
+		}
+		// Only the UUID-keyed form is tried here; there is no direct-path
+		// fallback on the raw-ID route.
+		if source, ok := s.sourceRef(root, path, true); ok {
+			return s.canonicalSource(ctx, source)
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// Fingerprint stats and hashes the file behind source. It fails when the
+// context is done, when no path can be recovered from source, when the stat
+// fails, when the path is a directory, or when hashing fails. The key is the
+// first non-empty of source.FingerprintKey, source.Key, and the path. MTimeNS
+// is CodexEffectiveMtime applied to the path and the file's own modification
+// time, not the raw file mtime.
+func (s codexSourceSet) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceFingerprint{}, err
+	}
+	path, ok := s.pathFromSource(source)
+	if !ok {
+		return SourceFingerprint{}, fmt.Errorf("codex source path unavailable")
+	}
+	info, err := os.Stat(path)
+	if err != nil {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err)
+	}
+	if info.IsDir() {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path)
+	}
+	hash, err := hashJSONLSourceFile(path)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	return SourceFingerprint{
+		Key:     firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path),
+		Size:    info.Size(),
+		MTimeNS: CodexEffectiveMtime(path, info.ModTime().UnixNano()),
+		Hash:    hash,
+	}, nil
+}
+
+// pathFromSource recovers the transcript path for source. A codexSource value
+// in Opaque is authoritative, even when its Path is empty. A *codexSource is
+// used only when non-nil with a non-empty Path. Otherwise DisplayPath,
+// FingerprintKey, and Key are tried in that order against every root, and the
+// first one recognized as an existing regular file wins.
+func (s codexSourceSet) pathFromSource(source SourceRef) (string, bool) {
+	switch src := source.Opaque.(type) {
+	case codexSource:
+		return src.Path, src.Path != ""
+	case *codexSource:
+		if src != nil && src.Path != "" {
+			return src.Path, true
+		}
+	}
+	for _, candidate := range []string{
+		source.DisplayPath,
+		source.FingerprintKey,
+		source.Key,
+	} {
+		for _, root := range s.roots {
+			// Both constructors store a codexSource value in Opaque, so the
+			// unchecked assertions below hold for refs they return.
+			if ref, ok := s.sourceRef(root, candidate, true); ok {
+				src := ref.Opaque.(codexSource)
+				return src.Path, true
+			}
+			if ref, ok := s.directPathSource(root, candidate, true); ok {
+				src := ref.Opaque.(codexSource)
+				return src.Path, true
+			}
+		}
+	}
+	return "", false
+}
+
+// sourcesForIndexPath returns every source discovered under the roots whose
+// parent directory equals the directory containing indexPath.
+func (s codexSourceSet) sourcesForIndexPath(
+	ctx context.Context,
+	indexPath string,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	indexDir := filepath.Dir(indexPath)
+	return s.discover(ctx, func(root string) bool {
+		return filepath.Dir(root) == indexDir
+	})
+}
+
+// sourceRef builds the UUID-keyed SourceRef for path under root. It reports
+// false when CodexSessionPathInfo rejects the cleaned pair or yields an empty
+// UUID, or when requireRegular is set and the path is not a regular file. The
+// key comes from codexSourceKey(uuid); DisplayPath and FingerprintKey are both
+// the cleaned path.
+func (s codexSourceSet) sourceRef(
+	root string,
+	path string,
+	requireRegular bool,
+) (SourceRef, bool) {
+	root = filepath.Clean(root)
+	path = filepath.Clean(path)
+	layout, uuid, ok := CodexSessionPathInfo(root, path)
+	if !ok || uuid == "" {
+		return SourceRef{}, false
+	}
+	if requireRegular && !IsRegularFile(path) {
+		return SourceRef{}, false
+	}
+	return SourceRef{
+		Provider:       AgentCodex,
+		Key:            codexSourceKey(uuid),
+		DisplayPath:    path,
+		FingerprintKey: path,
+		Opaque: codexSource{
+			Root:   root,
+			Path:   path,
+			UUID:   uuid,
+			Layout: layout,
+		},
+	}, true
+}
+
+func (s codexSourceSet) directPathSource( + root string, + path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !strings.HasSuffix(path, ".jsonl") || !pathUnderRoot(root, path) { + return SourceRef{}, false + } + if requireRegular && !IsRegularFile(path) { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentCodex, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: codexSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s codexSourceSet) canonicalSource( + ctx context.Context, + source SourceRef, +) (SourceRef, bool, error) { + src, ok := source.Opaque.(codexSource) + if !ok || src.UUID == "" { + return source, true, nil + } + best := source + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + path := s.findSourceFile(root, src.UUID) + if path == "" { + continue + } + candidate, ok := s.sourceRef(root, path, true) + if !ok { + continue + } + if preferCodexSource(candidate, best) { + best = candidate + } + } + return best, true, nil +} + +func codexSourceKey(uuid string) string { + return string(AgentCodex) + ":" + uuid +} + +func preferCodexSource(candidate, current SourceRef) bool { + cand := candidate.Opaque.(codexSource) + curr := current.Opaque.(codexSource) + if cand.Layout != curr.Layout { + return cand.Layout == CodexLayoutDated + } + return candidate.DisplayPath < current.DisplayPath +} + +func codexProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: 
CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + ToolResultEvents: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + TerminationStatus: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/codex_provider_test.go b/internal/parser/codex_provider_test.go new file mode 100644 index 000000000..f86267e43 --- /dev/null +++ b/internal/parser/codex_provider_test.go @@ -0,0 +1,368 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestCodexProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCodex) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCodexProviderSourceMethods(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e1" + sourcePath := writeCodexProviderSession(t, root, uuid, "Rename me") + indexPath := filepath.Join(base, CodexSessionIndexFilename) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+uuid+`","thread_name":"Renamed title","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 0o644)) + newer := time.Now().Add(time.Hour) + require.NoError(t, os.Chtimes(indexPath, newer, newer)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, 
ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, base, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{CodexSessionIndexFilename}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + source := discovered[0] + assert.Equal(t, AgentCodex, source.Provider) + assert.Equal(t, sourcePath, source.DisplayPath) + assert.Equal(t, sourcePath, source.FingerprintKey) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + for _, path := range []string{sourcePath, indexPath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + } + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Equal(t, info.Size(), fingerprint.Size) + assert.Equal(t, newer.UnixNano(), fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "codex:"+uuid, result.Result.Session.ID) + 
assert.Equal(t, AgentCodex, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, "api", result.Result.Session.Project) + assert.Equal(t, "Renamed title", result.Result.Session.SessionName) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestCodexProviderDoesNotAdvertiseIncrementalAppend(t *testing.T) { + root := t.TempDir() + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e2" + writeCodexProviderSession(t, root, uuid, "hello") + + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + assert.Equal(t, + CapabilityNotApplicable, + provider.Capabilities().Source.IncrementalAppend, + ) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{}, + SessionID: "codex:" + uuid, + Offset: 0, + StartOrdinal: 1, + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalUnsupported, status) + assert.Empty(t, outcome.Messages) +} + +func TestCodexProviderDiscoverDedupesLiveAndArchivedByUUID(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e5" + livePath := writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, livePath, discovered[0].DisplayPath) + 
assert.NotEqual(t, archivedPath, discovered[0].DisplayPath) +} + +func TestCodexProviderFindSourcePinsExactArchivedDuplicate(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e6" + livePath := writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: archivedPath, + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, archivedPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, livePath, found.DisplayPath) +} + +func TestCodexProviderFindSourceAcceptsLegacyShapedStoredPath(t *testing.T) { + root := t.TempDir() + sessionID := "test-uuid" + sourcePath := filepath.Join( + root, + "2024", + "01", + "15", + "rollout-20240115-"+sessionID+".jsonl", + ) + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + sessionID, + "/home/user/code/api", + "codex_cli_rs", + tsEarly, + ), + testjsonl.CodexMsgJSON("user", "Add tests", tsEarlyS1), + ) + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte(content), 0o644)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + changed, err := 
provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sourcePath, + FingerprintKey: sourcePath, + }) + require.NoError(t, err) + require.True(t, found) + assert.Equal(t, AgentCodex, source.Provider) + assert.Equal(t, sourcePath, source.DisplayPath) + assert.Equal(t, sourcePath, source.FingerprintKey) + + fingerprint, err := provider.Fingerprint(context.Background(), source) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: fingerprint, + Machine: "devbox", + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, "codex:"+sessionID, result.Result.Session.ID) + assert.Equal(t, "api", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestCodexProviderChangedPathPinsArchivedDuplicate(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e7" + _ = writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: archivedPath, 
EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, archivedPath, changed[0].DisplayPath) +} + +func TestCodexProviderChangedPathClassifiesRemovedTranscript(t *testing.T) { + root := t.TempDir() + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e8" + sourcePath := writeCodexProviderSession(t, root, uuid, "remove") + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + require.NoError(t, os.Remove(sourcePath)) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestCodexProviderIndexPathClassifiesAllSiblingSources(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + firstUUID := "019eb791-cf7d-75c1-8439-9ed74c1229e9" + secondUUID := "019eb791-cf7d-75c1-8439-9ed74c1229ea" + firstPath := writeCodexProviderSession(t, root, firstUUID, "first") + secondPath := writeCodexProviderSession(t, root, secondUUID, "second") + indexPath := filepath.Join(base, CodexSessionIndexFilename) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+firstUUID+`","thread_name":"Only first remains","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 0o644)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: indexPath, EventKind: "write"}, + ) + require.NoError(t, err) + assert.Equal(t, []string{firstPath, secondPath}, sourceDisplayPaths(changed)) +} + +func writeCodexProviderSession( + t *testing.T, + root, uuid, prompt string, +) string { + t.Helper() + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON(uuid, "/home/user/code/api", "codex_cli_rs", tsEarly), + testjsonl.CodexMsgJSON("user", 
prompt, tsEarlyS1), + ) + return writeCodexProviderSessionContent(t, root, uuid, content) +} + +func writeCodexProviderArchivedSession( + t *testing.T, + root, uuid, prompt string, +) string { + t.Helper() + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON(uuid, "/home/user/code/archive", "codex_cli_rs", tsEarly), + testjsonl.CodexMsgJSON("user", prompt, tsEarlyS1), + ) + path := filepath.Join(root, "rollout-2026-06-11T12-44-06-"+uuid+".jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + return path +} + +func writeCodexProviderSessionContent( + t *testing.T, + root, uuid, content string, +) string { + t.Helper() + path := filepath.Join( + root, + "2026", + "06", + "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + return path +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 09cdc1495..aef500698 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -524,6 +524,10 @@ func ClaudeProjectSessionFiles(projectsDir string) []DiscoveredFile { // DiscoverCodexSessions finds all Codex JSONL session files under // either the standard year/month/day layout or a flat archived dir. +// +// Local Codex discovery is owned by the Codex provider source set; this entry +// is retained as the s3:// discovery path (via discoverCodexS3), which the +// legacy S3 sync path consumes until S3 support folds into the source sets. func DiscoverCodexSessions(sessionsDir string) []DiscoveredFile { if strings.HasPrefix(sessionsDir, "s3://") { return discoverCodexS3(sessionsDir) @@ -645,62 +649,6 @@ func claudeFindSourceFile( return "" } -// FindCodexSourceFile finds a Codex session file by UUID. -// Prefers the standard year/month/day live path when present, -// then falls back to a flat archived dir entry. 
-func FindCodexSourceFile(sessionsDir, sessionID string) string { - if !IsValidSessionID(sessionID) { - return "" - } - - var archived string - entries, err := os.ReadDir(sessionsDir) - if err == nil { - for _, f := range entries { - if f.IsDir() { - continue - } - name := f.Name() - if !isCodexSessionFilename(name) { - continue - } - if extractUUIDFromRollout(name) == sessionID { - archived = filepath.Join(sessionsDir, name) - break - } - } - } - - var live string - walkCodexDayDirs(sessionsDir, func(dayPath string) bool { - if live != "" { - return false - } - entries, err := os.ReadDir(dayPath) - if err != nil { - return true - } - for _, f := range entries { - if f.IsDir() { - continue - } - name := f.Name() - if !isCodexSessionFilename(name) { - continue - } - if extractUUIDFromRollout(name) == sessionID { - live = filepath.Join(dayPath, name) - return false - } - } - return true - }) - if live != "" { - return live - } - return archived -} - func isCodexSessionFilename(name string) bool { return strings.HasPrefix(name, "rollout-") && strings.HasSuffix(name, ".jsonl") diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index b44bb3c4b..85c782fc6 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -199,7 +199,7 @@ func TestDiscoverCodexSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCodexSessions(dir) + files := discoverCodexTestSessions(t, dir) assertDiscoveredFiles(t, files, tt.wantFiles, AgentCodex) }) } @@ -439,7 +439,7 @@ func TestFindCodexSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCodexSourceFile(dir, tt.targetID) + got := findCodexTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index adff7c24a..046787e28 100644 --- 
a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -969,7 +969,7 @@ func TestCodexUserMessageCount(t *testing.T) { ) path := createTestFile(t, "codex-umc.jsonl", content) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") require.NotNil(t, sess, "session") require.Len(t, msgs, 4, "messages") @@ -983,9 +983,7 @@ func TestCodexSessionTimestampSemantics(t *testing.T) { path := createTestFile(t, "codex-ts-invalid.jsonl", content) buf := captureLog(t) - sess, msgs, err := ParseCodexSession( - path, "local", false, - ) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") assertZeroTimestamp(t, sess.StartedAt, "StartedAt") @@ -1002,9 +1000,7 @@ func TestCodexSessionTimestampSemantics(t *testing.T) { path := createTestFile(t, "codex-ts-long-invalid.jsonl", content) buf := captureLog(t) - _, _, err := ParseCodexSession( - path, "local", false, - ) + _, _, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") assertLogContains(t, buf, @@ -1038,9 +1034,7 @@ func TestParseCodexSessionOversizedLineSkipped(t *testing.T) { // skipping from the re-emitted-prompt dedup. 
content := meta + firstLine + oversizedLine + secondLine path := createTestFile(t, "oversized.jsonl", content) - sess, msgs, err := ParseCodexSession( - path, "local", false, - ) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "unexpected error") require.NotNil(t, sess, "session") require.Len(t, msgs, 2, "messages (oversized skipped)") @@ -1083,7 +1077,7 @@ func TestParseCodexSession_WorktreeBranchFallback(t *testing.T) { `{"type":"response_item","timestamp":"2024-01-01T00:00:01Z","payload":{"role":"user","content":[{"type":"input_text","text":"hello"}]}}` + "\n" path := createTestFile(t, "codex-worktree.jsonl", content) - sess, _, err := ParseCodexSession(path, "local", false) + sess, _, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") require.NotNil(t, sess, "session") assert.Equal(t, "agentsview", sess.Project, "project") diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 7a54cf967..9d1911c3b 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -358,6 +358,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newClaudeProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentCodex: + return newCodexProviderFactory(def) case AgentCowork: return newCoworkProviderFactory(def) case AgentCortex: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 6a91112ef..6f1c418f1 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -19,7 +19,7 @@ const ( var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationProviderAuthoritative, - AgentCodex: ProviderMigrationLegacyOnly, + AgentCodex: ProviderMigrationProviderAuthoritative, AgentCopilot: ProviderMigrationLegacyOnly, AgentGemini: 
ProviderMigrationLegacyOnly, AgentOpenHands: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 8e4aa92c4..302afd894 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -49,7 +49,6 @@ var providerNeutralEntrypoints = map[string]bool{ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, - "codex_provider.go": true, "copilot_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index e1f73995b..d3b701e58 100644 --- a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -149,7 +149,7 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - provider, ok := NewProvider(AgentCodex, ProviderConfig{ + provider, ok := NewProvider(AgentGemini, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -177,7 +177,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { source, found, err := provider.FindSource(ctx, FindSourceRequest{ RawSessionID: "session", - FullSessionID: "codex:session", + FullSessionID: "gemini:session", StoredFilePath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", }) @@ -186,7 +186,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.Empty(t, source) _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: AgentCodex, + Provider: AgentGemini, Key: "session", DisplayPath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", @@ -195,9 +195,9 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - 
Source: SourceRef{Provider: AgentCodex, Key: "session"}, + Source: SourceRef{Provider: AgentGemini, Key: "session"}, Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: "codex:session", + SessionID: "gemini:session", StartOrdinal: 1, Machine: "devbox", }) @@ -212,11 +212,11 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { Machine: "devbox", } - factory, ok := ProviderFactoryByType(AgentCodex) + factory, ok := ProviderFactoryByType(AgentGemini) require.True(t, ok) - assert.Equal(t, AgentCodex, factory.Definition().Type) + assert.Equal(t, AgentGemini, factory.Definition().Type) - provider, ok := NewProvider(AgentCodex, cfg) + provider, ok := NewProvider(AgentGemini, cfg) require.True(t, ok) require.NotNil(t, provider) @@ -233,7 +233,7 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { } func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - provider, ok := NewProvider(AgentCodex, ProviderConfig{ + provider, ok := NewProvider(AgentGemini, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -241,7 +241,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { outcome, err := provider.Parse(context.Background(), ParseRequest{ Source: SourceRef{ - Provider: AgentCodex, + Provider: AgentGemini, Key: "source", DisplayPath: "/tmp/source.jsonl", FingerprintKey: "/tmp/source.jsonl", @@ -257,7 +257,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) var unsupported UnsupportedProviderFeatureError require.ErrorAs(t, err, &unsupported) - assert.Equal(t, AgentCodex, unsupported.Provider) + assert.Equal(t, AgentGemini, unsupported.Provider) assert.Equal(t, ProviderFeatureParse, unsupported.Feature) } diff --git a/internal/parser/skill_inference_test.go b/internal/parser/skill_inference_test.go index eef4df90e..50595396b 100644 --- a/internal/parser/skill_inference_test.go +++ 
b/internal/parser/skill_inference_test.go @@ -477,7 +477,7 @@ func TestParseCodexSessionFromInfersSkillNameFromSeededCwd(t *testing.T) { testjsonl.CodexMsgJSON("user", "use the dashboard skill", tsEarlyS1), ) file := createTestFile(t, "incremental-skill.jsonl", initial) - _, msgs, err := ParseCodexSession(file, "local", false) + _, msgs, err := parseCodexTestSession(t, file, "local", false) require.NoError(t, err) info, err := os.Stat(file) @@ -494,7 +494,7 @@ func TestParseCodexSessionFromInfersSkillNameFromSeededCwd(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom(file, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, file, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) require.Len(t, newMsgs[0].ToolCalls, 1) diff --git a/internal/parser/types.go b/internal/parser/types.go index 2821dc0d3..f611a2c4e 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -128,8 +128,6 @@ var Registry = []AgentDef{ }, IDPrefix: "codex:", FileBased: true, - DiscoverFunc: DiscoverCodexSessions, - FindSourceFunc: FindCodexSourceFile, ShallowWatchRootsFunc: ResolveCodexShallowWatchRoots, }, { diff --git a/internal/sync/engine.go b/internal/sync/engine.go index e39a3b86d..7eb75a334 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -596,6 +596,17 @@ func (e *Engine) classifyProviderChangedPath( default: continue } + // Codex index (session_index.jsonl) events are owned by the engine's + // DB-aware classifyCodexIndexPath, which fans out only to sessions whose + // stored title changed and resolves a UUID's live/archived duplicate to + // the path the DB already tracks. The provider's broad index fan-out + // would re-add every sibling and prefer the live-over-archived layout, + // resurrecting a stale duplicate over the stored copy, so suppress it + // here and let the engine method classify the index event. 
+ if agentType == parser.AgentCodex && + filepath.Base(path) == parser.CodexSessionIndexFilename { + continue + } roots := e.agentDirs[agentType] if len(roots) == 0 { continue @@ -777,6 +788,19 @@ func providerDeletedPhysicalSQLiteSource( func dedupeDiscoveredFiles( files []parser.DiscoveredFile, +) []parser.DiscoveredFile { + return dedupeDiscoveredFilesByPreference(files, preferDiscoveredFile) +} + +func dedupeDiscoveredFilesPreferNewestCodex( + files []parser.DiscoveredFile, +) []parser.DiscoveredFile { + return dedupeDiscoveredFilesByPreference(files, preferNewestCodexDiscoveredFile) +} + +func dedupeDiscoveredFilesByPreference( + files []parser.DiscoveredFile, + prefer func(candidate, current parser.DiscoveredFile) bool, ) []parser.DiscoveredFile { if len(files) < 2 { return files @@ -786,7 +810,7 @@ func dedupeDiscoveredFiles( for _, file := range files { key := discoveredFileKey(file) if current, ok := bestByKey[key]; ok { - if preferDiscoveredFile(file, current) { + if prefer(file, current) { bestByKey[key] = file } continue @@ -837,6 +861,27 @@ func preferDiscoveredFile( return false } +func preferNewestCodexDiscoveredFile( + candidate, current parser.DiscoveredFile, +) bool { + if candidate.Agent == parser.AgentCodex && current.Agent == parser.AgentCodex { + candMTime, candOK := discoveredFileMTime(candidate.Path) + currMTime, currOK := discoveredFileMTime(current.Path) + if candOK && currOK && candMTime != currMTime { + return candMTime > currMTime + } + } + return preferDiscoveredFile(candidate, current) +} + +func discoveredFileMTime(path string) (int64, bool) { + info, err := os.Stat(path) + if err != nil { + return 0, false + } + return info.ModTime().UnixNano(), true +} + func (e *Engine) expandClaudeDuplicateCandidates( files []parser.DiscoveredFile, ) []parser.DiscoveredFile { @@ -963,20 +1008,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. 
- // Codex: either ////.jsonl - // or /.jsonl for archived sessions. - for _, codexDir := range e.agentDirs[parser.AgentCodex] { - if codexDir == "" { - continue - } - if _, _, ok := parser.CodexSessionPathInfo(codexDir, path); ok { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCodex, - }, true - } - } - // Copilot: /session-state/.jsonl // or: /session-state//events.jsonl for _, copilotDir := range e.agentDirs[parser.AgentCopilot] { @@ -2347,12 +2378,27 @@ func (e *Engine) syncAllLocked( } all = append(all, providerFound...) - if !since.IsZero() { + quickSyncCutoff := !since.IsZero() + if quickSyncCutoff { all = e.dedupeClaudeDiscoveredFiles(all) + // A Codex UUID can exist as both a live dated transcript and a flat + // archived copy. The provider's discovery deduplicates them to the + // preferred (live) layout, but the mtime cutoff filter runs before the + // engine's own dedup, so a changed archived copy that is newer than the + // cutoff would be lost behind an older live copy that the cutoff drops. + // Re-expand to every on-disk duplicate before filtering so the cutoff + // sees each copy's real mtime; the quick-sync dedupe below then keeps + // the newest surviving duplicate before falling back to normal layout + // preference. + all = e.expandCodexProviderDuplicates(all, scope) all = e.filterFilesByMtime(ctx, all, since) } - all = dedupeDiscoveredFiles(all) + if quickSyncCutoff { + all = dedupeDiscoveredFilesPreferNewestCodex(all) + } else { + all = dedupeDiscoveredFiles(all) + } all = e.dedupeClaudeDiscoveredFiles(all) all = e.filterShadowedLegacyKiroFiles(all) @@ -2830,6 +2876,85 @@ func (e *Engine) discoverProviderSources( return files, failures } +// expandCodexProviderDuplicates re-adds the on-disk duplicate paths of each +// discovered Codex source. 
The provider deduplicates a UUID's live and archived +// copies to the preferred layout at discovery time; this restores the dropped +// duplicates (scoped to the configured roots) so an mtime cutoff filter can +// judge each copy on its own mtime, matching the legacy discover-then-filter +// order. Non-Codex files and Codex files without a UUID-shaped name pass through +// unchanged. Duplicates are keyed by path so nothing is added twice. +func (e *Engine) expandCodexProviderDuplicates( + files []parser.DiscoveredFile, scope *rootSyncScope, +) []parser.DiscoveredFile { + pather := e.codexUUIDPathLister(scope) + if pather == nil { + return files + } + seen := make(map[string]struct{}, len(files)) + for _, f := range files { + seen[string(f.Agent)+"\x00"+filepath.Clean(f.Path)] = struct{}{} + } + out := files + for _, f := range files { + if f.Agent != parser.AgentCodex { + continue + } + uuid := parser.CodexSessionUUIDFromFilename(filepath.Base(f.Path)) + if uuid == "" { + continue + } + for _, dup := range pather(uuid) { + key := string(parser.AgentCodex) + "\x00" + filepath.Clean(dup) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, parser.DiscoveredFile{ + Path: dup, + Agent: parser.AgentCodex, + ProviderProcess: true, + ProviderSource: e.codexPinnedProviderSource(dup), + }) + } + } + return out +} + +// codexUUIDPathLister returns a function that lists every on-disk Codex +// transcript path for a UUID under the in-scope roots, or nil when the Codex +// provider is unavailable. It scopes a single provider to the in-scope roots so +// the returned paths cover both the live dated and flat archived copies of a +// duplicated UUID, including duplicates that share one root. 
+func (e *Engine) codexUUIDPathLister( + scope *rootSyncScope, +) func(string) []string { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return nil + } + roots := make([]string, 0, len(e.agentDirs[parser.AgentCodex])) + for _, root := range e.agentDirs[parser.AgentCodex] { + if root == "" || !scope.includes(root) { + continue + } + roots = append(roots, root) + } + if len(roots) == 0 { + return nil + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: roots, + Machine: e.machine, + }) + lister, ok := provider.(interface { + AllSourcePathsForUUID(string) []string + }) + if !ok { + return nil + } + return lister.AllSourcePathsForUUID +} + // recordSyncStarted persists the start time of a sync run // into pg_sync_state. Callers use this to compute mtime // cutoffs for future quick incremental syncs. @@ -3892,7 +4017,7 @@ func (e *Engine) processFile( // re-validation. if cacheSkip && !e.forceParse && !file.ForceParse { // parse-diff: ignore the skip cache if e.shouldUseCachedSkip(file, mtime, sourceFingerprint) { - if e.pathNeedsProjectReparse(file.Path) { + if e.pathNeedsCachedSkipBypass(file.Path) { e.clearSkip(file.Path) } else { res := processResult{ @@ -3914,11 +4039,10 @@ func (e *Engine) processFile( // legacy dispatch, via the S3 sync path. res = e.processS3Session(ctx, file, info) case parser.AgentCodex: - if strings.HasPrefix(file.Path, "s3://") { - res = e.processS3Session(ctx, file, info) - } else { - res = e.processCodex(file, info) - } + // Non-S3 Codex is provider-authoritative and handled earlier by + // processProviderFile; only s3:// Codex sources fall through to the + // legacy dispatch, via the S3 sync path. 
+ res = e.processS3Session(ctx, file, info) case parser.AgentCopilot: res = e.processCopilot(file, info) case parser.AgentReasonix: @@ -3990,6 +4114,25 @@ func (e *Engine) pathNeedsProjectReparse(path string) bool { return ok && parser.NeedsProjectReparse(project) } +func (e *Engine) pathNeedsCachedSkipBypass(path string) bool { + return e.pathNeedsProjectReparse(path) || + e.pathNeedsDataVersionReparse(path) +} + +func (e *Engine) pathNeedsDataVersionReparse(path string) bool { + if e == nil || e.db == nil { + return false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + if _, _, ok := e.db.GetFileInfoByPath(lookupPath); !ok { + return false + } + return e.db.GetDataVersionByPath(lookupPath) < db.CurrentDataVersion() +} + func (e *Engine) processProviderFile( ctx context.Context, file parser.DiscoveredFile, @@ -4074,12 +4217,20 @@ func (e *Engine) processProviderFile( cachedMtime, cached := e.skipCache[cacheKey] e.skipMu.RUnlock() if cached && cachedMtime == fingerprint.MTimeNS { - return processResult{ - skip: true, - mtime: fingerprint.MTimeNS, - cacheSkip: true, - cacheKey: cacheKey, - }, true + // A cached skip must not hide a session whose stored row needs + // self-healing (e.g. a parser data-version bump or generated + // roborev CI worktree project): clear the entry and fall through + // to a full reparse, mirroring the legacy process arm. + if e.pathNeedsCachedSkipBypass(file.Path) { + e.clearSkip(cacheKey) + } else { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: true, + cacheKey: cacheKey, + }, true + } } } @@ -4098,6 +4249,25 @@ func (e *Engine) processProviderFile( } incForceReplace := incRes.forceReplace + // DB-stored fingerprint skip. 
The provider has no database handle, so the + // engine reproduces the legacy DB-aware skip that single-session JSONL + // providers relied on: an unchanged source whose stored size and effective + // mtime already match is not reparsed, even when the in-memory skip cache + // was cleared (e.g. by SyncSingleSession) or never populated (a fresh + // engine). For Codex this also folds in the session_index.jsonl sidecar: + // a shared index mtime bump that did not change this session's title must + // not trigger a reparse. + if !e.forceParse && !file.ForceParse && + e.shouldSkipProviderSourceByDB(file, fingerprint) { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: true, + }, true + } + // DB-stored-file-info skip: a session whose persisted file_size/file_mtime // already match the source fingerprint (and whose data_version is current) // is unchanged and need not be reparsed. This reproduces the legacy @@ -5258,18 +5428,41 @@ func (e *Engine) tryIncrementalJSONL( }, true } -func (e *Engine) shouldSkipCodex( - path string, info os.FileInfo, +// shouldSkipProviderSourceByDB reports whether a provider-dispatched source is +// already stored at the parsed fingerprint and can be skipped without a reparse. +// It is the engine-side replacement for the DB-aware skip the legacy +// single-session JSONL processors performed, since a provider has no database +// handle. It is scoped to Codex: Codex's effective mtime folds in the shared +// session_index.jsonl sidecar, so a size-and-effective-mtime match plus a +// per-session title check preserves the legacy "skip when only the global index +// advanced but this session's name did not" semantics. Other providers keep +// their existing in-memory skip-cache behavior unchanged. 
+func (e *Engine) shouldSkipProviderSourceByDB( + file parser.DiscoveredFile, fingerprint parser.SourceFingerprint, ) bool { - if e.forceParse { // parse-diff: always re-parse + if file.Agent != parser.AgentCodex { return false } + return e.shouldSkipCodexFingerprint(file.Path, fingerprint) +} + +// shouldSkipCodexFingerprint reproduces the legacy shouldSkipCodex decision in +// terms of a provider SourceFingerprint. The fingerprint MTimeNS already folds +// in session_index.jsonl via CodexEffectiveMtime, so: +// - a stored size mismatch or stale data version forces a reparse; +// - an exact effective-mtime match skips; +// - an effective mtime ahead of the stored mtime driven only by the index +// (the raw transcript mtime is still at or below the stored mtime) skips +// unless this session's stored title differs from the current index title. +func (e *Engine) shouldSkipCodexFingerprint( + path string, fingerprint parser.SourceFingerprint, +) bool { lookupPath := path if e.pathRewriter != nil { lookupPath = e.pathRewriter(path) } storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok || storedSize != info.Size() { + if !ok || storedSize != fingerprint.Size { return false } if project, ok := e.db.GetProjectByPath(lookupPath); ok && @@ -5280,26 +5473,17 @@ func (e *Engine) shouldSkipCodex( db.CurrentDataVersion() { return false } - // A Codex title lives in session_index.jsonl, not the transcript, so a - // title-only rename can change the title with no transcript signal. Detect - // it directly rather than inferring it from an mtime inequality: the index - // mtime is folded into the stored watermark, so a later rename whose index - // mtime lands at or below that watermark is invisible to a mtime compare, - // and the old storedMtime==effectiveMtime fast path skipped without ever - // consulting the title. 
codexIndexSessionNameChanged reads the live title - // (cached per index file) and the stored name; a cheaper stored-name lookup - // to keep this fully off the hot skip path is a deferred follow-up. - if e.codexIndexSessionNameChanged(path) { - return false // title changed -> re-parse to refresh metadata + effectiveMtime := fingerprint.MTimeNS + if storedMtime == effectiveMtime { + return true } - // Title verified unchanged: skip when the transcript itself is unchanged. - // Compare the bare file mtime, not the index-folded effective mtime -- the - // stored watermark may already include a folded index mtime, and a later - // bump of the shared session_index.jsonl (e.g. another session's rename) - // lifts every session's effective mtime; with the title confirmed - // unchanged, that rise must not force a needless reparse. - fileMtime := info.ModTime().UnixNano() - return fileMtime <= storedMtime + fileMtime := effectiveMtime + if info, err := os.Stat(path); err == nil { + fileMtime = info.ModTime().UnixNano() + } + return effectiveMtime > storedMtime && + fileMtime <= storedMtime && + !e.codexIndexSessionNameChanged(path) } // codexIndexNeedsRefreshSince reports whether a Codex session whose transcript @@ -5377,7 +5561,7 @@ func (e *Engine) classifyCodexIndexPath( } var candidates []parser.DiscoveredFile for _, root := range sessionRoots { - if src := parser.FindCodexSourceFile(root, uuid); src != "" { + if src := e.codexSourceFileForUUID(root, uuid); src != "" { candidates = append(candidates, parser.DiscoveredFile{ Path: src, Agent: parser.AgentCodex, @@ -5390,11 +5574,72 @@ func (e *Engine) classifyCodexIndexPath( // A UUID can exist in both sessions/ and archived_sessions/. // Prefer the path the DB already tracks so a title rename does // not reparse a stale duplicate over the stored copy. 
- out = append(out, pickPreferredCodexDiscoveredFile(e.db, candidates)) + chosen := pickPreferredCodexDiscoveredFile(e.db, candidates) + // Pin the provider source to the chosen path and route it through the + // provider so processProviderFile parses exactly this copy instead of + // re-canonicalizing the UUID to the preferred dated layout, which would + // undo the DB-aware selection above. + chosen.ProviderProcess = true + chosen.ProviderSource = e.codexPinnedProviderSource(chosen.Path) + out = append(out, chosen) } return out } +// codexSourceFileForUUID resolves a Codex session UUID to its on-disk +// transcript path under a single sessions root, preferring the live dated +// layout over a flat archived entry. It scopes a Codex provider to that one +// root so the provider's cross-root live-over-archived canonicalization does +// not collapse a per-root duplicate; classifyCodexIndexPath then applies its +// own DB-aware preference across the per-root candidates. Returns "" when the +// provider, source lookup, or path resolution fails. +func (e *Engine) codexSourceFileForUUID(root, uuid string) string { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return "" + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: []string{root}, + Machine: e.machine, + }) + source, found, err := provider.FindSource( + context.Background(), + parser.FindSourceRequest{RawSessionID: uuid}, + ) + if err != nil || !found { + return "" + } + return providerDiscoveredPath(source) +} + +// codexPinnedProviderSource builds a Codex provider SourceRef pinned to the +// exact path, bypassing the provider's live-over-archived canonicalization. It +// is used when the engine's DB-aware or mtime-aware logic has already chosen +// which on-disk copy of a duplicated UUID to parse, so processProviderFile +// parses that copy instead of the provider's preferred dated layout. 
Returns +// nil when the Codex provider or the path's source shape is unavailable. +func (e *Engine) codexPinnedProviderSource(path string) *parser.SourceRef { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return nil + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[parser.AgentCodex], + Machine: e.machine, + }) + pinner, ok := provider.(interface { + SourceRefForPath(string) (parser.SourceRef, bool) + }) + if !ok { + return nil + } + source, ok := pinner.SourceRefForPath(path) + if !ok { + return nil + } + return &source +} + // codexStoredNameDiffers reports whether the stored session_name for a Codex // session differs from the given index title. Unknown sessions return false: // a brand-new session is synced through its own transcript event, not the @@ -5460,6 +5705,54 @@ func pickPreferredCodexDiscoveredFile( return chosen } +// shouldSkipCodex is the legacy file_path + effective-mtime skip check used by +// the S3 Codex sync path (processCodex). Non-S3 Codex is provider-authoritative +// and uses shouldSkipCodexFingerprint; this remains for s3:// Codex sources, +// which the S3 sync path buffers to a temp file and feeds through processCodex. +func (e *Engine) shouldSkipCodex( + path string, info os.FileInfo, +) bool { + if e.forceParse { // parse-diff: always re-parse + return false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) + if !ok || storedSize != info.Size() { + return false + } + if project, ok := e.db.GetProjectByPath(lookupPath); ok && + parser.NeedsProjectReparse(project) { + return false + } + if e.db.GetDataVersionByPath(lookupPath) < + db.CurrentDataVersion() { + return false + } + // A Codex title lives in session_index.jsonl, not the transcript, so a + // title-only rename can change the title with no transcript signal. 
Detect + // it directly rather than inferring it from an mtime inequality: the index + // mtime is folded into the stored watermark, so a later rename whose index + // mtime lands at or below that watermark is invisible to a mtime compare, + // and the old storedMtime==effectiveMtime fast path skipped without ever + // consulting the title. codexIndexSessionNameChanged reads the live title + // (cached per index file) and the stored name; a cheaper stored-name lookup + // to keep this fully off the hot skip path is a deferred follow-up. + if e.codexIndexSessionNameChanged(path) { + return false // title changed -> re-parse to refresh metadata + } + // Title verified unchanged: skip when the transcript itself is unchanged. + // Compare the bare file mtime, not the index-folded effective mtime -- the + // stored watermark may already include a folded index mtime, and a later + // bump of the shared session_index.jsonl (e.g. another session's rename) + // lifts every session's effective mtime; with the title confirmed + // unchanged, that rise must not force a needless reparse. 
+ fileMtime := info.ModTime().UnixNano() + return fileMtime <= storedMtime +} + func (e *Engine) processCodex( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 3bf7bfb49..9336d386d 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2663,6 +2663,104 @@ func TestSyncPathsCodexIndexEventRefreshesStoredDuplicate(t *testing.T) { assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) } +func TestSyncPathsCodexArchivedDuplicateEventPinsChangedFile(t *testing.T) { + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + archivedDir := filepath.Join(root, "archived_sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + require.NoError(t, os.MkdirAll(archivedDir, 0o755)) + env := setupTestEnv(t, WithCodexDirs([]string{codexDir, archivedDir})) + + uuid := "f7a8b9ca-7890-1234-ef01-456789012346" + staleLiveContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Stale live copy"). + String() + archivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + String() + updatedArchivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + AddCodexMessage(tsEarlyS5, "assistant", "Updated archived reply"). 
+ String() + + livePath := env.writeCodexSession( + t, + filepath.Join("2026", "05", "04"), + "rollout-2026-05-04T02-10-04-"+uuid+".jsonl", + staleLiveContent, + ) + archivedPath := env.writeSession( + t, archivedDir, + "rollout-2026-05-04T14-31-58-"+uuid+".jsonl", + archivedContent, + ) + initialTime := time.Now().Add(-2 * time.Hour) + require.NoError(t, os.Chtimes(livePath, initialTime, initialTime), "chtimes live") + require.NoError(t, os.Chtimes(archivedPath, initialTime, initialTime), "chtimes archived") + + env.engine.SyncAll(context.Background(), nil) + assert.Equal(t, livePath, env.db.GetSessionFilePath("codex:"+uuid)) + + newTime := time.Now().Add(-30 * time.Minute) + require.NoError(t, os.WriteFile(archivedPath, []byte(updatedArchivedContent), 0o644)) + require.NoError(t, os.Chtimes(archivedPath, newTime, newTime), "chtimes archived update") + + env.engine.SyncPaths([]string{archivedPath}) + + assert.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "archived transcript event must parse the changed file, not the stale live duplicate") + assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) +} + +func TestSyncSingleSessionCodexPreservesStoredArchivedDuplicate(t *testing.T) { + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + archivedDir := filepath.Join(root, "archived_sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + require.NoError(t, os.MkdirAll(archivedDir, 0o755)) + env := setupTestEnv(t, WithCodexDirs([]string{codexDir, archivedDir})) + + uuid := "f7a8b9ca-7890-1234-ef01-456789012347" + archivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + AddCodexMessage(tsEarlyS5, "assistant", "Archived reply"). + String() + staleLiveContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Stale live copy"). 
+ String() + + archivedPath := env.writeSession( + t, archivedDir, + "rollout-2026-05-04T14-31-58-"+uuid+".jsonl", + archivedContent, + ) + initialTime := time.Now().Add(-2 * time.Hour) + require.NoError(t, os.Chtimes(archivedPath, initialTime, initialTime), "chtimes archived") + + env.engine.SyncAll(context.Background(), nil) + require.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "DB must track the archived copy before a stale live duplicate appears") + + livePath := env.writeCodexSession( + t, + filepath.Join("2026", "05", "04"), + "rollout-2026-05-04T02-10-04-"+uuid+".jsonl", + staleLiveContent, + ) + require.NoError(t, os.Chtimes(livePath, initialTime, initialTime), "chtimes live") + + require.NoError(t, env.engine.SyncSingleSession("codex:"+uuid)) + + assert.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "single-session resync must preserve the stored archived source") + assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) +} + func TestSyncPathsGeminiRejectsWrongStructure(t *testing.T) { env := setupTestEnv(t) @@ -7768,13 +7866,18 @@ func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { live, err := os.ReadFile(path) require.NoError(t, err, "read live transcript") - require.Less(t, *sess.FileSize, int64(len(live)), - "partial trailing JSON should remain outside the consumed prefix") - prefix := live[:*sess.FileSize] - sum := sha256.Sum256(prefix) + // Codex is provider-authoritative and always full-parses (the provider does + // not advertise incremental append), so the stored file_size is the full + // transcript length and file_hash covers the whole file. The partial + // trailing JSON line is skipped during parsing but still counts toward the + // fingerprinted size/hash; once it is completed the file grows and the next + // sync re-parses it. 
+ require.Equal(t, int64(len(live)), *sess.FileSize, + "full parse stores the full transcript size") + sum := sha256.Sum256(live) wantHash := fmt.Sprintf("%x", sum[:]) assert.Equal(t, wantHash, *sess.FileHash, - "incremental Codex hash must match the consumed file_size prefix") + "Codex full-parse hash must match the full transcript") } func TestIncrementalSync_CodexExecAppendRetainsEvents(t *testing.T) { diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index c50a01440..ef0866317 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1420,8 +1420,10 @@ func TestShouldSkipCodexReparsesStaleProject(t *testing.T) { }, } - assert.False(t, e.shouldSkipCodex(path, info), - "stale generated roborev CI projects must be reparsed") + assert.False(t, e.shouldSkipCodexFingerprint(path, parser.SourceFingerprint{ + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + }), "stale generated roborev CI projects must be reparsed") } func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { @@ -1459,6 +1461,13 @@ func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { db: database, idPrefix: "host~", skipCache: map[string]int64{path: info.ModTime().UnixNano()}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, pathRewriter: func(path string) string { return "host:" + path }, @@ -1475,6 +1484,156 @@ func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { assert.Equal(t, "agentsview", res.results[0].Session.Project) } +func TestProcessFileSkipCacheReparsesStaleCodexDataVersion(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + path := filepath.Join(root, "rollout-2026-06-21T18-59-38-abc.jsonl") + content := testjsonl.JoinJSONL( + 
testjsonl.CodexSessionMetaJSON( + "abc", + "/home/user/code/agentsview", + "user", + "2024-01-01T10:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this", "2024-01-01T10:00:01Z"), + ) + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) + info, err := os.Stat(path) + require.NoError(t, err, "stat codex fixture") + + sess := db.Session{ + ID: "host~codex:abc", + Project: "agentsview", + Machine: "host", + Agent: "codex", + FilePath: strPtr("host:" + path), + FileSize: int64Ptr(info.Size()), + FileMtime: int64Ptr(info.ModTime().UnixNano()), + } + require.NoError(t, database.UpsertSession(sess)) + require.NoError(t, database.SetSessionDataVersion( + sess.ID, db.CurrentDataVersion()-1, + )) + + e := &Engine{ + db: database, + idPrefix: "host~", + skipCache: map[string]int64{path: info.ModTime().UnixNano()}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + pathRewriter: func(path string) string { + return "host:" + path + }, + } + + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentCodex, + Path: path, + }) + require.NoError(t, res.err) + require.False(t, res.skip, + "skip cache must not hide stale parser data versions") + require.Len(t, res.results, 1) +} + +func TestProcessFileCodexDBFreshSkipIsNotCached(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + path := filepath.Join(root, "rollout-2026-06-21T18-59-38-abc.jsonl") + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + "abc", + "/home/user/code/agentsview", + "user", + "2024-01-01T10:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this", "2024-01-01T10:00:01Z"), + ) + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) + info, err := os.Stat(path) + 
require.NoError(t, err, "stat codex fixture") + + sess := db.Session{ + ID: "host~codex:abc", + Project: "agentsview", + Machine: "host", + Agent: "codex", + FilePath: strPtr("host:" + path), + FileSize: int64Ptr(info.Size()), + FileMtime: int64Ptr(info.ModTime().UnixNano()), + } + require.NoError(t, database.UpsertSession(sess)) + require.NoError(t, database.SetSessionDataVersion( + sess.ID, db.CurrentDataVersion(), + )) + + e := &Engine{ + db: database, + idPrefix: "host~", + skipCache: map[string]int64{}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + pathRewriter: func(path string) string { + return "host:" + path + }, + } + + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentCodex, + Path: path, + }) + require.NoError(t, res.err) + require.True(t, res.skip) + assert.True(t, res.noCacheSkip) + assert.Empty(t, e.SnapshotSkipCache()) +} + +func TestClassifyCodexIndexPathSkipsMissingTranscript(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + indexPath := filepath.Join(root, parser.CodexSessionIndexFilename) + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e7" + missingPath := filepath.Join( + codexDir, + "2026", "06", "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + require.NoError(t, database.UpsertSession(db.Session{ + ID: "codex:" + uuid, + Project: "agentsview", + Machine: "local", + Agent: string(parser.AgentCodex), + SessionName: strPtr("Old title"), + FilePath: &missingPath, + })) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+uuid+`","thread_name":"New title",`+ + `"updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 0o644)) + engine := 
NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {codexDir}, + }, + Machine: "local", + }) + + files := engine.classifyCodexIndexPath(indexPath) + + assert.Empty(t, files) +} + func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { database := openTestDB(t) root := t.TempDir() @@ -1525,21 +1684,25 @@ func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") - info, err = os.Stat(path) - require.NoError(t, err, "stat appended codex fixture") - e := &Engine{ db: database, idPrefix: "host~", + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, pathRewriter: func(path string) string { return "host:" + path }, } - res := e.processCodex(parser.DiscoveredFile{ + res := e.processFile(context.Background(), parser.DiscoveredFile{ Agent: parser.AgentCodex, Path: path, - }, info) + }) require.NoError(t, res.err) require.Nil(t, res.incremental, "stale project metadata must force full parse even when file appended") @@ -1615,21 +1778,25 @@ func TestProcessCodexAppendedStaleProjectCarriesForceReplace(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") - info, err = os.Stat(path) - require.NoError(t, err, "stat appended codex fixture") - e := &Engine{ db: database, idPrefix: "host~", + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, 
pathRewriter: func(path string) string { return "host:" + path }, } - res := e.processCodex(parser.DiscoveredFile{ + res := e.processFile(context.Background(), parser.DiscoveredFile{ Agent: parser.AgentCodex, Path: path, - }, info) + }) require.NoError(t, res.err) require.Nil(t, res.incremental, "stale project metadata must force full parse even when file appended") diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index 39577484c..ef218aba7 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -3,7 +3,6 @@ package sync import ( "context" "fmt" - "log" "os" "path/filepath" "sort" @@ -80,7 +79,11 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi } continue } - files = append(files, e.parseDiffProviderSources(ctx, def.Type)...) + providerFiles, err := e.parseDiffProviderSources(ctx, def.Type) + if err != nil { + return nil, err + } + files = append(files, providerFiles...) } // DiscoverFunc does not emit the shared-SQLite source for Kiro // (data.sqlite3) or db-mode OpenCode (opencode.db) — normal sync @@ -215,14 +218,14 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi func (e *Engine) parseDiffProviderSources( ctx context.Context, agentType parser.AgentType, -) []parser.DiscoveredFile { +) ([]parser.DiscoveredFile, error) { factory, ok := e.providerFactories[agentType] if !ok || factory == nil { - return nil + return nil, nil } roots := e.agentDirs[agentType] if len(roots) == 0 { - return nil + return nil, nil } provider := factory.NewProvider(parser.ProviderConfig{ Roots: roots, @@ -230,8 +233,10 @@ func (e *Engine) parseDiffProviderSources( }) sources, err := provider.Discover(ctx) if err != nil { - log.Printf("parse-diff %s provider discovery: %v", agentType, err) - return nil + return nil, fmt.Errorf( + "parse-diff %s provider discovery: %w", + agentType, err, + ) } def := provider.Definition() var files []parser.DiscoveredFile @@ -253,7 +258,7 @@ func 
(e *Engine) parseDiffProviderSources( ProviderProcess: true, }) } - return files + return files, nil } func (e *Engine) parseDiffAgentDiscoverable(def parser.AgentDef) bool { diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index a389eac76..024e27f13 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -349,6 +349,42 @@ func TestProviderVirtualSourceBackedByEventPreservesHashInDBPath(t *testing.T) { assert.False(t, providerVirtualSourceBackedByEvent(sourcePath, filepath.Dir(dbPath))) } +func TestParseDiffProviderDiscoveryErrorFails(t *testing.T) { + root := t.TempDir() + discoverErr := errors.New("discover failed") + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCodex, + DisplayName: "Codex", + }, + }, + }, + discoverErr: discoverErr, + } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + report, err := engine.ParseDiff(context.Background(), ParseDiffOptions{ + Agents: []parser.AgentType{parser.AgentCodex}, + }) + + require.Error(t, err) + assert.Nil(t, report) + assert.ErrorContains(t, err, "parse-diff codex provider discovery") + assert.ErrorIs(t, err, discoverErr) +} + func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-skip.jsonl") diff --git a/internal/sync/provider_shadow_codex_test.go b/internal/sync/provider_shadow_codex_test.go new file mode 100644 index 000000000..0ef157e65 --- /dev/null 
+++ b/internal/sync/provider_shadow_codex_test.go @@ -0,0 +1,83 @@ +package sync + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +// TestObserveProviderSourceParsesCodexSourceWithIndexTitle exercises the folded +// Codex provider end to end through ObserveProviderSource. The legacy +// ParseCodexSession entrypoint was deleted in the fold, so this replaces the +// shadow-baseline comparison with provider-API coverage that pins the parsed +// session shape: discovery finds the dated transcript, the sibling +// session_index.jsonl supplies the thread title as session_name, and the +// observed parse output and data-version planning match the source. +func TestObserveProviderSourceParsesCodexSourceWithIndexTitle(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c12abcd" + sourcePath := filepath.Join( + root, + "2026", + "06", + "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + writeProviderShadowSourceFile( + t, + sourcePath, + testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + uuid, + "/home/user/code/api", + "codex_cli_rs", + "2026-06-11T12:44:06Z", + ), + testjsonl.CodexMsgJSON("user", "provider question", "2026-06-11T12:44:07Z"), + ), + ) + writeProviderShadowSourceFile( + t, + filepath.Join(base, parser.CodexSessionIndexFilename), + `{"id":"`+uuid+`","thread_name":"Provider title","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ) + + provider, ok := parser.NewProvider(parser.AgentCodex, parser.ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ + Source: 
sources[0], + Machine: "devbox", + }) + require.NoError(t, err) + require.Len(t, observation.Results, 1) + + session := observation.Results[0].Session + assert.Equal(t, "codex:"+uuid, session.ID) + assert.Equal(t, parser.AgentCodex, session.Agent) + assert.Equal(t, "devbox", session.Machine) + assert.Equal(t, "/home/user/code/api", session.Cwd) + assert.Equal(t, "Provider title", session.SessionName) + assert.Equal(t, "provider question", session.FirstMessage) + assert.Equal(t, sourcePath, session.File.Path) + assert.Equal(t, observation.Fingerprint.Hash, session.File.Hash) + + require.Len(t, observation.Results[0].Messages, 1) + assert.Equal(t, parser.RoleUser, observation.Results[0].Messages[0].Role) + + assert.Equal(t, []string{session.ID}, observation.Planned.DataVersionSessionIDs()) + assert.Empty(t, observation.Planned.Diagnostics) +} diff --git a/internal/sync/provider_shadow_support_test.go b/internal/sync/provider_shadow_support_test.go new file mode 100644 index 000000000..03714305b --- /dev/null +++ b/internal/sync/provider_shadow_support_test.go @@ -0,0 +1,19 @@ +package sync + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +// writeProviderShadowSourceFile writes a provider source fixture, creating the +// parent directory. It is the shared helper for the per-provider shadow/parse +// tests (the Codex fold is the lowest caller; later provider folds reuse it). +func writeProviderShadowSourceFile(t *testing.T, path, content string) { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) +} From ea5cb69c56791a12864eb33f2667e199d5777cfb Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 01:05:24 -0400 Subject: [PATCH 02/10] feat(parser): migrate gemini copilot providers Gemini and Copilot are direct local file sources, but each has source-shape details that were still coupled to the legacy adapter path. 
Moving them behind concrete providers keeps Gemini tmp/*/chats discovery and Copilot bare-vs-directory precedence explicit. The providers preserve raw and full ID lookup, changed-path classification, source hashing, Gemini project hints, Copilot workspace.yaml freshness, aggregate usage events, and parser output normalization. fix(parser): preserve gemini copilot provider freshness Gemini and Copilot now advertise provider-owned watch classification, so remove and rename events need to map back to syntactic source refs even after the filesystem entry has disappeared. Without that fallback, watcher-driven sync can leave stale provider sessions until a wider resync happens. Copilot also exposes a composite fingerprint that includes workspace.yaml freshness and shutdown aggregate usage. The provider parse result has to carry that same file metadata and usage event slice because sync consumes ParseResult, not only ParsedSession. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): include gemini project metadata freshness Gemini project names can come from projects.json or trustedFolders.json, so treating only the transcript as the provider source leaves metadata-only changes stale. The provider now watches those root-level sidecars, classifies their changes back to discovered sessions, and folds their contents into the source fingerprint. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): hash copilot workspace metadata Copilot workspace.yaml can change the provider-visible title without changing the event stream. 
Size and mtime are useful freshness guards, but the provider hash should also include the workspace file contents so same-length title edits cannot be skipped. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(sync): bridge provider path classification Concrete providers own source sidecars that legacy path classifiers do not know about. SyncPaths now falls back to provider changed-path classification after the legacy classifiers miss, and provider-classified files force a full parse so metadata-only events can refresh stored session state. Legacy classification remains authoritative when it recognizes a path, preserving existing project extraction and optimized sidecar filters while still letting migrated providers cover new sidecar surfaces. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): preserve provider sidecar reparses Provider sidecar events can map to the same session file as a legacy path event in one watcher batch. Keeping only the first classified file made the result order-dependent and could drop the force-parse signal that metadata-only changes rely on. Per-file forced parses also need to bypass the generic skip cache, not just the agent-specific mtime checks, because sidecar updates may leave the transcript mtime untouched while still changing parsed session metadata. Validation: go test -tags "fts5" ./internal/sync -run 'TestSyncPathsGeminiProjectMetadataEventRefreshesProject' -count=1; go test -tags "fts5" ./internal/sync -count=1; go test -tags "fts5" ./internal/parser -run 'Test(Gemini|Copilot|ProviderMigration)' -count=1; go vet ./...; git diff --check fix(sync): skip removed provider source events Provider changed-path classification can return syntactic source refs for deleted files so providers can model remove events. 
While legacy file processing is still authoritative, enqueueing an exact missing source path makes SyncPaths fail at the initial stat instead of treating the watcher remove as a no-op. Keep sidecar fanout intact for existing sources, because metadata changes such as Gemini projects.json still need to force a reparse even when the transcript mtime is unchanged. Validation: go test -tags "fts5" ./internal/sync -run 'TestEngine_ClassifyPathsProvider(RemoveSkipsMissingGeminiSource|SidecarKeepsExistingGeminiSources)|TestSyncPathsGeminiProjectMetadataEventRefreshesProject' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check test(sync): compare gemini copilot shadow parity Gemini and Copilot are migrated through concrete providers on this branch, so reviewers need a sync-level parity check that exercises the provider observation contract rather than only parser-local behavior. The fixtures cover their sidecar-sensitive source shapes: Gemini project metadata feeds the resolved project hint, and Copilot workspace.yaml participates in both title selection and the composite fingerprint. Validation: go test -tags "fts5" ./internal/sync -run 'TestObserveProviderSourceMatches(Gemini|Copilot)LegacyParser' -count=1; go test -tags "fts5" ./internal/parser -run 'Test(Gemini|Copilot)Provider' -count=1; go vet ./...; git diff --check. Full go test -tags "fts5" ./internal/parser ./internal/sync -count=1 currently fails in existing TestSyncPathsCodexIndexEventRefreshesStoredDuplicate. 
refactor(parser): fold gemini and copilot into providers Move Gemini and Copilot source discovery, lookup, and parse ownership onto the concrete geminiProvider and copilotProvider and delete the six package-level legacy entrypoints: DiscoverGeminiSessions, FindGeminiSourceFile, ParseGeminiSession, DiscoverCopilotSessions, FindCopilotSourceFile, and ParseCopilotSession. Discovery and find-source bodies now live as provider-owned source-set helpers (discoverSessionPaths and findSourceFile on each source set), the gemini confirmGeminiSessionID guard moves to the provider file, and the parsers become the providers' parseSession methods. The copilot source set's bare/dir precedence and dedup, and the gemini session-filename matching, are reproduced on the provider exactly as before. Gemini project resolution is preserved on the provider: sourceRef already resolves the project via BuildGeminiProjectMap/ResolveGeminiProject for both discovery and changed-path classification, so removing the engine's gemini project-map plumbing loses no project names. BuildGeminiProjectMap and ResolveGeminiProject stay exported package helpers used by the provider. Make both Gemini and Copilot provider-authoritative and drop their legacy sync dispatch: the classifyOnePath copilot and gemini blocks (and the now unused geminiProjectsByDir parameter threaded through classifyOnePath and classifyPaths), the processFile case arms, and the processGemini, processCopilot, and shouldSkipCopilot methods. copilotEffectiveMtime stays as a shared composite-mtime helper used by discoveredFileMtime. Wire the provider facade into parse-diff: agents that dropped their DiscoverFunc are now discovered through discoverProviderSources (filtered to the resolved, provider-discoverable agents), and resolveParseDiffAgents accepts file-based agents backed by a shadow-compare or provider-authoritative provider. 
Without this, a provider-authoritative agent would silently fall out of parse-diff once its DiscoverFunc was removed. Drop the Gemini and Copilot AgentDef DiscoverFunc/FindSourceFunc hooks, remove both files from the pending shim scan list, delete the shared shadow-baseline test file, and replace it with provider-API coverage plus guards asserting the legacy entrypoints stay gone. Package and engine tests route through the provider methods via new test helpers. test(sync): drop duplicate shadow source helper def The canonical writeProviderShadowSourceFile now lives at the Codex fold, so this redeclaration in provider_shadow_test.go conflicts with it. Drop the local copy and its now-unused os/path filepath imports; callers use the inherited shared helper. test(sync): restore provider-aware classify tests at gemini fold The original restack mis-merged engine_test.go on this branch, reverting the OpenCode SQLite, OpenCode removed-file, Claude stat-error, and Vibe meta-only classification tests to their stale pre-fold shapes (fake opencode.db bytes instead of a seeded session, dropped seedOpenCodeSQLiteSession helper) and re-adding a classify_vibe_test.go that exists on no lower branch. Those stale tests asserted the legacy direct-classification behavior and failed against the provider-routed path. Restore the correct versions inherited from the codex branch, keep this branch's two new Gemini provider classify tests, and drop the spurious classify_vibe_test.go. test(sync): restore gemini provider classify tests at gemini fold Re-add the two Gemini changed-path classify tests (TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource and TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources) that were dropped while restoring this branch's mis-merged engine_test.go to its provider-aware shape. fix(sync): skip fresh gemini copilot before hashing Gemini and Copilot lost their legacy DB freshness gates when the provider-authoritative path took over. 
That made unchanged sessions reach provider fingerprinting and parsing during normal full syncs, which is unnecessary work and no longer matches the old processGemini/processCopilot behavior. Restore the cheap pre-fingerprint checks for those two agents: Gemini compares the stored file path size and mtime, while Copilot compares transcript size plus the workspace.yaml effective mtime. Force-parse paths still flow through the provider so sidecar-driven reparses and parse-diff are not suppressed. Validation: go test -tags "fts5" ./internal/sync -run 'TestProcessFileProviderAuthoritativeSkipsFresh(Gemini|Copilot)BeforeFingerprint|TestProcessCodexAppendedStaleProject(DoesFullReparse|CarriesForceReplace)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): restore discover fields on shadowCallerProvider The rebase onto origin/main dropped the discoverSources and discoverErr fields from the shadowCallerProvider test struct while keeping the Discover method that reads them, leaving this branch and every branch stacked above it uncompilable. Restore the two fields so the Discover stub resolves. 
--- internal/parser/copilot.go | 10 +- internal/parser/copilot_provider.go | 489 +++++++++++++++++ internal/parser/copilot_test.go | 63 ++- internal/parser/discovery.go | 202 +------ internal/parser/discovery_test.go | 31 +- internal/parser/gemini.go | 10 +- .../parser/gemini_copilot_provider_test.go | 321 +++++++++++ internal/parser/gemini_parser_test.go | 72 ++- internal/parser/gemini_provider.go | 509 ++++++++++++++++++ internal/parser/parser_test.go | 6 +- internal/parser/provider.go | 4 + internal/parser/provider_migration.go | 4 +- internal/parser/provider_shim_scan_test.go | 2 - internal/parser/provider_test.go | 39 +- internal/parser/types.go | 36 +- .../sync/classify_antigravity_cli_test.go | 5 +- internal/sync/engine.go | 235 +------- internal/sync/engine_integration_test.go | 102 +++- internal/sync/engine_test.go | 60 ++- internal/sync/provider_shadow_caller_test.go | 148 +++++ 20 files changed, 1855 insertions(+), 493 deletions(-) create mode 100644 internal/parser/copilot_provider.go create mode 100644 internal/parser/gemini_copilot_provider_test.go create mode 100644 internal/parser/gemini_provider.go diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 57ee77d36..0c4c35be2 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -333,10 +333,12 @@ func readCopilotWorkspaceName(eventsPath string) string { return "" } -// ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil, nil) if the file doesn't exist or -// contains no user/assistant messages. -func ParseCopilotSession( +// parseSession parses a Copilot JSONL session file into the session, messages, +// and usage events the provider consumes. Returns (nil, nil, nil, nil) if the +// file doesn't exist or contains no user/assistant messages. This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. 
+func (p *copilotProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) diff --git a/internal/parser/copilot_provider.go b/internal/parser/copilot_provider.go new file mode 100644 index 000000000..2d7d4b5ca --- /dev/null +++ b/internal/parser/copilot_provider.go @@ -0,0 +1,489 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*copilotProvider)(nil) + +type copilotProviderFactory struct { + def AgentDef +} + +func newCopilotProviderFactory(def AgentDef) ProviderFactory { + return copilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f copilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f copilotProviderFactory) Capabilities() Capabilities { + return copilotProviderCapabilities() +} + +func (f copilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &copilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: copilotProviderCapabilities(), + Config: cfg, + }, + sources: newCopilotSourceSet(cfg.Roots), + } +} + +type copilotProvider struct { + ProviderBase + sources copilotSourceSet +} + +func (p *copilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *copilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *copilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *copilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *copilotProvider) Fingerprint( + ctx context.Context, + source 
SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *copilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("copilot source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, usage, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + sess.UsageEvents = usage + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: usage, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type copilotSource struct { + Root string + Path string +} + +type copilotSourceSet struct { + roots []string +} + +func newCopilotSourceSet(roots []string) copilotSourceSet { + return copilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s copilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Copilot session file paths under +// /session-state/. 
It supports both the bare layout (<uuid>.jsonl) and +// the directory layout (<uuid>/events.jsonl); when both exist for the same +// session, the directory layout wins and the bare file is dropped so a session +// is not discovered twice. +func (s copilotSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + stateDir := filepath.Join(root, copilotStateDir) + entries, err := os.ReadDir(stateDir) + if err != nil { + return nil + } + + dirs := make(map[string]struct{}) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + eventsPath := filepath.Join(stateDir, entry.Name(), "events.jsonl") + if _, err := os.Stat(eventsPath); err == nil { + dirs[entry.Name()] = struct{}{} + } + } + + var paths []string + for _, entry := range entries { + name := entry.Name() + if entry.IsDir() { + candidate := filepath.Join(stateDir, name, "events.jsonl") + if _, err := os.Stat(candidate); err == nil { + paths = append(paths, candidate) + } + continue + } + if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { + if _, dup := dirs[stem]; dup { + continue + } + paths = append(paths, filepath.Join(stateDir, name)) + } + } + return paths +} + +func (s copilotSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + stateDir := filepath.Join(root, copilotStateDir) + roots = append(roots, WatchRoot{ + Path: stateDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "workspace.yaml"}, + DebounceKey: string(AgentCopilot) + ":state:" + stateDir, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s copilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s copilotSourceSet) 
FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Copilot session file by UUID under root. It checks +// the directory layout (<uuid>/events.jsonl) first, then the bare layout +// (<uuid>.jsonl), so the richer directory form takes precedence. Returns "" for +// invalid IDs or when no file resolves. +func (s copilotSourceSet) findSourceFile(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + stateDir := filepath.Join(root, copilotStateDir) + + dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") + if _, err := os.Stat(dirFmt); err == nil { + return dirFmt + } + + bare := filepath.Join(stateDir, rawID+".jsonl") + if _, err := os.Stat(bare); err == nil { + return bare + } + + return "" +} + +func (s copilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("copilot source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + size := info.Size() + mtime := 
info.ModTime().UnixNano() + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil { + size += wsInfo.Size() + if wsMtime := wsInfo.ModTime().UnixNano(); wsMtime > mtime { + mtime = wsMtime + } + } + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: size, + MTimeNS: mtime, + } + h := sha256.New() + if err := addCopilotFingerprintPart(h, "events", path, info); err != nil { + return SourceFingerprint{}, err + } + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil && !wsInfo.IsDir() { + if err := addCopilotFingerprintPart(h, "workspace", workspace, wsInfo); err != nil { + return SourceFingerprint{}, err + } + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s copilotSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case copilotSource: + return src.Path, src.Path != "" + case *copilotSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(copilotSource) + return src.Path, true + } + } + } + return "", false +} + +func (s copilotSourceSet) sourceForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if filepath.Base(path) == "workspace.yaml" { + return s.sourceRef(root, filepath.Join(filepath.Dir(path), "events.jsonl")) + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + if !jsonlMissingPathFallbackAllowed(req) { + return SourceRef{}, false + } + if filepath.Base(path) == "events.jsonl" { + barePath := filepath.Join( + root, + copilotStateDir, + filepath.Base(filepath.Dir(path))+".jsonl", + ) + if 
source, ok := s.sourceRef(root, barePath); ok { + return source, true + } + } + return s.sourceRefForPath(root, path, false) +} + +func (s copilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s copilotSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 3 && + parts[0] == copilotStateDir && + parts[2] == "events.jsonl" { + return s.newSourceRef(root, path), true + } + if len(parts) == 2 && + parts[0] == copilotStateDir && + strings.HasSuffix(parts[1], ".jsonl") { + stem := strings.TrimSuffix(parts[1], ".jsonl") + if dirPath := s.findSourceFile(root, stem); dirPath != "" && + dirPath != path { + return s.sourceRef(root, dirPath) + } + return s.newSourceRef(root, path), true + } + return SourceRef{}, false +} + +func (s copilotSourceSet) newSourceRef(root, path string) SourceRef { + return SourceRef{ + Provider: AgentCopilot, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: copilotSource{ + Root: root, + Path: path, + }, + } +} + +func copilotWorkspacePath(eventsPath string) string { + if filepath.Base(eventsPath) != "events.jsonl" { + return "" + } + return filepath.Join(filepath.Dir(eventsPath), "workspace.yaml") +} + +func addCopilotFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + 
return nil +} + +func copilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 26a6d2f82..c5dc9cc72 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -10,6 +11,60 @@ import ( "github.com/stretchr/testify/require" ) +// newCopilotTestProvider builds a concrete copilotProvider for the given roots +// so package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newCopilotTestProvider(t *testing.T, roots ...string) *copilotProvider { + t.Helper() + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*copilotProvider) + require.True(t, ok) + return cp +} + +// parseCopilotTestSession parses a Copilot JSONL session file at path through +// the provider-owned parse method, replacing the removed package-level +// ParseCopilotSession entrypoint. 
+func parseCopilotTestSession( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { + t.Helper() + return newCopilotTestProvider(t).parseSession(path, machine) +} + +// discoverCopilotTestSessions discovers Copilot sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path) the tests assert +// against. +func discoverCopilotTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCopilotTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Agent: AgentCopilot, + }) + } + return files +} + +// findCopilotTestSourceFile resolves a Copilot session ID to a session file +// path through the provider, replacing the removed FindCopilotSourceFile. +func findCopilotTestSourceFile(t *testing.T, root, rawID string) string { + t.Helper() + return newCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + // writeCopilotJSONL writes JSONL lines to a temp file and // returns the file path. func writeCopilotJSONL( @@ -28,7 +83,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. 
func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, _, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +404,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +413,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected nil messages for nonexistent file") @@ -588,7 +643,7 @@ func parseCopilotFull( t *testing.T, path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { t.Helper() - sess, msgs, usage, err := ParseCopilotSession(path, machine) + sess, msgs, usage, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) return sess, msgs, usage } diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index aef500698..e6bdd1adf 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -23,6 +23,11 @@ var uuidRe = regexp.MustCompile( `[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})$`, ) +const ( + copilotStateDir = "session-state" + geminiChatsDir = "chats" +) + // isDirOrSymlink reports whether 
the entry is a directory or a // symlink that resolves to a directory. parentDir is needed to // build the full path for symlink resolution. @@ -831,117 +836,6 @@ func isGeminiSessionFilename(name string) bool { strings.HasSuffix(name, ".jsonl")) } -// DiscoverGeminiSessions finds all Gemini session files under -// the Gemini directory (~/.gemini/tmp/*/chats/session-*). -func DiscoverGeminiSessions( - geminiDir string, -) []DiscoveredFile { - if geminiDir == "" { - return nil - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return nil - } - - projectMap := BuildGeminiProjectMap(geminiDir) - - var files []DiscoveredFile - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - hash := hd.Name() - chatsDir := filepath.Join(tmpDir, hash, "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - - project := ResolveGeminiProject(hash, projectMap) - - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatsDir, name), - Project: project, - Agent: AgentGemini, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindGeminiSourceFile locates a Gemini session file by its -// session UUID. Searches all project hash directories. 
-func FindGeminiSourceFile( - geminiDir, sessionID string, -) string { - if geminiDir == "" || !IsValidSessionID(sessionID) || - len(sessionID) < 8 { - return "" - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return "" - } - - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - chatsDir := filepath.Join(tmpDir, hd.Name(), "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - if strings.Contains(name, sessionID[:8]) { - path := filepath.Join(chatsDir, name) - if confirmGeminiSessionID( - path, sessionID, - ) { - return path - } - } - } - } - return "" -} - -// confirmGeminiSessionID reads the sessionId field from a -// Gemini file to confirm it matches the expected ID. -func confirmGeminiSessionID( - path, sessionID string, -) bool { - data, err := os.ReadFile(path) - if err != nil { - return false - } - return GeminiSessionID(data) == sessionID -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { @@ -1057,92 +951,6 @@ func ResolveGeminiProject( return NormalizeName(dirName) } -// DiscoverCopilotSessions finds all JSONL files under -// /session-state/. Supports both bare format -// (.jsonl) and directory format (/events.jsonl). 
-func DiscoverCopilotSessions( - copilotDir string, -) []DiscoveredFile { - if copilotDir == "" { - return nil - } - - stateDir := filepath.Join(copilotDir, "session-state") - entries, err := os.ReadDir(stateDir) - if err != nil { - return nil - } - - dirs := make(map[string]struct{}) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - eventsPath := filepath.Join( - stateDir, entry.Name(), "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - dirs[entry.Name()] = struct{}{} - } - } - - var files []DiscoveredFile - for _, entry := range entries { - name := entry.Name() - if entry.IsDir() { - candidate := filepath.Join( - stateDir, name, "events.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - files = append(files, DiscoveredFile{ - Path: candidate, - Agent: AgentCopilot, - }) - } - continue - } - if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { - if _, dup := dirs[stem]; dup { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(stateDir, name), - Agent: AgentCopilot, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCopilotSourceFile locates a Copilot session file by -// UUID. Checks both bare (.jsonl) and directory -// (/events.jsonl) layouts. -func FindCopilotSourceFile( - copilotDir, rawID string, -) string { - if copilotDir == "" || !IsValidSessionID(rawID) { - return "" - } - - stateDir := filepath.Join(copilotDir, "session-state") - - dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") - if _, err := os.Stat(dirFmt); err == nil { - return dirFmt - } - - bare := filepath.Join(stateDir, rawID+".jsonl") - if _, err := os.Stat(bare); err == nil { - return bare - } - - return "" -} - // IsPiSessionFile reads the first non-blank line of path and returns true // when the JSON type field equals "session". The scanner buffer grows up to // 64 MiB to match parser.maxLineSize. 
Leading blank lines are skipped to diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 85c782fc6..3170a9b20 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -11,11 +11,6 @@ import ( "github.com/stretchr/testify/require" ) -const ( - copilotStateDir = "session-state" - geminiChatsDir = "chats" -) - // setupFileSystem creates a temporary directory and populates // it with the given relative file paths and contents. func setupFileSystem(t *testing.T, dir string, files map[string]string) { @@ -583,7 +578,7 @@ func TestDiscoverGeminiSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -603,17 +598,17 @@ func TestDiscoverGeminiSessions(t *testing.T) { t.Run("EmptyChatDir", func(t *testing.T) { dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "tmp", "hash1", geminiChatsDir), 0o755), "mkdir") - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverGeminiSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverGeminiTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverGeminiSessions("") + files := discoverGeminiTestSessions(t, "") assert.Nil(t, files, "expected nil") }) } @@ -656,7 +651,7 @@ func TestFindGeminiSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindGeminiSourceFile(dir, tt.targetID) + got := findGeminiTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -669,13 +664,13 @@ func TestFindGeminiSourceFile(t *testing.T) { t.Run("ShortID", func(t *testing.T) 
{ dir := t.TempDir() for _, id := range []string{"", "a", "abc", "1234567"} { - got := FindGeminiSourceFile(dir, id) + got := findGeminiTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindGeminiSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindGeminiSourceFile("", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") + got := findGeminiTestSourceFile(t, "", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") assert.Empty(t, got, "expected empty") }) } @@ -893,7 +888,7 @@ func TestDiscoverCopilotSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCopilotSessions(dir) + files := discoverCopilotTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -911,12 +906,12 @@ func TestDiscoverCopilotSessions(t *testing.T) { } t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverCopilotSessions("") + files := discoverCopilotTestSessions(t, "") assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverCopilotSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverCopilotTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) } @@ -962,7 +957,7 @@ func TestFindCopilotSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCopilotSourceFile(dir, tt.targetID) + got := findCopilotTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -975,13 +970,13 @@ func TestFindCopilotSourceFile(t *testing.T) { t.Run("InvalidID", func(t *testing.T) { dir := t.TempDir() for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindCopilotSourceFile(dir, id) + got := findCopilotTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindCopilotSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindCopilotSourceFile("", "abc-123") + got := findCopilotTestSourceFile(t, "", "abc-123") 
assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/gemini.go b/internal/parser/gemini.go index 432fad42b..60bb98519 100644 --- a/internal/parser/gemini.go +++ b/internal/parser/gemini.go @@ -53,10 +53,12 @@ func normalizedGeminiTokenUsage(tok geminiTokens) json.RawMessage { return raw } -// ParseGeminiSession parses a Gemini CLI session JSON file. -// Unlike Claude/Codex JSONL, each Gemini file is a single JSON -// document containing all messages. -func ParseGeminiSession( +// parseSession parses a Gemini CLI session JSON file into the session and +// messages the provider consumes. Unlike Claude/Codex JSONL, each Gemini file +// is a single JSON document containing all messages. This is the provider-owned +// parse entrypoint; the package-level free function was folded onto the +// provider. +func (p *geminiProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/gemini_copilot_provider_test.go b/internal/parser/gemini_copilot_provider_test.go new file mode 100644 index 000000000..c2d9d1344 --- /dev/null +++ b/internal/parser/gemini_copilot_provider_test.go @@ -0,0 +1,321 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestGeminiCopilotProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentGemini, AgentCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } +} + +func TestGeminiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := 
"gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "tmp"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"session-*.json", "session-*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, root, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"projects.json", "trustedFolders.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "my_project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: 
filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "my_project", changed[0].ProjectHint) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.Error(t, err) + require.Empty(t, fingerprint) +} + +func TestGeminiProviderProjectMetadataChangesClassifyAndFingerprint(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-project-metadata" + projectsPath := filepath.Join(root, "projects.json") + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/one":"alias"}}`) + sourcePath := filepath.Join( + root, + "tmp", + "alias", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-project-metadata.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "alias", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, "one", found.ProjectHint) + + fingerprintOne, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/two":"alias"}}`) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: projectsPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "two", changed[0].ProjectHint) + + fingerprintTwo, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + 
assert.NotEqual(t, fingerprintOne.Hash, fingerprintTwo.Hash) +} + +func TestGeminiProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "gemini:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentGemini, result.Result.Session.Agent) + assert.Equal(t, "my_project", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + barePath := filepath.Join(root, copilotStateDir, "copilot-provider.jsonl") + dirEvents := 
filepath.Join(root, copilotStateDir, "copilot-provider", "events.jsonl") + workspacePath := filepath.Join(root, copilotStateDir, "copilot-provider", "workspace.yaml") + content := strings.Join([]string{ + `{"type":"session.start","data":{"sessionId":"copilot-provider","context":{"cwd":"/home/user/code/copilot-app","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"hello copilot"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"hi"},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"gpt-5":{"usage":{"inputTokens":100,"outputTokens":20,"cacheReadTokens":30,"cacheWriteTokens":10,"reasoningTokens":5}}}},"timestamp":"2025-01-15T10:00:03Z"}`, + }, "\n") + "\n" + writeSourceFile(t, barePath, content) + writeSourceFile(t, dirEvents, content) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, copilotStateDir), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "workspace.yaml"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, dirEvents, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "copilot-provider", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, dirEvents, found.DisplayPath) + + for _, path := range []string{dirEvents, workspacePath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: 
filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + } + + require.NoError(t, os.Remove(dirEvents)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: dirEvents, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, barePath, changed[0].DisplayPath) + writeSourceFile(t, dirEvents, content) + + require.NoError(t, os.Remove(workspacePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, dirEvents, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + writeSourceFile(t, workspacePath, "name: Workspace other\n") + renamedFingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.NotEqual(t, fingerprint.Hash, renamedFingerprint.Hash) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + fingerprint, err = provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "copilot:copilot-provider", result.Result.Session.ID) + 
assert.Equal(t, AgentCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot_app", result.Result.Session.Project) + assert.Equal(t, "Workspace title", result.Result.Session.FirstMessage) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "gpt-5", result.Result.UsageEvents[0].Model) +} diff --git a/internal/parser/gemini_parser_test.go b/internal/parser/gemini_parser_test.go index 547f80d86..5178f4db4 100644 --- a/internal/parser/gemini_parser_test.go +++ b/internal/parser/gemini_parser_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "strings" "testing" "time" @@ -11,10 +12,65 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) +// newGeminiTestProvider builds a concrete geminiProvider for the given roots so +// package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newGeminiTestProvider(t *testing.T, roots ...string) *geminiProvider { + t.Helper() + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + gp, ok := provider.(*geminiProvider) + require.True(t, ok) + return gp +} + +// parseGeminiTestSession parses a Gemini session file at path through the +// provider-owned parse method, replacing the removed package-level +// ParseGeminiSession entrypoint. 
+func parseGeminiTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newGeminiTestProvider(t).parseSession(path, project, machine) +} + +// discoverGeminiTestSessions discovers Gemini sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path + project) the +// tests assert against. +func discoverGeminiTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newGeminiTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: AgentGemini, + }) + } + return files +} + +// findGeminiTestSourceFile resolves a Gemini session ID to a session file path +// through the provider, replacing the removed FindGeminiSourceFile. 
+func findGeminiTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newGeminiTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func runGeminiParserTest(t *testing.T, content string) (*ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, "session.json", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) return sess, msgs } @@ -45,7 +101,7 @@ func TestParseGeminiSession_JSONLStream(t *testing.T) { `{"$set":{"lastUpdated":"2026-04-23T16:12:50.158Z"}}`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -74,7 +130,7 @@ func TestParseGeminiSession_JSONLStreamLargeRecord(t *testing.T) { `{"id":"u1","timestamp":"2026-04-23T16:12:43.085Z","type":"user","content":[{"text":"` + largeContent + `"}]}`, }, "\n") path := createTestFile(t, "large-session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -91,7 +147,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { `{"id":"a1","timestamp":"2026-04-23T16:12:50.158Z","type":"gemini","content":"reply"`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -108,7 +164,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { "", }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := 
ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -452,12 +508,12 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("malformed JSON", func(t *testing.T) { path := createTestFile(t, "session.json", "not valid json {{{") - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) t.Run("missing file", func(t *testing.T) { - _, _, err := ParseGeminiSession("/nonexistent.json", "my_project", "local") + _, _, err := parseGeminiTestSession(t, "/nonexistent.json", "my_project", "local") assert.Error(t, err) }) @@ -500,7 +556,7 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("missing sessionId", func(t *testing.T) { content := `{"projectHash":"abc","startTime":"2024-01-01T00:00:00Z","lastUpdated":"2024-01-01T00:00:00Z","messages":[]}` path := createTestFile(t, "session.json", content) - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) } diff --git a/internal/parser/gemini_provider.go b/internal/parser/gemini_provider.go new file mode 100644 index 000000000..4433f4d76 --- /dev/null +++ b/internal/parser/gemini_provider.go @@ -0,0 +1,509 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*geminiProvider)(nil) + +type geminiProviderFactory struct { + def AgentDef +} + +func newGeminiProviderFactory(def AgentDef) ProviderFactory { + return geminiProviderFactory{def: cloneAgentDef(def)} +} + +func (f geminiProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f geminiProviderFactory) Capabilities() Capabilities { + return geminiProviderCapabilities() +} + +func (f geminiProviderFactory) NewProvider(cfg 
ProviderConfig) Provider { + cfg = cfg.Clone() + return &geminiProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: geminiProviderCapabilities(), + Config: cfg, + }, + sources: newGeminiSourceSet(cfg.Roots), + } +} + +type geminiProvider struct { + ProviderBase + sources geminiSourceSet +} + +func (p *geminiProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *geminiProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *geminiProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *geminiProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *geminiProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *geminiProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("gemini source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, 
nil +} + +type geminiSource struct { + Root string + Path string +} + +type geminiSourceSet struct { + roots []string +} + +func newGeminiSourceSet(roots []string) geminiSourceSet { + return geminiSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s geminiSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + rootSources, err := s.discoverRoot(ctx, root) + if err != nil { + return nil, err + } + for _, source := range rootSources { + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s geminiSourceSet) discoverRoot( + ctx context.Context, + root string, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + sources := make([]SourceRef, 0) + seen := make(map[string]struct{}) + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Gemini session file paths under the Gemini +// directory (<root>/tmp/<dir>/chats/session-*.json[l]). <dir> is either a +// SHA-256 project hash (old layout) or a project name (new layout); symlinked +// hash directories are followed (matching the watcher). Project resolution is +// applied by sourceRef via BuildGeminiProjectMap/ResolveGeminiProject, so this +// helper only enumerates source paths. 
+func (s geminiSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return nil + } + + var paths []string + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + paths = append(paths, filepath.Join(chatsDir, name)) + } + } + return paths +} + +func (s geminiSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + tmp := filepath.Join(root, "tmp") + roots = append(roots, WatchRoot{ + Path: tmp, + Recursive: true, + IncludeGlobs: []string{"session-*.json", "session-*.jsonl"}, + DebounceKey: string(AgentGemini) + ":tmp:" + tmp, + }) + roots = append(roots, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{"projects.json", "trustedFolders.json"}, + DebounceKey: string(AgentGemini) + ":projects:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s geminiSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + if geminiProjectMetadataPath(root, req.Path) { + return s.discoverRoot(ctx, root) + } + source, ok := s.sourceRef(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + if jsonlMissingPathFallbackAllowed(req) { + source, ok = s.sourceRefForPath(root, req.Path, false) + if ok { + return []SourceRef{source}, nil + } + } + } + return nil, nil +} + +func (s geminiSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if 
err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Gemini session file by its session UUID under root, +// searching all project hash directories. The session filename embeds the first +// eight characters of the UUID, so candidates are pre-filtered on that prefix +// before confirming the recorded sessionId matches. +func (s geminiSourceSet) findSourceFile(root, sessionID string) string { + if root == "" || !IsValidSessionID(sessionID) || + len(sessionID) < 8 { + return "" + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return "" + } + + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + if strings.Contains(name, sessionID[:8]) { + path := filepath.Join(chatsDir, name) + if confirmGeminiSessionID(path, sessionID) { + return path + } + } + } + } + return "" +} + +// confirmGeminiSessionID reads the sessionId field from a Gemini file to +// confirm it matches the expected ID. 
+func confirmGeminiSessionID(path, sessionID string) bool { + data, err := os.ReadFile(path) + if err != nil { + return false + } + return GeminiSessionID(data) == sessionID +} + +func (s geminiSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + root, path, ok := s.rootPathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("gemini source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + h := sha256.New() + if err := addGeminiFingerprintPart(h, "session", path, info); err != nil { + return SourceFingerprint{}, err + } + for _, metadataPath := range geminiProjectMetadataPaths(root) { + metadataInfo, err := os.Stat(metadataPath) + if err != nil || metadataInfo.IsDir() { + continue + } + fingerprint.Size += metadataInfo.Size() + if mtime := metadataInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addGeminiFingerprintPart(h, "project", metadataPath, metadataInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s geminiSourceSet) pathFromSource(source SourceRef) (string, bool) { + _, path, ok := s.rootPathFromSource(source) + return path, ok +} + +func (s geminiSourceSet) rootPathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case geminiSource: + return src.Root, src.Path, src.Path != "" + case *geminiSource: + if src != nil && src.Path != "" { + return src.Root, 
src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(geminiSource) + return src.Root, src.Path, true + } + } + } + return "", "", false +} + +func (s geminiSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s geminiSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + sepParts := strings.Split(filepath.ToSlash(rel), "/") + if len(sepParts) != 4 || + sepParts[0] != "tmp" || + sepParts[2] != geminiChatsDir || + !isGeminiSessionFilename(sepParts[3]) { + return SourceRef{}, false + } + project := ResolveGeminiProject(sepParts[1], BuildGeminiProjectMap(root)) + return SourceRef{ + Provider: AgentGemini, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: geminiSource{ + Root: root, + Path: path, + }, + }, true +} + +func geminiProjectMetadataPaths(root string) []string { + return []string{ + filepath.Join(root, "projects.json"), + filepath.Join(root, "trustedFolders.json"), + } +} + +func geminiProjectMetadataPath(root, path string) bool { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return false + } + rel = filepath.ToSlash(rel) + return rel == "projects.json" || rel == "trustedFolders.json" +} + +func addGeminiFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return 
fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func geminiProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index 046787e28..de249d22f 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -1224,10 +1224,10 @@ func TestGeminiUserMessageCount(t *testing.T) { ) path := createTestFile(t, "gemini-umc.json", content) - sess, msgs, err := ParseGeminiSession( - path, "my_project", "local", + sess, msgs, err := parseGeminiTestSession( + t, path, "my_project", "local", ) - require.NoError(t, err, "ParseGeminiSession") + require.NoError(t, err, "parseGeminiTestSession") require.NotNil(t, sess, "session") require.Len(t, msgs, 4, "messages") assert.Equal(t, 2, sess.UserMessageCount, "UserMessageCount") diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 9d1911c3b..ac2e9051f 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -360,6 +360,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentCodex: return 
newCodexProviderFactory(def) + case AgentCopilot: + return newCopilotProviderFactory(def) case AgentCowork: return newCoworkProviderFactory(def) case AgentCortex: @@ -374,6 +376,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentGemini: + return newGeminiProviderFactory(def) case AgentKimi: return newKimiProviderFactory(def) case AgentKilo: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 6f1c418f1..eeec761db 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -20,8 +20,8 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationProviderAuthoritative, AgentCodex: ProviderMigrationProviderAuthoritative, - AgentCopilot: ProviderMigrationLegacyOnly, - AgentGemini: ProviderMigrationLegacyOnly, + AgentCopilot: ProviderMigrationProviderAuthoritative, + AgentGemini: ProviderMigrationProviderAuthoritative, AgentOpenHands: ProviderMigrationProviderAuthoritative, AgentCursor: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 302afd894..c9d3a483c 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -49,9 +49,7 @@ var providerNeutralEntrypoints = map[string]bool{ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, - "copilot_provider.go": true, "db_backed_provider.go": true, - "gemini_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, "positron_provider.go": true, diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index d3b701e58..65b689c5e 100644 --- 
a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -149,7 +149,10 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + def, ok := AgentByType(legacyAgent) + require.True(t, ok) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -177,7 +180,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { source, found, err := provider.FindSource(ctx, FindSourceRequest{ RawSessionID: "session", - FullSessionID: "gemini:session", + FullSessionID: def.IDPrefix + "session", StoredFilePath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", }) @@ -186,7 +189,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.Empty(t, source) _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "session", DisplayPath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", @@ -195,9 +198,9 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - Source: SourceRef{Provider: AgentGemini, Key: "session"}, + Source: SourceRef{Provider: legacyAgent, Key: "session"}, Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: "gemini:session", + SessionID: def.IDPrefix + "session", StartOrdinal: 1, Machine: "devbox", }) @@ -211,12 +214,13 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { Roots: []string{"/tmp/one", "/tmp/two"}, Machine: "devbox", } + legacyAgent := legacyProviderTestAgent(t) - factory, ok := ProviderFactoryByType(AgentGemini) + factory, ok := ProviderFactoryByType(legacyAgent) require.True(t, ok) - assert.Equal(t, 
AgentGemini, factory.Definition().Type) + assert.Equal(t, legacyAgent, factory.Definition().Type) - provider, ok := NewProvider(AgentGemini, cfg) + provider, ok := NewProvider(legacyAgent, cfg) require.True(t, ok) require.NotNil(t, provider) @@ -233,7 +237,8 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { } func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -241,7 +246,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { outcome, err := provider.Parse(context.Background(), ParseRequest{ Source: SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "source", DisplayPath: "/tmp/source.jsonl", FingerprintKey: "/tmp/source.jsonl", @@ -257,7 +262,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) var unsupported UnsupportedProviderFeatureError require.ErrorAs(t, err, &unsupported) - assert.Equal(t, AgentGemini, unsupported.Provider) + assert.Equal(t, legacyAgent, unsupported.Provider) assert.Equal(t, ProviderFeatureParse, unsupported.Feature) } @@ -349,6 +354,18 @@ func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error return ParseOutcome{}, nil } +func legacyProviderTestAgent(t *testing.T) AgentType { + t.Helper() + for _, def := range Registry { + factory := providerFactoryForDef(def) + if _, ok := factory.(legacyProviderFactory); ok { + return def.Type + } + } + t.Fatal("expected at least one legacy provider for fallback tests") + return "" +} + func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { t.Helper() diff --git a/internal/parser/types.go b/internal/parser/types.go index f611a2c4e..0a27b9409 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -131,28 
+131,24 @@ var Registry = []AgentDef{ ShallowWatchRootsFunc: ResolveCodexShallowWatchRoots, }, { - Type: AgentCopilot, - DisplayName: "Copilot", - EnvVar: "COPILOT_DIR", - ConfigKey: "copilot_dirs", - DefaultDirs: []string{".copilot"}, - IDPrefix: "copilot:", - WatchSubdirs: []string{"session-state"}, - FileBased: true, - DiscoverFunc: DiscoverCopilotSessions, - FindSourceFunc: FindCopilotSourceFile, + Type: AgentCopilot, + DisplayName: "Copilot", + EnvVar: "COPILOT_DIR", + ConfigKey: "copilot_dirs", + DefaultDirs: []string{".copilot"}, + IDPrefix: "copilot:", + WatchSubdirs: []string{"session-state"}, + FileBased: true, }, { - Type: AgentGemini, - DisplayName: "Gemini", - EnvVar: "GEMINI_DIR", - ConfigKey: "gemini_dirs", - DefaultDirs: []string{".gemini"}, - IDPrefix: "gemini:", - WatchSubdirs: []string{"tmp"}, - FileBased: true, - DiscoverFunc: DiscoverGeminiSessions, - FindSourceFunc: FindGeminiSourceFile, + Type: AgentGemini, + DisplayName: "Gemini", + EnvVar: "GEMINI_DIR", + ConfigKey: "gemini_dirs", + DefaultDirs: []string{".gemini"}, + IDPrefix: "gemini:", + WatchSubdirs: []string{"tmp"}, + FileBased: true, }, { Type: AgentMiMoCode, diff --git a/internal/sync/classify_antigravity_cli_test.go b/internal/sync/classify_antigravity_cli_test.go index f99a7d1ea..be05aba3e 100644 --- a/internal/sync/classify_antigravity_cli_test.go +++ b/internal/sync/classify_antigravity_cli_test.go @@ -67,7 +67,6 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { parser.AgentAntigravityCLI: {dir}, }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -131,7 +130,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) + got, ok := eng.classifyOnePath(tt.path) assert.Equal(t, tt.want, ok) if ok { assert.Equal(t, parser.AgentAntigravityCLI, got.Agent) @@ -146,7 +145,7 @@ func TestClassifyOnePath_AntigravityCLI(t 
*testing.T) { orphanTraj := filepath.Join(convDir, orphanUUID+".trajectory.json") require.NoError(t, os.WriteFile(orphanTraj, []byte("orphan"), 0o644)) - _, ok := eng.classifyOnePath(orphanTraj, geminiMap) + _, ok := eng.classifyOnePath(orphanTraj) assert.False(t, ok, "should not classify sidecar when pb file does not exist") }) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 7eb75a334..cd7033a71 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -523,7 +523,6 @@ func (e *Engine) SyncPaths(paths []string) { func (e *Engine) classifyPaths( paths []string, ) []parser.DiscoveredFile { - geminiProjectsByDir := make(map[string]map[string]string) seen := make(map[string]int, len(paths)) files := make([]parser.DiscoveredFile, 0, len(paths)) for _, p := range paths { @@ -535,9 +534,7 @@ func (e *Engine) classifyPaths( dfs = e.classifyCodexIndexPath(p) } if len(dfs) == 0 { - if df, ok := e.classifyOnePath( - p, geminiProjectsByDir, - ); ok { + if df, ok := e.classifyOnePath(p); ok { dfs = []parser.DiscoveredFile{df} } } @@ -973,7 +970,6 @@ func (e *Engine) classifyContainerPath( func (e *Engine) classifyOnePath( path string, - geminiProjectsByDir map[string]map[string]string, ) (parser.DiscoveredFile, bool) { sep := string(filepath.Separator) pathExists := true @@ -1008,97 +1004,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. 
- // Copilot: /session-state/.jsonl - // or: /session-state//events.jsonl - for _, copilotDir := range e.agentDirs[parser.AgentCopilot] { - if copilotDir == "" { - continue - } - stateDir := filepath.Join( - copilotDir, "session-state", - ) - if rel, ok := isUnder(stateDir, path); ok { - parts := strings.Split(rel, sep) - switch len(parts) { - case 1: - stem, ok := strings.CutSuffix( - parts[0], ".jsonl", - ) - if !ok { - continue - } - dirEvents := filepath.Join( - stateDir, stem, "events.jsonl", - ) - if _, err := os.Stat(dirEvents); err == nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - case 2: - if parts[1] == "events.jsonl" { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - } - // workspace.yaml changes should trigger a re-parse - // of the sibling events.jsonl. - if parts[1] == "workspace.yaml" { - eventsPath := filepath.Join( - stateDir, parts[0], "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - return parser.DiscoveredFile{ - Path: eventsPath, - Agent: parser.AgentCopilot, - }, true - } - } - continue - default: - continue - } - } - } - - // Gemini: /tmp//chats/session-*.json(.l) - // is either a SHA-256 hash (old) or project name (new). 
- for _, geminiDir := range e.agentDirs[parser.AgentGemini] { - if geminiDir == "" { - continue - } - if rel, ok := isUnder(geminiDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 4 || - parts[0] != "tmp" || - parts[2] != "chats" { - continue - } - name := parts[3] - if !strings.HasPrefix(name, "session-") || - (!strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl")) { - continue - } - dirName := parts[1] - if _, ok := geminiProjectsByDir[geminiDir]; !ok { - geminiProjectsByDir[geminiDir] = - parser.BuildGeminiProjectMap(geminiDir) - } - project := parser.ResolveGeminiProject( - dirName, geminiProjectsByDir[geminiDir], - ) - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentGemini, - }, true - } - } - // VSCode Copilot: <vscDir>/workspaceStorage/<hash>/chatSessions/<id>.{json,jsonl} // or: <vscDir>/globalStorage/emptyWindowChatSessions/<id>.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -4043,12 +3948,8 @@ func (e *Engine) processFile( // processProviderFile; only s3:// Codex sources fall through to the // legacy dispatch, via the S3 sync path. 
res = e.processS3Session(ctx, file, info) - case parser.AgentCopilot: - res = e.processCopilot(file, info) case parser.AgentReasonix: res = e.processReasonix(file, info) - case parser.AgentGemini: - res = e.processGemini(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -4199,7 +4100,7 @@ func (e *Engine) processProviderFile( mtime: mtime, }, true } - if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + if freshMtime, fresh := e.providerSourceFreshBeforeFingerprint(source, file); fresh { return processResult{ skip: true, mtime: freshMtime, @@ -4991,11 +4892,11 @@ func (e *Engine) providerSingleSessionFresh( !parser.NeedsProjectReparse(sess.Project) } -func (e *Engine) providerCoworkSourceFresh( +func (e *Engine) providerSourceFreshBeforeFingerprint( source parser.SourceRef, file parser.DiscoveredFile, ) (int64, bool) { - if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + if e.forceParse || file.ForceParse { return 0, false } path := providerDiscoveredPath(source) @@ -5013,15 +4914,31 @@ func (e *Engine) providerCoworkSourceFresh( return 0, false } } - mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) - effectiveInfo := fakeSnapshotInfo{ - fSize: info.Size(), - fMtime: mtime, - } - if !e.shouldSkipByPath(path, effectiveInfo) { - return 0, false + switch file.Agent { + case parser.AgentCowork: + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } + case parser.AgentGemini: + if e.shouldSkipByPath(path, info) { + return info.ModTime().UnixNano(), true + } + case parser.AgentCopilot: + mtime := copilotEffectiveMtime(path, info) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + 
} } - return mtime, true + return 0, false } // providerSourceUnchangedInDB reports whether a provider source's persisted @@ -5826,44 +5743,6 @@ func (e *Engine) processCodex( } } -func (e *Engine) processCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Use effective mtime = max(events.jsonl, workspace.yaml) so - // that a new or updated workspace.yaml triggers a re-parse and - // the stored mtime stays consistent with what we compare against - // on subsequent syncs (preventing oscillation). - effectiveMtime := copilotEffectiveMtime(file.Path, info) - if e.shouldSkipCopilot(file.Path, info, effectiveMtime) { - return processResult{skip: true} - } - - sess, msgs, usageEvents, err := parser.ParseCopilotSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - if effectiveMtime > sess.File.Mtime { - sess.File.Mtime = effectiveMtime - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - } -} - // copilotEffectiveMtime returns max(events.jsonl mtime, // workspace.yaml mtime). For flat .jsonl sessions (no // workspace.yaml sibling) it returns the events.jsonl mtime. @@ -6016,64 +5895,6 @@ func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// shouldSkipCopilot is like shouldSkipByPath but uses the -// pre-computed effectiveMtime (max of events.jsonl and -// workspace.yaml) for the mtime comparison, keeping the stored -// value consistent with what we compare against on next sync. 
-func (e *Engine) shouldSkipCopilot( - path string, info os.FileInfo, effectiveMtime int64, -) bool { - if e.forceParse { // parse-diff: always re-parse - return false - } - lookupPath := path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(path) - } - storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok { - return false - } - if storedSize != info.Size() || storedMtime != effectiveMtime { - return false - } - if e.db.GetDataVersionByPath(lookupPath) < - db.CurrentDataVersion() { - return false - } - return true -} - -func (e *Engine) processGemini( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Fast path: skip by file_path + mtime before parsing. - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseGeminiSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processVSCodeCopilot( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 9336d386d..c6a074e48 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2128,6 +2128,97 @@ func TestSyncPathsGeminiJSONL(t *testing.T) { assertSessionMessageCount(t, env.db, "gemini:"+sessionID, 2) } +func TestSyncPathsGeminiProjectMetadataEventRefreshesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-project-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, os.WriteFile( + projectsPath, + fmt.Appendf(nil, + 
`{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + path := env.writeGeminiSession( + t, + filepath.Join( + "tmp", "alias", "chats", + "session-001.json", + ), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg( + "m1", tsEarly, "Hello Gemini", + ), + testjsonl.GeminiAssistantMsg( + "m2", tsEarlyS5, "Hi there!", nil, + ), + }, + ), + ) + + env.engine.SyncPaths([]string{path}) + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(path) + require.NoError(t, err, "stat gemini session") + env.engine.InjectSkipCache(map[string]int64{ + path: info.ModTime().UnixNano(), + }) + + writeProject("two") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeProject("three") + env.engine.SyncPaths([]string{path, projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) + + writeProject("four") + env.engine.SyncPaths([]string{projectsPath, path}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "four", sess.Project) + }, + ) + + require.NoError(t, os.Remove(projectsPath), "remove projects") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "alias", sess.Project) + }, + ) +} + func TestSyncPathsCodexAcceptsFlatArchived(t *testing.T) { env := setupTestEnv(t) @@ -5799,18 +5890,15 @@ func TestResyncAllReplacesMessageContent(t *testing.T) { }) require.NoError(t, err, "update message content") - // Normal SyncAll should skip (file unchanged on disk). 
- stats := env.engine.SyncAll(context.Background(), nil) - require.Equal(t, 1, stats.Skipped, "expected 1 skip, got %d", stats.Skipped) - msgs = fetchMessages(t, env.db, fullID) - require.True(t, strings.Contains(msgs[1].Content, "stale content"), "SyncAll should not have replaced content") - // Capture FTS state before resync so a regression that // breaks FTS isn't masked by HasFTS() returning false // post-resync. hadFTS := env.db.HasFTS() - // ResyncAll should re-parse and replace message content. + // ResyncAll should re-parse and replace message content. Gemini is + // provider-authoritative, so it has no DB-backed mtime skip; a plain + // SyncAll would also re-parse the unchanged file. ResyncAll additionally + // drops and rebuilds the FTS index, which is what this test guards. env.engine.ResyncAll(context.Background(), nil) msgs = fetchMessages(t, env.db, fullID) require.Equal(t, 2, len(msgs), "got %d messages after resync, want 2", len(msgs)) diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index ef0866317..923ba7ef9 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1684,6 +1684,7 @@ func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -1778,6 +1779,7 @@ func TestProcessCodexAppendedStaleProjectCarriesForceReplace(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -3523,7 +3525,7 @@ func TestEngine_ClassifyOnePathReasonixProjectBareMeta(t *testing.T) { dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) dbtest.WriteTestFile(t, metaPath, []byte(`{"model":"claude"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) 
require.True(t, ok, "expected Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3546,7 +3548,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { metaPath := sessionPath + ".meta" dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected deleted Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3567,7 +3569,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { reasonixDir, "projects", "proj", "sessions", "session-123.jsonl", ) - _, ok := engine.classifyOnePath(sessionPath, nil) + _, ok := engine.classifyOnePath(sessionPath) assert.False(t, ok, "expected deleted Reasonix transcript to be ignored") } @@ -4354,3 +4356,55 @@ func TestShouldSkipCodexTitleRenameBelowStoredMtimeDoesNotSkip(t *testing.T) { assert.False(t, f.e.shouldSkipCodex(f.path, f.info), "title-only rename at or below stored watermark must not skip") } + +func TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + require.NoError(t, os.Remove(sessionPath), "Remove(%q)", sessionPath) + + files := engine.classifyPaths([]string{sessionPath}) + assert.Empty(t, files) +} + +func TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: 
{geminiDir}, + }, + Machine: "local", + }) + + projectsPath := filepath.Join(geminiDir, "projects.json") + dbtest.WriteTestFile( + t, + projectsPath, + []byte(`{"projects":{"/Users/alice/code/sample":"alias"}}`), + ) + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + + files := engine.classifyPaths([]string{projectsPath}) + require.Len(t, files, 1) + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, parser.AgentGemini, files[0].Agent) + assert.True(t, files[0].ForceParse) +} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 024e27f13..ac1d7070f 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -649,6 +649,55 @@ func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *te assert.Empty(t, provider.calls) } +func TestProcessFileProviderAuthoritativeSkipsFreshGeminiBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join( + root, "tmp", "alias", "chats", "session-001.json", + ) + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, nil, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentGemini, + DisplayName: "Gemini CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentGemini, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentGemini: parser.ProviderMigrationProviderAuthoritative, + 
}, + }) + requireFreshProviderSession(t, engine.db, parser.AgentGemini, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentGemini, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { root := t.TempDir() database := dbtest.OpenTestDB(t) @@ -703,6 +752,56 @@ func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *te assert.True(t, provider.parseRequest.ForceParse) } +func TestProcessFileProviderAuthoritativeSkipsFreshCopilotBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join( + root, "session-state", "copilot-fresh", "events.jsonl", + ) + workspacePath := filepath.Join(filepath.Dir(sourcePath), "workspace.yaml") + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, &workspacePath, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCopilot, + DisplayName: "Copilot CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCopilot, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCopilot: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCopilot: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentCopilot, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: 
sourcePath, + Agent: parser.AgentCopilot, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1293,3 +1392,52 @@ func writeFreshCoworkProviderSource( return sourcePath, sourceMtime } + +func writeFreshProviderDBSession( + t *testing.T, + sourcePath string, + mtimeSidecar *string, +) int64 { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + sourceTime := time.Unix(1_781_475_210, 0) + require.NoError(t, os.Chtimes(sourcePath, sourceTime, sourceTime)) + mtime := sourceTime.UnixNano() + if mtimeSidecar != nil { + sidecarTime := sourceTime.Add(time.Second) + require.NoError(t, os.WriteFile(*mtimeSidecar, []byte("name: Fresh\n"), 0o644)) + require.NoError(t, os.Chtimes(*mtimeSidecar, sidecarTime, sidecarTime)) + mtime = sidecarTime.UnixNano() + } + + return mtime +} + +func requireFreshProviderSession( + t *testing.T, + database *db.DB, + agent parser.AgentType, + sourcePath string, + sourceMtime int64, +) { + t.Helper() + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + fullSessionID := string(agent) + ":fresh" + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "provider-project", + Machine: "devbox", + Agent: string(agent), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) +} From 471c0d1aa800171473456b5ce2800f70f23ed840 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 20:59:33 -0400 Subject: [PATCH 03/10] feat(parser): migrate copilot ide providers VS Code Copilot and 
Visual Studio Copilot both needed concrete providers because their source identity is richer than a plain parser callback. VS Code needs workspace and global chat discovery with .jsonl preference, while Visual Studio needs virtual per-conversation trace sources with sibling-aware freshness. The providers preserve raw and full ID lookup, watch classification, source hashing, VS Code project hints, Visual Studio physical trace fan-out, strict composite trace fingerprints, force-replace parse semantics, and parser output normalization. fix(parser): classify copilot ide source changes The Copilot IDE providers advertised changed-path classification, but the initial migration only accepted source paths that still existed. That dropped deletion and metadata-only events before the sync layer could make a refresh or removal decision. Classify syntactically valid removed VS Code chat files and Visual Studio trace files, fan workspace.json changes out to current workspace chat sessions, and cover Visual Studio physical trace fan-out with multiple conversations. fix(parser): include vscode workspace metadata freshness VS Code Copilot project names come from workspace.json, so classifying manifest writes is not enough if the source fingerprint still only reflects the chat transcript. An unchanged chat file could skip the parse that refreshes Session.Project. Fold workspace.json size, mtime, and content hash into workspace chat fingerprints while leaving global chat fingerprints unchanged, and cover metadata-only freshness in the provider tests. fix(sync): refresh vscode copilot workspace metadata VS Code Copilot was provider-aware for workspace.json freshness, but this stack still runs legacy sync writes. Without mirroring that freshness in the legacy process path, metadata-only workspace renames could be classified but then skipped against the unchanged chat transcript. 
Move the Copilot IDE providers into shadow compare on their migration branch, preserve .jsonl priority during provider changed-path classification, and store composite workspace freshness for VS Code Copilot sessions while both shapes run. Validation: go test -tags "fts5" ./internal/sync -run 'TestSyncPathsVSCodeCopilot(JSONLPriority|WorkspaceMetadataRefreshesProject)' -count=1; go test -tags "fts5" ./internal/parser -run 'Test(VSCodeCopilotProvider|VisualStudioCopilotProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/sync -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare copilot ide shadow parity VS Code Copilot and Visual Studio Copilot are already opted into shadow comparison on this branch, but provider method tests alone do not prove the migration path still matches the legacy parser output consumed by sync. Cover the workspace-backed VS Code JSONL source and Visual Studio virtual trace source through ObserveProviderSource so reviewers can see provider observation, data-version planning, and legacy parser parity in one place. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatches(VSCodeCopilot|VisualStudioCopilot)LegacyParser|TestCopilotIDEProvider|Test(VSCodeCopilotProvider|VisualStudioCopilotProvider)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... 
./internal/sync/...; git diff --check refactor(parser): fold copilot IDE providers Move VSCode Copilot and Visual Studio Copilot source discovery, lookup, and parse ownership onto their concrete providers and delete the seven legacy package-level free functions: DiscoverVSCodeCopilotSessions, FindVSCodeCopilotSourceFile, ParseVSCodeCopilotSession, DiscoverVisualStudioCopilotSessions, FindVisualStudioCopilotSourceFile, ParseVisualStudioCopilotConversation, and ParseVisualStudioCopilotVirtualPath. VSCode Copilot: discoverSessionFiles and findSourceFile become source-set helpers, parseSession becomes a provider method, and the shared discoverVSCodeSessionFiles helper stays in discovery.go. Visual Studio Copilot: discoverSessionFiles and findSourceFile become source-set helpers (over the retained findVisualStudioCopilotTraceSourceFile and discoverVisualStudioCopilotSessionFiles helpers), and parseConversation becomes a provider method. The virtual-path resolution is reproduced on the provider via the provider-neutral ParseVirtualSourcePath helper plus the trace-file and conversation-ID predicates (splitVisualStudioCopilotVirtualPath), replacing the deleted ParseVisualStudioCopilotVirtualPath. External callers (session export, direct service, parsediff, engine skip-path checks) use the new exported SplitVisualStudioCopilotVirtualPath, which wraps the same neutral splitter. The provider's discovery now surfaces an unreadable physical trace file as a source so the read failure is reported instead of being dropped. Make both providers provider-authoritative and drop their legacy sync dispatch: the classifyOnePath VSCode block, classifyVisualStudioCopilotPath and its call, the processFile case arms, processVSCodeCopilot and its vscodeCopilot* helpers, processVisualStudioCopilot, the vscodeJSONLSiblingExists helper, and the now-dead legacy-preamble references to these agents. 
Drop the AgentDef DiscoverFunc/FindSourceFunc hooks for both, remove both provider files from the pending shim scan list, and replace the shadow-baseline test with provider API coverage plus a guard asserting the legacy entrypoints stay gone. Re-home the shared writeProviderShadowSourceFile test helper into provider_shadow_test.go so the sync test package builds. fix(parser): preserve copilot provider metadata Provider-authoritative Copilot sync consumes ParseResult side channels, not only fields stored on ParsedSession. VS Code Copilot was parsing aggregate token usage but returning an empty ParseResult.UsageEvents slice, so a provider resync could erase usage rows. Visual Studio Copilot single-session resyncs carry the stored project through Source.ProjectHint. Honoring that hint prevents the provider default from overwriting preserved project metadata, while VS Code now also carries the composite fingerprint size and mtime alongside the hash. Validation: go test -tags "fts5" ./internal/parser -run 'Test(VSCodeCopilotProviderSourceMethods|VisualStudioCopilotProviderSourceMethods)' -count=1; go test -tags "fts5" ./internal/sync -run 'TestSyncPathsVSCodeCopilotPersistsUsageEvents|TestSyncSingleSessionContextVisualStudioCopilotPreservesProject' -count=1; go test -tags "fts5" ./internal/parser -run 'Test.*Copilot.*Provider|TestParseVSCodeCopilotSession_TokenUsage|TestParseVisualStudioCopilot' -count=1; go test -tags "fts5" ./internal/sync -run 'Test.*(VSCodeCopilot|VisualStudioCopilot).*' -count=1; go vet ./...; git diff --check test(parser): guard visual studio copilot session fold The Copilot IDE fold deleted ParseVisualStudioCopilotSession along with the other Visual Studio Copilot legacy entrypoints, but the regression guard did not name that symbol. Adding it prevents a future shim from reappearing unnoticed. 
Validation: go test -tags "fts5" ./internal/parser -run 'TestCopilotIDEProvidersOwnLegacyEntrypoints|Test(VSCodeCopilotProviderSourceMethods|VisualStudioCopilotProviderSourceMethods)' -count=1; git diff --check --- cmd/agentsview/session_export.go | 2 +- internal/parser/copilot_ide_provider_test.go | 416 +++++++++++ .../parser/copilot_ide_test_helpers_test.go | 113 +++ internal/parser/discovery.go | 146 +--- internal/parser/provider.go | 4 + internal/parser/provider_migration.go | 4 +- internal/parser/types.go | 10 +- internal/parser/visualstudio_copilot.go | 68 +- .../parser/visualstudio_copilot_provider.go | 274 ++++++++ internal/parser/visualstudio_copilot_test.go | 74 +- internal/parser/vscode_copilot.go | 8 +- internal/parser/vscode_copilot_provider.go | 654 ++++++++++++++++++ internal/parser/vscode_copilot_test.go | 22 +- internal/service/direct.go | 2 +- internal/sync/engine.go | 213 +----- internal/sync/engine_integration_test.go | 137 ++++ internal/sync/parsediff.go | 2 +- .../visualstudio_copilot_integration_test.go | 10 +- 18 files changed, 1683 insertions(+), 476 deletions(-) create mode 100644 internal/parser/copilot_ide_provider_test.go create mode 100644 internal/parser/copilot_ide_test_helpers_test.go create mode 100644 internal/parser/visualstudio_copilot_provider.go create mode 100644 internal/parser/vscode_copilot_provider.go diff --git a/cmd/agentsview/session_export.go b/cmd/agentsview/session_export.go index 21ab5aa96..18b42bb1a 100644 --- a/cmd/agentsview/session_export.go +++ b/cmd/agentsview/session_export.go @@ -114,7 +114,7 @@ func newSessionExportCommand() *cobra.Command { // conversations, so streaming the whole file would disclose // unrelated conversations. Filter to the requested conversation. 
if tracePath, conversationID, ok := - parser.ParseVisualStudioCopilotVirtualPath(storedPath); ok { + parser.SplitVisualStudioCopilotVirtualPath(storedPath); ok { err := parser.WriteVisualStudioCopilotConversationJSONL( cmd.OutOrStdout(), tracePath, conversationID, ) diff --git a/internal/parser/copilot_ide_provider_test.go b/internal/parser/copilot_ide_provider_test.go new file mode 100644 index 000000000..1d7aa9ce7 --- /dev/null +++ b/internal/parser/copilot_ide_provider_test.go @@ -0,0 +1,416 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestCopilotIDEProvidersOwnLegacyEntrypoints guards the fold: the +// provider-specific Discover/Find/Parse free functions (and the Visual Studio +// virtual-path splitter) must stay deleted, and neither the provider files nor +// their legacy source files may reach back into them as a shim. Discovery and +// lookup live on the provider source sets; parse lives on the provider methods; +// the Visual Studio virtual-path resolution is reproduced via the +// provider-neutral ParseVirtualSourcePath helper. 
+func TestCopilotIDEProvidersOwnLegacyEntrypoints(t *testing.T) { + files := map[string]string{} + for _, name := range []string{ + "discovery.go", + "vscode_copilot.go", + "vscode_copilot_provider.go", + "visualstudio_copilot.go", + "visualstudio_copilot_provider.go", + } { + data, err := os.ReadFile(name) + require.NoError(t, err) + files[name] = string(data) + } + + deletedSymbols := []string{ + "func DiscoverVSCodeCopilotSessions", + "func FindVSCodeCopilotSourceFile", + "func ParseVSCodeCopilotSession", + "func DiscoverVisualStudioCopilotSessions", + "func FindVisualStudioCopilotSourceFile", + "func ParseVisualStudioCopilotConversation", + "func ParseVisualStudioCopilotSession", + "func ParseVisualStudioCopilotVirtualPath", + } + for name, src := range files { + for _, symbol := range deletedSymbols { + assert.NotContainsf(t, src, symbol, "%s still defines %s", name, symbol) + } + } + + deletedCalls := []string{ + "DiscoverVSCodeCopilotSessions(", + "FindVSCodeCopilotSourceFile(", + "ParseVSCodeCopilotSession(", + "DiscoverVisualStudioCopilotSessions(", + "FindVisualStudioCopilotSourceFile(", + "ParseVisualStudioCopilotConversation(", + "ParseVisualStudioCopilotSession(", + "ParseVisualStudioCopilotVirtualPath(", + } + for _, providerFile := range []string{ + "vscode_copilot_provider.go", + "visualstudio_copilot_provider.go", + } { + for _, call := range deletedCalls { + assert.NotContainsf( + t, files[providerFile], call, + "%s references removed legacy entrypoint %s", providerFile, call, + ) + } + } +} + +func TestCopilotIDEProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentVSCodeCopilot, AgentVSCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } 
+} + +func TestVSCodeCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "vscode-provider" + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + jsonPath := filepath.Join(chatDir, sessionID+".json") + jsonlPath := filepath.Join(chatDir, sessionID+".jsonl") + writeSourceFile(t, filepath.Join(hashDir, "workspace.json"), + `{"folder":"file:///Users/alice/code/copilot-app"}`) + writeSourceFile(t, jsonPath, `{"version":3,"sessionId":"`+sessionID+`","requests":[]}`) + writeSourceFile(t, jsonlPath, strings.Join([]string{ + `{"kind":0,"v":{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[]}}`, + `{"kind":2,"k":["requests"],"v":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"Hello VS Code","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/claude-opus-4.8","result":{"metadata":{"promptTokens":42,"outputTokens":7,"resolvedModel":"claude-opus-4-8"}}}]}`, + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "workspaceStorage"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, filepath.Join(root, "globalStorage"), plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, jsonlPath, discovered[0].DisplayPath) + assert.Equal(t, "copilot-app", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~vscode-copilot:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonlPath, 
found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "write", WatchRoot: filepath.Join(root, "workspaceStorage")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, jsonlPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + require.False(t, outcome.ForceReplace) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "vscode-copilot:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentVSCodeCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot-app", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "vscode-copilot", result.Result.UsageEvents[0].Source) + assert.Equal(t, "claude-opus-4-8", result.Result.UsageEvents[0].Model) + assert.Equal(t, 42, result.Result.UsageEvents[0].InputTokens) + assert.Equal(t, 7, result.Result.UsageEvents[0].OutputTokens) +} + +func TestVSCodeCopilotProviderClassifiesDeletedAndMetadataPaths(t *testing.T) { + root := t.TempDir() + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, 
"chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + jsonlPath := filepath.Join(chatDir, "deleted-jsonl.jsonl") + jsonPath := filepath.Join(chatDir, "fallback-json.json") + globalPath := filepath.Join( + root, + "globalStorage", + "emptyWindowChatSessions", + "deleted-global.json", + ) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/copilot-app"}`) + writeSourceFile(t, jsonlPath, vscodeCopilotProviderJSONL("deleted-jsonl", "Hello deleted")) + writeSourceFile(t, jsonPath, vscodeCopilotProviderJSON("fallback-json", "Hello fallback")) + writeSourceFile(t, globalPath, vscodeCopilotProviderJSON("deleted-global", "Hello global")) + + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + metadataChanged, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "write"}, + ) + require.NoError(t, err) + assert.ElementsMatch(t, + []string{jsonlPath, jsonPath}, + sourceDisplayPaths(metadataChanged), + ) + require.Len(t, metadataChanged, 2) + beforeMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/copilot-renamed-app"}`) + afterMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + assert.NotEqual(t, beforeMetadata.Hash, afterMetadata.Hash) + + require.NoError(t, os.Remove(jsonlPath)) + deletedJSONL, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, deletedJSONL, 1) + assert.Equal(t, jsonlPath, deletedJSONL[0].DisplayPath) + + require.NoError(t, os.Remove(globalPath)) + deletedGlobal, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: globalPath, EventKind: "remove"}, + ) + 
require.NoError(t, err) + require.Len(t, deletedGlobal, 1) + assert.Equal(t, globalPath, deletedGlobal[0].DisplayPath) +} + +func TestVisualStudioCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + conversationID := "4a8f63f6-7626-4416-a874-fc7bd2c3f005" + tracePath := filepath.Join( + root, + "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", + ) + writeSourceFile(t, tracePath, strings.Join([]string{ + vsCopilotTraceLineJSON(conversationID, + "execute_tool run_command_in_terminal", + "1781293588624985000", "1781293588769581200", + map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_123", + "gen_ai.tool.call.arguments": `{"command":"go test ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + vsCopilotTraceLineJSON(conversationID, + "invoke_agent GitHub Copilot", + "1781293600000000000", "1781293610000000000", + map[string]string{ + "gen_ai.agent.name": "GitHub Copilot", + "gen_ai.request.model": "gpt-5.5", + "copilot_chat.mode": "Agent", + "copilot_chat.turn_count": "1", + }), + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*_VSGitHubCopilot_traces.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + virtualPath := VisualStudioCopilotVirtualPath(tracePath, conversationID) + assert.Equal(t, virtualPath, discovered[0].DisplayPath) + assert.Equal(t, "visualstudio", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: conversationID, + }) + require.NoError(t, err) + 
require.True(t, ok) + assert.Equal(t, virtualPath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tracePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, virtualPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + foundWithProject := found + foundWithProject.ProjectHint = "stored-solution" + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: foundWithProject, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "visualstudio-copilot:"+conversationID, result.Result.Session.ID) + assert.Equal(t, AgentVSCopilot, result.Result.Session.Agent) + assert.Equal(t, "stored-solution", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestVisualStudioCopilotProviderClassifiesDeletedTraceAndFansOutPhysicalTrace( + t *testing.T, +) { + root := t.TempDir() + firstConversationID := "4a8f63f6-7626-4416-a874-fc7bd2c3f005" + secondConversationID := "5b9f63f6-7626-4416-a874-fc7bd2c3f006" + tracePath := filepath.Join( + root, + "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", + ) + writeSourceFile(t, tracePath, strings.Join([]string{ + vsCopilotTraceLineJSON(firstConversationID, + "execute_tool run_command_in_terminal", + "1781293588624985000", "1781293588769581200", + 
map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_123", + "gen_ai.tool.call.arguments": `{"command":"go test ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + vsCopilotTraceLineJSON(secondConversationID, + "execute_tool run_command_in_terminal", + "1781293688624985000", "1781293688769581200", + map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_456", + "gen_ai.tool.call.arguments": `{"command":"go vet ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{ + VisualStudioCopilotVirtualPath(tracePath, firstConversationID), + VisualStudioCopilotVirtualPath(tracePath, secondConversationID), + }, sourceDisplayPaths(discovered)) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tracePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: changed[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 2) + assert.ElementsMatch(t, []string{ + "visualstudio-copilot:" + firstConversationID, + "visualstudio-copilot:" + secondConversationID, + }, parseOutcomeSessionIDs(outcome)) + + require.NoError(t, os.Remove(tracePath)) + deleted, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "remove"}, + ) + 
require.NoError(t, err) + require.Len(t, deleted, 1) + assert.Equal(t, tracePath, deleted[0].DisplayPath) +} + +func vscodeCopilotProviderJSON(sessionID, prompt string) string { + return `{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"` + prompt + `","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/gpt-4o"}]}` +} + +func vscodeCopilotProviderJSONL(sessionID, prompt string) string { + return strings.Join([]string{ + `{"kind":0,"v":{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[]}}`, + `{"kind":2,"k":["requests"],"v":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"` + prompt + `","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/gpt-4o"}]}`, + }, "\n") + "\n" +} + +func parseOutcomeSessionIDs(outcome ParseOutcome) []string { + ids := make([]string, 0, len(outcome.Results)) + for _, result := range outcome.Results { + ids = append(ids, result.Result.Session.ID) + } + return ids +} diff --git a/internal/parser/copilot_ide_test_helpers_test.go b/internal/parser/copilot_ide_test_helpers_test.go new file mode 100644 index 000000000..3dad2913f --- /dev/null +++ b/internal/parser/copilot_ide_test_helpers_test.go @@ -0,0 +1,113 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// newVSCodeCopilotTestProvider builds a concrete vscodeCopilotProvider for the +// given roots so package tests can exercise the folded parse, discovery, and +// source-lookup behavior directly through provider-owned methods. 
+func newVSCodeCopilotTestProvider( + t *testing.T, roots ...string, +) *vscodeCopilotProvider { + t.Helper() + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + p, ok := provider.(*vscodeCopilotProvider) + require.True(t, ok) + return p +} + +// parseVSCodeCopilotTestSession parses a VSCode Copilot session file through +// the provider-owned parse method, replacing the removed package-level +// ParseVSCodeCopilotSession entrypoint. +func parseVSCodeCopilotTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newVSCodeCopilotTestProvider(t).parseSession(path, project, machine) +} + +// discoverVSCodeCopilotTestSessions discovers VSCode Copilot session files +// under root through the provider source set, returning the legacy +// DiscoveredFile shape the tests assert against. +func discoverVSCodeCopilotTestSessions( + t *testing.T, root string, +) []DiscoveredFile { + t.Helper() + return newVSCodeCopilotTestProvider(t, root).sources.discoverSessionFiles(root) +} + +// findVSCodeCopilotTestSourceFile resolves a raw VSCode Copilot session ID to a +// session file through the provider source set, replacing the removed +// FindVSCodeCopilotSourceFile. +func findVSCodeCopilotTestSourceFile( + t *testing.T, root, rawID string, +) string { + t.Helper() + return newVSCodeCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + +// parseVisualStudioCopilotTestConversation parses one Visual Studio Copilot +// conversation through the folded free function, replacing the removed +// package-level ParseVisualStudioCopilotConversation entrypoint. 
+func parseVisualStudioCopilotTestConversation( + t *testing.T, tracePath, conversationID, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return parseVisualStudioCopilotConversation( + tracePath, conversationID, project, machine, + ) +} + +// parseVisualStudioCopilotTestSession reproduces the removed package-level +// ParseVisualStudioCopilotSession entrypoint. The path may be a real trace file +// or a # virtual path; a real trace file resolves to +// its first conversation. +func parseVisualStudioCopilotTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + if tracePath, conversationID, ok := + splitVisualStudioCopilotVirtualPath(path); ok { + return parseVisualStudioCopilotConversation( + tracePath, conversationID, project, machine, + ) + } + if !IsVisualStudioCopilotTraceFile(path) { + return nil, nil, nil + } + ids, err := VisualStudioCopilotFileConversationIDs(path) + if err != nil { + return nil, nil, err + } + if len(ids) == 0 { + return nil, nil, nil + } + return parseVisualStudioCopilotConversation(path, ids[0], project, machine) +} + +// discoverVisualStudioCopilotTestSessions discovers Visual Studio Copilot +// session work items under root, replacing the removed +// DiscoverVisualStudioCopilotSessions. +func discoverVisualStudioCopilotTestSessions( + t *testing.T, root string, +) []DiscoveredFile { + t.Helper() + return discoverVisualStudioCopilotSessionFilesUnderRoot(root) +} + +// findVisualStudioCopilotTestSourceFile resolves a raw Visual Studio Copilot +// conversation ID to a conversation-scoped virtual path, replacing the removed +// FindVisualStudioCopilotSourceFile. 
+func findVisualStudioCopilotTestSourceFile( + t *testing.T, root, rawID string, +) string { + t.Helper() + return findVisualStudioCopilotSourceFile(root, rawID) +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index e6bdd1adf..dc2d288b7 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -1005,82 +1005,11 @@ func isContainedIn(child, root string) bool { !strings.HasPrefix(rel, ".."+string(filepath.Separator)) } -// DiscoverVSCodeCopilotSessions traverses the VSCode -// workspaceStorage directory to find chatSessions/*.json -// and *.jsonl files. When both formats exist for the same -// session UUID, the .jsonl file takes priority. -// It also checks globalStorage/emptyWindowChatSessions. -// The vscodeUserDir should point to e.g. -// -// ~/Library/Application Support/Code/User (macOS) -// ~/.config/Code/User (Linux) -func DiscoverVSCodeCopilotSessions( - vscodeUserDir string, -) []DiscoveredFile { - if vscodeUserDir == "" { - return nil - } - - var files []DiscoveredFile - - // 1. Scan workspaceStorage//chatSessions/*.{json,jsonl} - wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err == nil { - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - - hashPath := filepath.Join(wsDir, entry.Name()) - chatDir := filepath.Join(hashPath, "chatSessions") - sessionFiles, err := os.ReadDir(chatDir) - if err != nil { - continue - } - - // Read workspace.json to get project name - project := ReadVSCodeWorkspaceManifest(hashPath) - if project == "" { - project = "unknown" - } - - files = append(files, - discoverVSCodeSessionFiles( - chatDir, sessionFiles, project, - )..., - ) - } - } - - // 2. 
Scan globalStorage/emptyWindowChatSessions/*.{json,jsonl} - for _, subdir := range []string{ - "globalStorage/emptyWindowChatSessions", - "globalStorage/transferredChatSessions", - } { - globalDir := filepath.Join(vscodeUserDir, subdir) - globalFiles, err := os.ReadDir(globalDir) - if err != nil { - continue - } - files = append(files, - discoverVSCodeSessionFiles( - globalDir, globalFiles, "empty-window", - )..., - ) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - // discoverVSCodeSessionFiles collects .json and .jsonl // session files from a directory, preferring .jsonl when // both exist for the same UUID. func discoverVSCodeSessionFiles( - dir string, entries []os.DirEntry, project string, + dir string, entries []os.DirEntry, project string, agent AgentType, ) []DiscoveredFile { // Collect UUIDs that have .jsonl files hasJSONL := make(map[string]bool) @@ -1123,70 +1052,6 @@ func discoverVSCodeSessionFiles( return files } -// FindVSCodeCopilotSourceFile locates a VSCode Copilot -// session file by UUID (.jsonl preferred over .json). 
-func FindVSCodeCopilotSourceFile( - vscodeUserDir, rawID string, -) string { - if vscodeUserDir == "" || !IsValidSessionID(rawID) { - return "" - } - - // Search through workspaceStorage - wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err == nil { - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - base := filepath.Join( - wsDir, entry.Name(), "chatSessions", - ) - // Prefer .jsonl - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join( - base, rawID+ext, - ) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - } - - // Check global dirs - for _, subdir := range []string{ - "globalStorage/emptyWindowChatSessions", - "globalStorage/transferredChatSessions", - } { - base := filepath.Join(vscodeUserDir, subdir) - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join(base, rawID+ext) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - - return "" -} - -// DiscoverVisualStudioCopilotSessions finds Visual Studio Copilot -// trace files under the configured traces directory. -func DiscoverVisualStudioCopilotSessions(vsRoot string) []DiscoveredFile { - if vsRoot == "" { - return nil - } - entries, err := os.ReadDir(vsRoot) - if err != nil { - return nil - } - files := discoverVisualStudioCopilotSessionFiles(vsRoot, entries) - sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) - return files -} - // discoverVisualStudioCopilotSessionFiles emits one work item per conversation // found across the trace files in a directory. A single physical trace file // can hold spans for several conversations, and one conversation can be split @@ -1246,15 +1111,6 @@ func discoverVisualStudioCopilotSessionFiles( return files } -// FindVisualStudioCopilotSourceFile locates a Visual Studio Copilot -// trace file by conversation UUID. 
-func FindVisualStudioCopilotSourceFile(vsRoot, rawID string) string { - if vsRoot == "" || !IsValidSessionID(rawID) { - return "" - } - return findVisualStudioCopilotTraceSourceFile(vsRoot, rawID) -} - func findVisualStudioCopilotTraceSourceFile( dir, rawID string, ) string { diff --git a/internal/parser/provider.go b/internal/parser/provider.go index ac2e9051f..85f4653a7 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -398,6 +398,10 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newQwenPawProviderFactory(def) case AgentQClaw: return newQClawProviderFactory(def) + case AgentVSCopilot: + return newVisualStudioCopilotProviderFactory(def) + case AgentVSCodeCopilot: + return newVSCodeCopilotProviderFactory(def) case AgentVibe: return newVibeProviderFactory(def) case AgentWorkBuddy: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index eeec761db..90c65afb2 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -31,8 +31,8 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentIflow: ProviderMigrationProviderAuthoritative, AgentAmp: ProviderMigrationProviderAuthoritative, AgentZencoder: ProviderMigrationProviderAuthoritative, - AgentVSCodeCopilot: ProviderMigrationLegacyOnly, - AgentVSCopilot: ProviderMigrationLegacyOnly, + AgentVSCodeCopilot: ProviderMigrationProviderAuthoritative, + AgentVSCopilot: ProviderMigrationProviderAuthoritative, AgentPi: ProviderMigrationProviderAuthoritative, AgentQwen: ProviderMigrationProviderAuthoritative, AgentCommandCode: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/types.go b/internal/parser/types.go index 0a27b9409..0dd3f42c4 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -265,9 +265,7 @@ var Registry = []AgentDef{ "workspaceStorage", "globalStorage", }, - FileBased: true, - DiscoverFunc: DiscoverVSCodeCopilotSessions, - 
FindSourceFunc: FindVSCodeCopilotSourceFile, + FileBased: true, }, { Type: AgentVSCopilot, @@ -282,10 +280,8 @@ var Registry = []AgentDef{ // Linux ".cache/VSGitHubCopilotLogs/traces", }, - IDPrefix: "visualstudio-copilot:", - FileBased: true, - DiscoverFunc: DiscoverVisualStudioCopilotSessions, - FindSourceFunc: FindVisualStudioCopilotSourceFile, + IDPrefix: "visualstudio-copilot:", + FileBased: true, }, { Type: AgentPi, diff --git a/internal/parser/visualstudio_copilot.go b/internal/parser/visualstudio_copilot.go index b05755dde..e1ab9e13a 100644 --- a/internal/parser/visualstudio_copilot.go +++ b/internal/parser/visualstudio_copilot.go @@ -16,60 +16,24 @@ import ( "time" ) -// ParseVisualStudioCopilotSession parses a single Visual Studio Copilot -// conversation from an OpenTelemetry trace JSONL file. The path may be a real -// trace file or a # virtual path emitted by -// discovery. A real trace file resolves to the conversation it contains; when -// a file carries spans for more than one conversation, discovery emits one -// virtual-path work item per conversation, so production does not rely on this -// entry point to choose among several. -func ParseVisualStudioCopilotSession( - path, project, machine string, -) (*ParsedSession, []ParsedMessage, error) { - if tracePath, conversationID, ok := - ParseVisualStudioCopilotVirtualPath(path); ok { - return ParseVisualStudioCopilotConversation( - tracePath, conversationID, project, machine, - ) - } - if !IsVisualStudioCopilotTraceFile(path) { - return nil, nil, nil - } - ids, err := VisualStudioCopilotFileConversationIDs(path) - if err != nil { - return nil, nil, err - } - if len(ids) == 0 { - return nil, nil, nil - } - return ParseVisualStudioCopilotConversation( - path, ids[0], project, machine, - ) -} - // VisualStudioCopilotVirtualPath pairs a trace file with one conversation ID. 
// A single physical trace file can hold spans for multiple conversations, so // each conversation is tracked as its own work item under this virtual path. func VisualStudioCopilotVirtualPath(tracePath, conversationID string) string { - return tracePath + "#" + conversationID + return VirtualSourcePath(tracePath, conversationID) } -// ParseVisualStudioCopilotVirtualPath splits a # -// virtual path. It returns ok=false for a plain trace-file path. -func ParseVisualStudioCopilotVirtualPath( +// SplitVisualStudioCopilotVirtualPath splits a # +// virtual source path into its physical trace file and conversation ID. It +// builds on the provider-neutral ParseVirtualSourcePath splitter and adds the +// Visual Studio Copilot validation that the container names a trace file and +// the source ID is a valid conversation ID. It returns ok=false for a plain +// trace-file path. Callers outside the parser package use it to detect and +// resolve the virtual paths Visual Studio Copilot stores for its sessions. +func SplitVisualStudioCopilotVirtualPath( sourcePath string, ) (tracePath, conversationID string, ok bool) { - idx := strings.LastIndex(sourcePath, "#") - if idx <= 0 || idx >= len(sourcePath)-1 { - return "", "", false - } - tracePath = sourcePath[:idx] - conversationID = sourcePath[idx+1:] - if !IsVisualStudioCopilotTraceFile(tracePath) || - !IsValidSessionID(conversationID) { - return "", "", false - } - return tracePath, conversationID, true + return splitVisualStudioCopilotVirtualPath(sourcePath) } // IsVisualStudioCopilotTraceFile reports whether path names a Visual Studio @@ -89,7 +53,7 @@ func IsVisualStudioCopilotTraceFile(path string) bool { // path whose runs share one physical history file; both resolve to the // physical file. Every other agent stores a real path, returned unchanged. 
func ResolveSourceFilePath(storedPath string) string { - if tracePath, _, ok := ParseVisualStudioCopilotVirtualPath(storedPath); ok { + if tracePath, _, ok := splitVisualStudioCopilotVirtualPath(storedPath); ok { return tracePath } if historyPath, _, ok := ParseAiderVirtualPath(storedPath); ok { @@ -133,11 +97,11 @@ type vsCopilotTraceValue struct { BoolValue bool `json:"boolValue"` } -// ParseVisualStudioCopilotConversation parses one conversation, gathering its -// spans from the given trace file and every sibling trace file in the same -// directory. File metadata is recorded against the conversation's virtual path -// so that each conversation in a shared trace file is tracked independently. -func ParseVisualStudioCopilotConversation( +// parseConversation parses one conversation, gathering its spans from the given +// trace file and every sibling trace file in the same directory. File metadata +// is recorded against the conversation's virtual path so that each conversation +// in a shared trace file is tracked independently. +func parseVisualStudioCopilotConversation( tracePath, conversationID, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { if conversationID == "" { diff --git a/internal/parser/visualstudio_copilot_provider.go b/internal/parser/visualstudio_copilot_provider.go new file mode 100644 index 000000000..9fb39ddae --- /dev/null +++ b/internal/parser/visualstudio_copilot_provider.go @@ -0,0 +1,274 @@ +package parser + +import ( + "os" + "path/filepath" + "sort" + "strings" +) + +// Visual Studio Copilot stores conversations inside shared trace files +// (*_VSGitHubCopilot_traces.jsonl). It is a multi-session container provider, +// but unlike the SQLite-backed containers it discovers one source per +// conversation (deduplicated across trace files, newest trace wins) plus a bare +// physical source for any trace whose conversation IDs could not be read, so +// the read failure surfaces instead of being silently dropped. 
Parse of a +// conversation virtual path yields that one session; Parse of a bare trace fans +// out every conversation in it. All behavior is wired into the shared +// multi-session-container base via options. +func newVisualStudioCopilotProviderFactory(def AgentDef) ProviderFactory { + return NewMultiSessionProviderFactory( + def, + visualStudioCopilotProviderCapabilities(), + func(cfg ProviderConfig) multiSessionContainerSourceSet { + return NewMultiSessionContainerSourceSet( + AgentVSCopilot, + cfg.Roots, + WithSourceDiscovery(vsCopilotDiscoverSources), + WithWatchRoots(vsCopilotWatchRoots), + WithChangedPathClassifier(vsCopilotClassifyPath), + WithMemberLookup(vsCopilotFindMember), + WithFingerprint(vsCopilotFingerprintSource), + WithContainerParse(vsCopilotParseContainer), + WithMemberParse(vsCopilotParseMember), + // Every conversation in a trace shares the trace's content hash. + WithContainerHashStamping(), + ) + }, + ) +} + +// vsCopilotDiscoverSources emits one match per conversation (virtual path) plus +// a bare physical match for each unreadable trace, mirroring the legacy +// per-conversation discovery. +func vsCopilotDiscoverSources(root string) []multiSessionMatch { + var out []multiSessionMatch + for _, file := range discoverVisualStudioCopilotSessionFilesUnderRoot(root) { + match, ok := vsCopilotDiscoveredMatch(root, file.Path) + if !ok { + continue + } + match.ProjectHint = file.Project + out = append(out, match) + } + return out +} + +// vsCopilotDiscoveredMatch classifies a discovery path. Discovery emits either a +// # virtual path for a readable trace, or a bare +// physical trace path for one whose conversation IDs could not be read. 
The +// unreadable physical file must still become a source so the engine surfaces +// the read failure instead of silently dropping it; the regular-file +// requirement is therefore relaxed for the bare physical trace (which os.ReadDir +// already enumerated) while virtual paths keep validating that their backing +// trace exists. +func vsCopilotDiscoveredMatch(root, path string) (multiSessionMatch, bool) { + if match, ok := vsCopilotClassifyPath(root, path, false); ok { + return match, true + } + root = filepath.Clean(root) + path = filepath.Clean(path) + if _, _, ok := splitVisualStudioCopilotVirtualPath(path); ok { + return multiSessionMatch{}, false + } + if !visualStudioCopilotTraceUnderRoot(root, path, false) { + return multiSessionMatch{}, false + } + return multiSessionMatch{ + Path: path, + Container: path, + ProjectHint: "visualstudio", + }, true +} + +func discoverVisualStudioCopilotSessionFilesUnderRoot( + vsRoot string, +) []DiscoveredFile { + if vsRoot == "" { + return nil + } + entries, err := os.ReadDir(vsRoot) + if err != nil { + return nil + } + files := discoverVisualStudioCopilotSessionFiles(vsRoot, entries) + sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) + return files +} + +func vsCopilotWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{"*_VSGitHubCopilot_traces.jsonl"}, + DebounceKey: string(AgentVSCopilot) + ":traces:" + root, + }) + } + return out +} + +// vsCopilotClassifyPath maps a stored or changed path to its trace container and +// conversation. A virtual path always requires its backing trace to exist; a +// bare trace path relaxes the regular-file check under allowMissing so a deleted +// trace still classifies for changed-path tombstones. 
+func vsCopilotClassifyPath(
+	root, path string, allowMissing bool,
+) (multiSessionMatch, bool) {
+	cleanRoot, cleanPath := filepath.Clean(root), filepath.Clean(path)
+	tracePath, conversationID, virtual :=
+		splitVisualStudioCopilotVirtualPath(cleanPath)
+	switch {
+	case virtual:
+		// A virtual path always needs its backing trace to be a regular
+		// file under root, whatever allowMissing says.
+		if !visualStudioCopilotTraceUnderRoot(cleanRoot, tracePath, true) {
+			return multiSessionMatch{}, false
+		}
+		return multiSessionMatch{
+			Path:        cleanPath,
+			Container:   tracePath,
+			MemberID:    conversationID,
+			ProjectHint: "visualstudio",
+		}, true
+	case visualStudioCopilotTraceUnderRoot(cleanRoot, cleanPath, !allowMissing):
+		// Bare trace file: the file is its own container and carries no
+		// member ID.
+		return multiSessionMatch{
+			Path:        cleanPath,
+			Container:   cleanPath,
+			ProjectHint: "visualstudio",
+		}, true
+	}
+	return multiSessionMatch{}, false
+}
+
+// vsCopilotFindMember resolves a raw conversation ID to a classified source
+// under root. It reports false when the lookup yields no path or when the
+// returned path does not classify.
+func vsCopilotFindMember(root, rawID string) (multiSessionMatch, bool) {
+	virtualPath := findVisualStudioCopilotSourceFile(root, rawID)
+	if virtualPath != "" {
+		return vsCopilotClassifyPath(root, virtualPath, false)
+	}
+	return multiSessionMatch{}, false
+}
+
+// findVisualStudioCopilotSourceFile locates a trace file by conversation UUID
+// and returns a conversation-scoped trace-path#conversation-ID virtual path.
+// An empty root or an invalid session ID yields ""; otherwise the lookup is
+// delegated to findVisualStudioCopilotTraceSourceFile.
+func findVisualStudioCopilotSourceFile(root, rawID string) string {
+	if root != "" && IsValidSessionID(rawID) {
+		return findVisualStudioCopilotTraceSourceFile(root, rawID)
+	}
+	return ""
+}
+
+// vsCopilotFingerprintSource fingerprints the container trace of src: size
+// and mtime come from VisualStudioCopilotTraceFingerprintStrict, the hash
+// from hashJSONLSourceFile. Any failure yields a zero fingerprint and the
+// error.
+func vsCopilotFingerprintSource(
+	src multiSessionSource,
+) (SourceFingerprint, error) {
+	traceSize, traceMTime, err :=
+		VisualStudioCopilotTraceFingerprintStrict(src.Container)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	digest, err := hashJSONLSourceFile(src.Container)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	fingerprint := SourceFingerprint{
+		Size:    traceSize,
+		MTimeNS: traceMTime,
+		Hash:    digest,
+	}
+	return fingerprint, nil
+}
+
+// vsCopilotParseMember parses the single conversation src.MemberID out of the
+// container trace. A nil result with a nil error means the parser returned no
+// session for that conversation.
+func vsCopilotParseMember(
+	src multiSessionSource, req ParseRequest,
+) (*ParseResult, error) {
+	projectName := firstNonEmptyJSONLString(
+		req.Source.ProjectHint, "visualstudio",
+	)
+	session, messages, err := parseVisualStudioCopilotConversation(
+		src.Container, src.MemberID, projectName, req.Machine,
+	)
+	switch {
+	case err != nil:
+		return nil, err
+	case session == nil:
+		return nil, nil
+	}
+	return &ParseResult{Session: *session, Messages: messages}, nil
+}
+
+// vsCopilotParseContainer parses every conversation listed by
+// VisualStudioCopilotFileConversationIDs for the container trace. The first
+// parse error aborts the whole container; conversations that parse to a nil
+// session are left out of the result.
+func vsCopilotParseContainer(
+	src multiSessionSource, req ParseRequest,
+) ([]ParseResult, error) {
+	conversationIDs, err := VisualStudioCopilotFileConversationIDs(src.Container)
+	if err != nil {
+		return nil, err
+	}
+	projectName := firstNonEmptyJSONLString(
+		req.Source.ProjectHint, "visualstudio",
+	)
+	parsed := make([]ParseResult, 0, len(conversationIDs))
+	for _, conversationID := range conversationIDs {
+		session, messages, parseErr := parseVisualStudioCopilotConversation(
+			src.Container, conversationID, projectName, req.Machine,
+		)
+		if parseErr != nil {
+			return nil, parseErr
+		}
+		if session != nil {
+			parsed = append(parsed, ParseResult{
+				Session:  *session,
+				Messages: messages,
+			})
+		}
+	}
+	return parsed, nil
+}
+
+// splitVisualStudioCopilotVirtualPath splits a trace-path#conversation-ID
+// virtual source path into its physical trace file and conversation ID. It
+// builds on the provider-neutral ParseVirtualSourcePath splitter and adds the
+// Visual Studio Copilot validation: the container must name a trace file and
+// the source ID must be a valid conversation ID. It returns ok=false for a
+// plain trace-file path.
+func splitVisualStudioCopilotVirtualPath(
+	sourcePath string,
+) (tracePath, conversationID string, ok bool) {
+	container, sourceID, split := ParseVirtualSourcePath(sourcePath)
+	if split &&
+		IsVisualStudioCopilotTraceFile(container) &&
+		IsValidSessionID(sourceID) {
+		return container, sourceID, true
+	}
+	return "", "", false
+}
+
+// visualStudioCopilotTraceUnderRoot reports whether path names a trace file
+// sitting directly in root (a relative path containing a separator is
+// rejected). With requireRegular set, the path must also be a regular file.
+func visualStudioCopilotTraceUnderRoot(
+	root, path string,
+	requireRegular bool,
+) bool {
+	rel, under := relUnder(root, path)
+	switch {
+	case !under:
+		return false
+	case strings.Contains(filepath.ToSlash(rel), "/"):
+		return false
+	case !IsVisualStudioCopilotTraceFile(path):
+		return false
+	case requireRegular:
+		return IsRegularFile(path)
+	}
+	return true
+}
+
+// visualStudioCopilotProviderCapabilities declares the source and content
+// capabilities advertised for the Visual Studio Copilot provider.
+func visualStudioCopilotProviderCapabilities() Capabilities {
+	source := SourceCapabilities{
+		DiscoverSources:      CapabilitySupported,
+		WatchSources:         CapabilitySupported,
+		ClassifyChangedPath:  CapabilitySupported,
+		FindSource:           CapabilitySupported,
+		CompositeFingerprint: CapabilitySupported,
+		IncrementalAppend:    CapabilityNotApplicable,
+		MultiSessionSource:   CapabilitySupported,
+		PerSessionErrors:     CapabilityNotApplicable,
+		ExcludedSessions:     CapabilityNotApplicable,
+		ForceReplaceOnParse:  CapabilitySupported,
+	}
+	content := ContentCapabilities{
+		FirstMessage:         CapabilitySupported,
+		ToolCalls:            CapabilitySupported,
+		ToolResults:          CapabilitySupported,
+		AggregateUsageEvents: CapabilitySupported,
+		Model:                CapabilitySupported,
+	}
+	return Capabilities{Source: source, Content: content}
+}
diff --git a/internal/parser/visualstudio_copilot_test.go b/internal/parser/visualstudio_copilot_test.go
index aeb1f2ab1..768d1bac7 100644
--- a/internal/parser/visualstudio_copilot_test.go
+++ b/internal/parser/visualstudio_copilot_test.go
@@ -36,7 +36,7
@@ func TestDiscoverVisualStudioCopilotSessions(t *testing.T) { []byte("{}\n"), 0o644, )) - files := DiscoverVisualStudioCopilotSessions(tracesDir) + files := discoverVisualStudioCopilotTestSessions(t, tracesDir) require.Len(t, files, 1) assert.Equal(t, tracePath+"#"+conversationID, files[0].Path) @@ -55,7 +55,7 @@ func TestDiscoverVisualStudioCopilotSessions_IgnoresParentDirs(t *testing.T) { "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", ), []byte("{}\n"), 0o644)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) assert.Empty(t, files) } @@ -80,7 +80,7 @@ func TestDiscoverVisualStudioCopilotSessions_DeduplicatesConversationTraceFiles( require.NoError(t, os.WriteFile(oldTrace, []byte(data), 0o644)) require.NoError(t, os.WriteFile(newTrace, []byte(data), 0o644)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, newTrace+"#"+conversationID, files[0].Path) @@ -100,7 +100,7 @@ func TestParseVisualStudioCopilotSession_MalformedTraceLineReturnsError(t *testi }) + "\n" + `{"resourceSpans":[` + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -159,7 +159,7 @@ func TestDiscoverVisualStudioCopilotSessions_EmitsWorkItemPerConversation(t *tes require.NoError(t, os.WriteFile(oldTrace, []byte(oldData), 0o644)) require.NoError(t, os.WriteFile(newTrace, []byte(newData), 0o644)) - files := DiscoverVisualStudioCopilotSessions(dir) + files := discoverVisualStudioCopilotTestSessions(t, dir) got := map[string]string{} for _, f := range files { @@ -185,7 +185,7 @@ func TestDiscoverVisualStudioCopilotSessions_SampleFixturesEnumerateBothConversa t.Skipf("sample dir not available: %v", err) } - files := DiscoverVisualStudioCopilotSessions(sampleDir) + 
files := discoverVisualStudioCopilotTestSessions(t, sampleDir) got := map[string]struct{}{} for _, f := range files { @@ -228,7 +228,7 @@ func TestParseVisualStudioCopilotConversation_PropagatesSiblingDirReadError(t *t require.NoError(t, os.Chmod(dir, 0o100)) t.Cleanup(func() { _ = os.Chmod(dir, 0o755) }) - _, _, err := ParseVisualStudioCopilotConversation( + _, _, err := parseVisualStudioCopilotTestConversation(t, tracePath, conversationID, "visualstudio", "local", ) require.Error(t, err, @@ -325,7 +325,7 @@ func TestParseVisualStudioCopilotConversation_PropagatesReadError(t *testing.T) ) require.NoError(t, os.Mkdir(dir, 0o755)) - sess, msgs, err := ParseVisualStudioCopilotConversation( + sess, msgs, err := parseVisualStudioCopilotTestConversation(t, dir, "4a8f63f6-7626-4416-a874-fc7bd2c3f005", "visualstudio", "local", ) @@ -349,7 +349,7 @@ func TestDiscoverVisualStudioCopilotSessions_EnqueuesUnreadableTraceFile(t *test ) require.NoError(t, os.Symlink(target, link)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, link, files[0].Path, @@ -394,7 +394,7 @@ func TestParseVisualStudioCopilotSession_IgnoresNonTraceFiles(t *testing.T) { }` require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -431,7 +431,7 @@ func TestParseVisualStudioCopilotTraceSession(t *testing.T) { }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -475,7 +475,7 @@ func TestParseVisualStudioCopilotTraceSession_GetFileResult(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, 
msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -521,7 +521,7 @@ func TestParseVisualStudioCopilotTraceSession_InvokeOnlyFirstMessage(t *testing. }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -553,7 +553,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatPromptFirstMessage(t *testing. }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -593,7 +593,7 @@ func TestParseVisualStudioCopilotTraceSession_PreservesPromptMarkdown(t *testing }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -637,7 +637,7 @@ func TestParseVisualStudioCopilotTraceSession_CombinesConversationTraceFiles(t * require.NoError(t, os.WriteFile(path, []byte(firstData), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(secondData), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -687,7 +687,7 @@ func TestParseVisualStudioCopilotTraceSession_PropagatesSiblingReadError(t *test ) require.NoError(t, os.Symlink(target, sibling)) - _, _, err := ParseVisualStudioCopilotSession( + _, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) require.Error(t, err, @@ -710,7 +710,7 @@ func TestParseVisualStudioCopilotTraceSession_MalformedTraceLineErrors(t *testin }) + "\n" + `{"resourceSpans":` + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - _, _, err := ParseVisualStudioCopilotSession( + _, _, err := 
parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) require.Error(t, err, @@ -746,7 +746,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesChatOutputAcrossFiles( require.NoError(t, os.WriteFile(path, []byte(chatSpan), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(chatSpan), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -805,7 +805,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteChatOutputAcrossFil require.NoError(t, os.WriteFile(path, []byte(partial), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(complete), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -865,7 +865,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteChatUsageForVisible require.NoError(t, os.WriteFile(path, []byte(richEarlier), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(leanerLater), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -913,7 +913,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesToolSpanAcrossFiles(t require.NoError(t, os.WriteFile(path, []byte(partial), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(complete), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -970,7 +970,7 @@ func TestParseVisualStudioCopilotTraceSession_PreservesOrderWhenDedupingToolSpan ) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1010,7 +1010,7 @@ func 
TestParseVisualStudioCopilotTraceSession_ChatOutputMessages(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1071,7 +1071,7 @@ func TestParseVisualStudioCopilotTraceSession_CountsUsageForToolOnlyChatTurn(t * }) + "\n" require.NoError(t, os.WriteFile(path, []byte(chatSpan+toolSpan), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1130,7 +1130,7 @@ func TestParseVisualStudioCopilotTraceSession_DoesNotDoubleCountTextPlusToolUsag }) + "\n" require.NoError(t, os.WriteFile(path, []byte(chatSpan+toolSpan), 0o644)) - sess, _, err := ParseVisualStudioCopilotSession( + sess, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1181,7 +1181,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteToolOnlyChatUsage(t require.NoError(t, os.WriteFile(path, []byte(chatSpan("200", "10")), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(chatSpan("500", "42")+toolSpan), 0o644)) - sess, _, err := ParseVisualStudioCopilotSession( + sess, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1214,7 +1214,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatUsage(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1262,7 +1262,7 @@ func TestParseVisualStudioCopilotTraceSession_StandardToolInputs(t *testing.T) { }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - _, msgs, err := ParseVisualStudioCopilotSession( + _, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ 
-1313,7 +1313,7 @@ func TestParseVisualStudioCopilotTraceSession_UsesSiblingPromptSpan(t *testing.T require.NoError(t, os.WriteFile(path, []byte(primaryData), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(siblingData), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1386,7 +1386,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesPromptAndToolSpans(t * }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1430,7 +1430,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatSummaryFallback(t *testing.T) }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1473,7 +1473,7 @@ func TestParseVisualStudioCopilotConversation_ParsesEachConversationIndependentl require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) // The prompt conversation parses with its user message. - promptSess, _, err := ParseVisualStudioCopilotConversation( + promptSess, _, err := parseVisualStudioCopilotTestConversation(t, path, promptID, "visualstudio", "local", ) require.NoError(t, err) @@ -1486,7 +1486,7 @@ func TestParseVisualStudioCopilotConversation_ParsesEachConversationIndependentl // The ambient conversation in the same file is not dropped; it // parses on its own with its invoke_agent turn. 
- ambientSess, ambientMsgs, err := ParseVisualStudioCopilotConversation( + ambientSess, ambientMsgs, err := parseVisualStudioCopilotTestConversation(t, path, ambientID, "visualstudio", "local", ) require.NoError(t, err) @@ -1517,13 +1517,13 @@ func TestFindVisualStudioCopilotSourceFile(t *testing.T) { []byte(traceLine+"\n"), 0o644)) assert.Equal(t, VisualStudioCopilotVirtualPath(newTrace, uuid), - FindVisualStudioCopilotSourceFile(tracesDir, uuid), + findVisualStudioCopilotTestSourceFile(t, tracesDir, uuid), "source lookup must return a conversation-scoped virtual path so a "+ "single-session resync does not reparse the whole trace file") assert.Equal(t, "", - FindVisualStudioCopilotSourceFile(dir, uuid)) + findVisualStudioCopilotTestSourceFile(t, dir, uuid)) assert.Equal(t, "", - FindVisualStudioCopilotSourceFile(tracesDir, "../etc/passwd")) + findVisualStudioCopilotTestSourceFile(t, tracesDir, "../etc/passwd")) } // TestWriteVisualStudioCopilotConversationJSONL verifies that exporting one diff --git a/internal/parser/vscode_copilot.go b/internal/parser/vscode_copilot.go index c93274b87..84beb8b3f 100644 --- a/internal/parser/vscode_copilot.go +++ b/internal/parser/vscode_copilot.go @@ -115,10 +115,10 @@ type vscodeCopilotWorkspace struct { Workspace string `json:"workspace"` } -// ParseVSCodeCopilotSession parses a VSCode Copilot chat -// session file (.json or .jsonl). Returns (nil, nil, nil) -// if the file is empty or contains no meaningful content. -func ParseVSCodeCopilotSession( +// parseSession parses a VSCode Copilot chat session file (.json or .jsonl). +// Returns (nil, nil, nil) if the file is empty or contains no meaningful +// content. 
+func (p *vscodeCopilotProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/vscode_copilot_provider.go b/internal/parser/vscode_copilot_provider.go new file mode 100644 index 000000000..bfecc74d0 --- /dev/null +++ b/internal/parser/vscode_copilot_provider.go @@ -0,0 +1,654 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "sort" + "strings" +) + +var _ Provider = (*vscodeCopilotProvider)(nil) + +type vscodeCopilotProviderFactory struct { + def AgentDef +} + +func newVSCodeCopilotProviderFactory(def AgentDef) ProviderFactory { + return vscodeCopilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f vscodeCopilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f vscodeCopilotProviderFactory) Capabilities() Capabilities { + return vscodeCopilotProviderCapabilities() +} + +func (f vscodeCopilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &vscodeCopilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: vscodeCopilotProviderCapabilities(), + Config: cfg, + }, + sources: newVSCodeCopilotSourceSet(cfg.Roots), + } +} + +type vscodeCopilotProvider struct { + ProviderBase + sources vscodeCopilotSourceSet +} + +func (p *vscodeCopilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *vscodeCopilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *vscodeCopilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *vscodeCopilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return 
p.sources.FindSource(ctx, req) +} + +func (p *vscodeCopilotProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *vscodeCopilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, project, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("vscode copilot source path unavailable") + } + if req.Source.ProjectHint != "" { + project = req.Source.ProjectHint + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + sess.File.Size = req.Fingerprint.Size + sess.File.Mtime = req.Fingerprint.MTimeNS + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: sess.UsageEvents, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type vscodeCopilotSource struct { + Root string + Path string + Project string +} + +type vscodeCopilotSourceSet struct { + roots []string +} + +func newVSCodeCopilotSourceSet(roots []string) vscodeCopilotSourceSet { + return vscodeCopilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s vscodeCopilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range s.discoverSessionFiles(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + 
addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionFiles traverses the VSCode workspaceStorage directory to find +// chatSessions/*.json and *.jsonl files. When both formats exist for the same +// session UUID, the .jsonl file takes priority. It also checks +// globalStorage/emptyWindowChatSessions and transferredChatSessions. The root +// should point to e.g. +// +// ~/Library/Application Support/Code/User (macOS) +// ~/.config/Code/User (Linux) +func (s vscodeCopilotSourceSet) discoverSessionFiles( + vscodeUserDir string, +) []DiscoveredFile { + if vscodeUserDir == "" { + return nil + } + + var files []DiscoveredFile + + // 1. Scan workspaceStorage//chatSessions/*.{json,jsonl} + wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err == nil { + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + + hashPath := filepath.Join(wsDir, entry.Name()) + chatDir := filepath.Join(hashPath, "chatSessions") + sessionFiles, err := os.ReadDir(chatDir) + if err != nil { + continue + } + + // Read workspace.json to get project name + project := ReadVSCodeWorkspaceManifest(hashPath) + if project == "" { + project = "unknown" + } + + files = append(files, + discoverVSCodeSessionFiles( + chatDir, sessionFiles, project, + AgentVSCodeCopilot, + )..., + ) + } + } + + // 2. 
Scan globalStorage/emptyWindowChatSessions/*.{json,jsonl} + for _, subdir := range []string{ + "globalStorage/emptyWindowChatSessions", + "globalStorage/transferredChatSessions", + } { + globalDir := filepath.Join(vscodeUserDir, subdir) + globalFiles, err := os.ReadDir(globalDir) + if err != nil { + continue + } + files = append(files, + discoverVSCodeSessionFiles( + globalDir, globalFiles, "empty-window", + AgentVSCodeCopilot, + )..., + ) + } + + sort.Slice(files, func(i, j int) bool { + return files[i].Path < files[j].Path + }) + return files +} + +func (s vscodeCopilotSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)*2) + for _, root := range s.roots { + workspace := filepath.Join(root, "workspaceStorage") + roots = append(roots, WatchRoot{ + Path: workspace, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentVSCodeCopilot) + ":workspace:" + workspace, + }) + global := filepath.Join(root, "globalStorage") + roots = append(roots, WatchRoot{ + Path: global, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentVSCodeCopilot) + ":global:" + global, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s vscodeCopilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + sources := s.sourcesForWorkspaceManifest(root, req.Path) + if len(sources) > 0 { + return sources, nil + } + source, ok := s.sourceRefForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s vscodeCopilotSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { 
+ continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a VSCode Copilot session file by UUID (.jsonl +// preferred over .json) across workspaceStorage and the global session dirs. +func (s vscodeCopilotSourceSet) findSourceFile( + vscodeUserDir, rawID string, +) string { + if vscodeUserDir == "" || !IsValidSessionID(rawID) { + return "" + } + + // Search through workspaceStorage + wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err == nil { + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + base := filepath.Join( + wsDir, entry.Name(), "chatSessions", + ) + // Prefer .jsonl + for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join( + base, rawID+ext, + ) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + } + + // Check global dirs + for _, subdir := range []string{ + "globalStorage/emptyWindowChatSessions", + "globalStorage/transferredChatSessions", + } { + base := filepath.Join(vscodeUserDir, subdir) + for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join(base, rawID+ext) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + + return "" +} + +func (s vscodeCopilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, _, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("vscode copilot source path unavailable") + } + info, err 
:= os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + workspacePath := s.workspaceManifestForSource(path) + if workspacePath != "" { + if workspaceInfo, err := os.Stat(workspacePath); err == nil { + fingerprint.Size += workspaceInfo.Size() + if mtime := workspaceInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + } + } + fingerprint.Hash, err = vscodeCopilotSourceHash(path, workspacePath) + if err != nil { + return SourceFingerprint{}, err + } + return fingerprint, nil +} + +func (s vscodeCopilotSourceSet) pathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case vscodeCopilotSource: + return src.Path, src.Project, src.Path != "" + case *vscodeCopilotSource: + if src != nil && src.Path != "" { + return src.Path, src.Project, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(vscodeCopilotSource) + return src.Path, src.Project, true + } + } + } + return "", "", false +} + +func (s vscodeCopilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + 
return SourceRef{}, false + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return s.newSourceRef(root, path, project), true + } + if len(parts) == 3 && + parts[0] == "globalStorage" && + (parts[1] == "emptyWindowChatSessions" || + parts[1] == "transferredChatSessions") && + isVSCodeCopilotSessionPath(parts[2]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + return SourceRef{}, false + } + return s.newSourceRef(root, path, "empty-window"), true + } + return SourceRef{}, false +} + +func (s vscodeCopilotSourceSet) sourceRefForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if req.EventKind != "remove" && vscodeCopilotJSONLPreferredOver(path) { + return SourceRef{}, false + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + return s.syntheticSourceRef(root, path) +} + +func (s vscodeCopilotSourceSet) syntheticSourceRef( + root, path string, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return s.newSourceRef(root, path, project), true + } + if len(parts) == 3 && + parts[0] == "globalStorage" && + (parts[1] == "emptyWindowChatSessions" || + parts[1] == "transferredChatSessions") && + isVSCodeCopilotSessionPath(parts[2]) { + if promoted := 
vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + return s.newSourceRef(root, path, "empty-window"), true + } + return SourceRef{}, false +} + +func (s vscodeCopilotSourceSet) sourcesForWorkspaceManifest( + root, path string, +) []SourceRef { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return nil + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 3 || + parts[0] != "workspaceStorage" || + parts[2] != "workspace.json" { + return nil + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + chatDir := filepath.Join(hashDir, "chatSessions") + entries, err := os.ReadDir(chatDir) + if err != nil { + return nil + } + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + files := discoverVSCodeSessionFiles( + chatDir, entries, project, AgentVSCodeCopilot, + ) + sources := make([]SourceRef, 0, len(files)) + seen := make(map[string]struct{}, len(files)) + for _, file := range files { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources +} + +func (s vscodeCopilotSourceSet) workspaceManifestForSource(path string) string { + for _, root := range s.roots { + root = filepath.Clean(root) + rel, ok := relUnder(root, path) + if !ok { + continue + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + workspacePath := filepath.Join( + root, + "workspaceStorage", + parts[1], + "workspace.json", + ) + if IsRegularFile(workspacePath) { + return workspacePath + } + } + } + return "" +} + +func (s vscodeCopilotSourceSet) newSourceRef(root, path, project string) SourceRef { + return SourceRef{ + Provider: AgentVSCodeCopilot, + Key: path, + 
DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: vscodeCopilotSource{ + Root: root, + Path: path, + Project: project, + }, + } +} + +func isVSCodeCopilotSessionPath(name string) bool { + return strings.HasSuffix(name, ".json") || strings.HasSuffix(name, ".jsonl") +} + +func vscodeCopilotPreferredExistingPath(path string) string { + if base, ok := strings.CutSuffix(path, ".json"); ok { + candidate := base + ".jsonl" + if IsRegularFile(candidate) { + return candidate + } + } + if IsRegularFile(path) { + return path + } + if base, ok := strings.CutSuffix(path, ".jsonl"); ok { + candidate := base + ".json" + if IsRegularFile(candidate) { + return candidate + } + } + return "" +} + +func vscodeCopilotJSONLPreferredOver(path string) bool { + base, ok := strings.CutSuffix(path, ".json") + if !ok { + return false + } + return IsRegularFile(base + ".jsonl") +} + +func vscodeCopilotSourceHash(path, workspacePath string) (string, error) { + hash, err := hashJSONLSourceFile(path) + if err != nil { + return "", err + } + if workspacePath == "" { + return hash, nil + } + workspaceHash, err := hashJSONLSourceFile(workspacePath) + if err != nil { + return "", err + } + h := sha256.New() + _, _ = h.Write([]byte("chat\x00" + hash + "\x00workspace\x00" + workspaceHash)) + return fmt.Sprintf("%x", h.Sum(nil)), nil +} + +func vscodeCopilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + ToolCalls: CapabilitySupported, 
+ ToolResults: CapabilitySupported, + Thinking: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/vscode_copilot_test.go b/internal/parser/vscode_copilot_test.go index bf73482ac..e7fbd3782 100644 --- a/internal/parser/vscode_copilot_test.go +++ b/internal/parser/vscode_copilot_test.go @@ -127,7 +127,7 @@ func TestParseVSCodeCopilotSession(t *testing.T) { path, []byte(tt.json), 0644, )) - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, path, "testproject", "local", ) require.NoError(t, err) @@ -166,7 +166,7 @@ func TestParseVSCodeCopilotSession(t *testing.T) { } func TestParseVSCodeCopilotSession_NonExistent(t *testing.T) { - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, "/nonexistent/path.json", "proj", "local", ) require.NoError(t, err, "expected nil error") @@ -196,7 +196,7 @@ func TestParseVSCodeCopilotSession_MixedTextAndTools(t *testing.T) { path := filepath.Join(dir, "test.json") require.NoError(t, os.WriteFile(path, []byte(data), 0644)) - _, msgs, err := ParseVSCodeCopilotSession(path, "proj", "local") + _, msgs, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) // Find assistant message @@ -242,7 +242,7 @@ func TestParseVSCodeCopilotSession_TerminalToolData(t *testing.T) { path := filepath.Join(dir, "test.json") require.NoError(t, os.WriteFile(path, []byte(data), 0644)) - _, msgs, err := ParseVSCodeCopilotSession(path, "proj", "local") + _, msgs, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) var assistant *ParsedMessage @@ -328,7 +328,7 @@ func TestDiscoverVSCodeCopilotSessions(t *testing.T) { globalPath := filepath.Join(globalDir, "global-sess.json") require.NoError(t, os.WriteFile(globalPath, []byte(sessionJSON), 0644)) - files := DiscoverVSCodeCopilotSessions(root) + files := 
discoverVSCodeCopilotTestSessions(t, root) require.Len(t, files, 2) @@ -518,7 +518,7 @@ func TestParseVSCodeCopilotSession_JSONL(t *testing.T) { path, []byte(content), 0644, )) - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, path, "testproject", "local", ) require.NoError(t, err) @@ -722,7 +722,7 @@ func TestDiscoverVSCodeCopilot_JSONLDedup(t *testing.T) { []byte(sessionJSON), 0644, )) - files := DiscoverVSCodeCopilotSessions(root) + files := discoverVSCodeCopilotTestSessions(t, root) // Should get 3 files: dup1.jsonl, only-jsonl.jsonl, only-json.json if !assert.Len(t, files, 3, "expected 3 files") { @@ -767,7 +767,7 @@ func TestFindVSCodeCopilotSourceFile(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := FindVSCodeCopilotSourceFile( + got := findVSCodeCopilotTestSourceFile(t, tt.dir, tt.id, ) assert.Equal(t, tt.want, got) @@ -823,7 +823,7 @@ func TestParseVSCodeCopilotSession_TokenUsage(t *testing.T) { path := filepath.Join(dir, "usage.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -871,7 +871,7 @@ func TestParseVSCodeCopilotSession_TokenUsageModelFallback(t *testing.T) { path := filepath.Join(dir, "usage2.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -898,7 +898,7 @@ func TestParseVSCodeCopilotSession_NoTokenUsage(t *testing.T) { path := filepath.Join(dir, "nousage.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := 
parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) diff --git a/internal/service/direct.go b/internal/service/direct.go index 5809a91b6..2cc253603 100644 --- a/internal/service/direct.go +++ b/internal/service/direct.go @@ -311,7 +311,7 @@ func (b *directBackend) Sync( // conversation lives on in a sibling. The single-session path keeps the // conversation scope and follows it across sibling trace files. if _, _, ok := - parser.ParseVisualStudioCopilotVirtualPath(storedPath); ok { + parser.SplitVisualStudioCopilotVirtualPath(storedPath); ok { if err := b.engine.SyncSingleSessionContext( ctx, in.ID, ); err != nil { diff --git a/internal/sync/engine.go b/internal/sync/engine.go index cd7033a71..8198c185c 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -380,7 +380,7 @@ func IsVisualStudioCopilotSkipPath(path string) bool { if parser.IsVisualStudioCopilotTraceFile(path) { return true } - _, _, ok := parser.ParseVisualStudioCopilotVirtualPath(path) + _, _, ok := parser.SplitVisualStudioCopilotVirtualPath(path) return ok } @@ -1004,60 +1004,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. 
- // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} - // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} - for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { - if vscDir == "" { - continue - } - if rel, ok := isUnder(vscDir, path); ok { - parts := strings.Split(rel, sep) - // workspaceStorage//chatSessions/.{json,jsonl} - if len(parts) == 4 && - parts[0] == "workspaceStorage" && - parts[2] == "chatSessions" && - (strings.HasSuffix(parts[3], ".json") || - strings.HasSuffix(parts[3], ".jsonl")) { - if vscodeJSONLSiblingExists(path) { - continue - } - hashDir := filepath.Join( - vscDir, "workspaceStorage", parts[1], - ) - project := parser.ReadVSCodeWorkspaceManifest(hashDir) - if project == "" { - project = "unknown" - } - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentVSCodeCopilot, - }, true - } - // globalStorage/emptyWindowChatSessions/.{json,jsonl} - // globalStorage/transferredChatSessions/.{json,jsonl} - if len(parts) == 3 && - parts[0] == "globalStorage" && - (parts[1] == "emptyWindowChatSessions" || parts[1] == "transferredChatSessions") && - (strings.HasSuffix(parts[2], ".json") || - strings.HasSuffix(parts[2], ".jsonl")) { - if vscodeJSONLSiblingExists(path) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: "empty-window", - Agent: parser.AgentVSCodeCopilot, - }, true - } - } - } - - // Visual Studio Copilot: /*_VSGitHubCopilot_traces.jsonl - if df, ok := e.classifyVisualStudioCopilotPath(path, sep); ok { - return df, true - } - if df, ok := e.classifyAiderPath(path); ok { return df, true } @@ -1164,37 +1110,6 @@ func (e *Engine) classifyOnePath( return parser.DiscoveredFile{}, false } -// classifyVisualStudioCopilotPath matches a top-level Visual Studio Copilot -// trace file (/*_VSGitHubCopilot_traces.jsonl) under a configured -// trace directory. Trace files live directly in the directory, so nested -// paths are rejected. 
Split out of classifyOnePath to keep that function -// within NilAway's per-function size limit. -func (e *Engine) classifyVisualStudioCopilotPath( - path, sep string, -) (parser.DiscoveredFile, bool) { - if !parser.IsVisualStudioCopilotTraceFile(path) { - return parser.DiscoveredFile{}, false - } - for _, vsDir := range e.agentDirs[parser.AgentVSCopilot] { - if vsDir == "" { - continue - } - rel, ok := isUnder(vsDir, path) - if !ok { - continue - } - if strings.Contains(rel, sep) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: "visualstudio", - Agent: parser.AgentVSCopilot, - }, true - } - return parser.DiscoveredFile{}, false -} - // classifyAiderPath handles Aider's rootless chat-history layout: // // /.../.aider.chat.history.md @@ -1450,18 +1365,6 @@ func (e *Engine) classifyShelleySQLitePath( return parser.DiscoveredFile{}, false } -// vscodeJSONLSiblingExists returns true when path is a .json -// file and a .jsonl sibling exists for the same UUID. This -// mirrors the dedup logic in DiscoverVSCodeCopilotSessions. -func vscodeJSONLSiblingExists(path string) bool { - base, ok := strings.CutSuffix(path, ".json") - if !ok { - return false - } - _, err := os.Stat(base + ".jsonl") - return err == nil -} - // resyncTempSuffix is appended to the original DB path to // form the temp database path during resync. const resyncTempSuffix = "-resync" @@ -3875,8 +3778,6 @@ func (e *Engine) processFile( statPath = dbPath } else if dbPath, _, ok := parser.ParseShelleyVirtualPath(file.Path); ok { statPath = dbPath - } else if tracePath, _, ok := parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { - statPath = tracePath } else if historyPath, _, ok := parser.ParseAiderVirtualPath(file.Path); ok { // aider stores "#"; stat the physical file // so SyncSingleSession (live watcher / on-demand re-sync) works. 
@@ -3950,10 +3851,6 @@ func (e *Engine) processFile( res = e.processS3Session(ctx, file, info) case parser.AgentReasonix: res = e.processReasonix(file, info) - case parser.AgentVSCodeCopilot: - res = e.processVSCodeCopilot(file, info) - case parser.AgentVSCopilot: - res = e.processVisualStudioCopilot(file, info) case parser.AgentKiro: res = e.processKiro(file, info) case parser.AgentKiroIDE: @@ -4582,7 +4479,7 @@ func (e *Engine) shouldCacheSkip( return false } if _, _, ok := - parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { + parser.SplitVisualStudioCopilotVirtualPath(file.Path); ok { return false } } @@ -5895,112 +5792,6 @@ func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -func (e *Engine) processVSCodeCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseVSCodeCopilotSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - { - Session: *sess, - Messages: msgs, - UsageEvents: sess.UsageEvents, - }, - }, - } -} - -func (e *Engine) processVisualStudioCopilot( - file parser.DiscoveredFile, _ os.FileInfo, -) processResult { - // Resolve the physical trace path first. Discovery emits one - // # work item per conversation; a watcher event - // or single-session resync may instead pass a real trace file, which can - // hold spans for several conversations. 
- tracePath := file.Path - var conversationIDs []string - if resolved, conversationID, ok := - parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { - tracePath = resolved - conversationIDs = []string{conversationID} - } - - // Skip on a fingerprint spanning every sibling trace file: a - // conversation's transcript is rebuilt from all of them, so a change to any - // sibling must defeat the skip even when the representative trace file is - // unchanged. The primary-file stat alone would let a single-session resync - // or watch fallback leave a session stale. - size, mtime, err := parser.VisualStudioCopilotTraceFingerprintStrict( - tracePath, - ) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - if e.shouldSkipByPath( - file.Path, fakeSnapshotInfo{fSize: size, fMtime: mtime}, - ) { - return processResult{skip: true} - } - - // A real trace file can hold spans for several conversations, so enumerate - // them and emit each independently. - if conversationIDs == nil { - ids, err := parser.VisualStudioCopilotFileConversationIDs(file.Path) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - conversationIDs = ids - } - - hash, hashErr := ComputeFileHash(tracePath) - - var results []parser.ParseResult - for _, conversationID := range conversationIDs { - sess, msgs, err := parser.ParseVisualStudioCopilotConversation( - tracePath, conversationID, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - if sess == nil { - continue - } - if hashErr == nil { - sess.File.Hash = hash - } - results = append(results, parser.ParseResult{ - Session: *sess, Messages: msgs, - }) - } - - // forceReplace mirrors the other multi-session-per-source agents - // (Zed, Kiro): each conversation's messages are fully re-derived from - // all of its spans on every parse, so existing rows must be replaced - // rather than appended. 
- return processResult{ - results: results, - forceReplace: true, - } -} - func (e *Engine) processZed( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index c6a074e48..d33e15328 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -7115,6 +7115,143 @@ func TestSyncPathsVSCodeCopilotJSONLPriority(t *testing.T) { assert.Equal(t, 0, len(page.Sessions), "expected 0 sessions (.json skipped), got %d", len(page.Sessions)) } +func TestSyncPathsVSCodeCopilotWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + vscDir := filepath.Join(dir, "vscode") + hashDir := filepath.Join(vscDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVSCodeCopilot: {vscDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "bbbbbbbb-cccc-dddd-eeee-ffffffffffff" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonlPath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + 
assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(jsonlPath) + require.NoError(t, err, "stat vscode copilot session") + engine.InjectSkipCache(map[string]int64{ + jsonlPath: info.ModTime().UnixNano(), + }) + + writeWorkspace("two") + engine.SyncPaths([]string{workspacePath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeWorkspace("three") + engine.SyncPaths([]string{jsonlPath, workspacePath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) +} + +func TestSyncPathsVSCodeCopilotPersistsUsageEvents(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + vscDir := filepath.Join(dir, "vscode") + chatDir := filepath.Join( + vscDir, "workspaceStorage", "abc123", + "chatSessions", + ) + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVSCodeCopilot: {vscDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-000000000000" + session := fmt.Sprintf( + `{"version":3,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000,`+ + `"modelId":"copilot/claude-opus-4.8",`+ + `"result":{"metadata":{`+ + `"promptTokens":12,`+ + `"outputTokens":3,`+ + `"resolvedModel":"claude-opus-4-8"}}}]}`, + uuid, + ) + jsonPath := filepath.Join(chatDir, uuid+".json") + dbtest.WriteTestFile(t, jsonPath, []byte(session)) + + engine.SyncPaths([]string{jsonPath}) + + ctx := context.Background() + sessionID := "vscode-copilot:" + uuid + events, err := database.GetUsageEvents(ctx, sessionID) + require.NoError(t, err) + require.Len(t, events, 1) + assert.Equal(t, 
"vscode-copilot", events[0].Source) + assert.Equal(t, "claude-opus-4-8", events[0].Model) + assert.Equal(t, 12, events[0].InputTokens) + assert.Equal(t, 3, events[0].OutputTokens) + + require.NoError(t, engine.SyncSingleSession(sessionID)) + events, err = database.GetUsageEvents(ctx, sessionID) + require.NoError(t, err) + require.Len(t, events, 1) + assert.Equal(t, "claude-opus-4-8", events[0].Model) +} + func TestPiSessionIntegration(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index ef218aba7..92f950882 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -445,7 +445,7 @@ func isOpenCodeFamilyProviderVirtualSource(path string) bool { // shared trace file, and the "#runIdx" suffix aider appends to its shared // history file. func stripVirtualSourceSuffix(path string) string { - if tracePath, _, ok := parser.ParseVisualStudioCopilotVirtualPath(path); ok { + if tracePath, _, ok := parser.SplitVisualStudioCopilotVirtualPath(path); ok { return tracePath } if historyPath, _, ok := parser.ParseAiderVirtualPath(path); ok { diff --git a/internal/sync/visualstudio_copilot_integration_test.go b/internal/sync/visualstudio_copilot_integration_test.go index c2b3113ed..bb3fc545d 100644 --- a/internal/sync/visualstudio_copilot_integration_test.go +++ b/internal/sync/visualstudio_copilot_integration_test.go @@ -246,8 +246,8 @@ func TestFindSourceFileVisualStudioCopilotReturnsVirtualPath(t *testing.T) { } // TestSyncSingleSessionContextVisualStudioCopilotPreservesProject verifies that -// a single-session re-sync keeps the session's visualstudio project rather than -// overwriting it with an empty string. +// a single-session re-sync keeps the stored project rather than overwriting it +// with the provider's default project. 
func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") @@ -271,6 +271,8 @@ func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing. require.NoError(t, err) require.NotNil(t, before) require.Equal(t, "visualstudio", before.Project) + before.Project = "stored-solution" + require.NoError(t, database.UpsertSession(*before)) require.NoError(t, engine.SyncSingleSessionContext( context.Background(), sessionID, @@ -279,8 +281,8 @@ func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing. after, err := database.GetSession(context.Background(), sessionID) require.NoError(t, err) require.NotNil(t, after) - assert.Equal(t, "visualstudio", after.Project, - "single-session re-sync must preserve the visualstudio project") + assert.Equal(t, "stored-solution", after.Project, + "single-session re-sync must preserve the stored project") } // TestSyncEngineVisualStudioCopilotUnreadableSiblingBlocksPartialSession From 8fd9bcd69ddb16439eeb5bd4a08e7fcf0f6a09d2 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Tue, 23 Jun 2026 11:15:35 -0400 Subject: [PATCH 04/10] feat(parser): migrate positron provider Fold Positron onto a concrete provider-authoritative implementation and delete the duplicated legacy parser path so there is a single source of truth for its workspaceStorage-only layout and parse behavior. Discovery, source lookup, and parse move onto the provider; the package-level DiscoverPositronSessions, FindPositronSourceFile, and ParsePositronSession free functions are removed and positron.go is deleted. The engine's positron-specific dispatch, effective-mtime, and skip-cache blocks are removed in favor of the provider Fingerprint, which folds workspace.json size, mtime, and a chat+workspace composite hash into the source fingerprint so a workspace-only project rename still re-syncs. 
To keep that composite freshness once positron has no legacy mtime block, the SyncAllSince mtime filter resolves provider-authoritative sources through the provider Fingerprint (discoveredFileEffectiveMtime) instead of the legacy per-agent mtime path. Codex is excluded from that path: its Fingerprint folds the shared session_index.jsonl mtime into every session, which is correct for the skip cache but defeats the per-copy mtime discrimination the incremental-sync cutoff needs to preserve a changed archived duplicate, so codex keeps its raw per-file mtime and the index refresh stays handled separately by codexIndexRefresh. The OpenCode incremental-sync test asserts the resulting composite freshness, where a part-only edit advances the source mtime past the cutoff and re-syncs. --- internal/parser/discovery.go | 4 +- internal/parser/positron.go | 148 ------ internal/parser/positron_provider.go | 574 ++++++++++++++++++++++ internal/parser/positron_provider_test.go | 134 +++++ internal/parser/positron_test.go | 31 +- internal/parser/provider.go | 10 +- internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 8 +- internal/sync/engine.go | 69 ++- internal/sync/engine_integration_test.go | 261 ++++++++++ 10 files changed, 1035 insertions(+), 206 deletions(-) delete mode 100644 internal/parser/positron.go create mode 100644 internal/parser/positron_provider.go create mode 100644 internal/parser/positron_provider_test.go diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index dc2d288b7..77484729e 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -1035,7 +1035,7 @@ func discoverVSCodeSessionFiles( files = append(files, DiscoveredFile{ Path: filepath.Join(dir, name), Project: project, - Agent: AgentVSCodeCopilot, + Agent: agent, }) } else if uuid, ok := strings.CutSuffix(name, ".json"); ok { // Skip .json if a .jsonl exists for the same UUID @@ -1045,7 +1045,7 @@ func discoverVSCodeSessionFiles( files = 
append(files, DiscoveredFile{ Path: filepath.Join(dir, name), Project: project, - Agent: AgentVSCodeCopilot, + Agent: agent, }) } } diff --git a/internal/parser/positron.go b/internal/parser/positron.go deleted file mode 100644 index 25d3739b4..000000000 --- a/internal/parser/positron.go +++ /dev/null @@ -1,148 +0,0 @@ -package parser - -import ( - "fmt" - "os" - "path/filepath" - "strings" -) - -// ParsePositronSession parses a Positron Assistant chat session -// file. The format is identical to VSCode Copilot sessions. -// Returns (nil, nil, nil) if the file is empty or contains no -// meaningful content. -func ParsePositronSession( - path, project, machine string, -) (*ParsedSession, []ParsedMessage, error) { - info, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil, nil, nil - } - return nil, nil, fmt.Errorf("stat %s: %w", path, err) - } - - var data []byte - if strings.HasSuffix(path, ".jsonl") { - data, err = reconstructJSONL(path) - } else { - data, err = os.ReadFile(path) - } - if err != nil { - return nil, nil, fmt.Errorf("read %s: %w", path, err) - } - if len(data) == 0 { - return nil, nil, nil - } - - // Reuse VSCode Copilot parsing logic since formats are identical - sess, msgs, err := parseVSCodeCopilotData( - data, path, project, machine, - ) - if err != nil { - return nil, nil, err - } - if sess == nil { - return nil, nil, nil - } - - // Override agent type and ID prefix for Positron - sess.Agent = AgentPositron - sess.ID = "positron:" + sess.ID - - sess.File = FileInfo{ - Path: path, - Size: info.Size(), - Mtime: info.ModTime().UnixNano(), - } - - return sess, msgs, nil -} - -// DiscoverPositronSessions finds all chat session files under the -// Positron User directory. 
The structure mirrors VSCode: -// /workspaceStorage//chatSessions/.json -func DiscoverPositronSessions(userDir string) []DiscoveredFile { - if userDir == "" { - return nil - } - - var files []DiscoveredFile - - // Scan workspaceStorage//chatSessions/*.{json,jsonl} - wsDir := filepath.Join(userDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err != nil { - return nil - } - - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - - hashPath := filepath.Join(wsDir, entry.Name()) - chatDir := filepath.Join(hashPath, "chatSessions") - sessionFiles, err := os.ReadDir(chatDir) - if err != nil { - continue - } - - // Read workspace.json to get project name - project := ReadVSCodeWorkspaceManifest(hashPath) - if project == "" { - project = "unknown" - } - - for _, f := range sessionFiles { - if f.IsDir() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl") { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatDir, name), - Project: project, - Agent: AgentPositron, - }) - } - } - - return files -} - -// FindPositronSourceFile locates a Positron session file by its -// raw ID (prefix already stripped). 
-func FindPositronSourceFile(userDir, rawID string) string { - if userDir == "" || !IsValidSessionID(rawID) { - return "" - } - - // Search through workspaceStorage - wsDir := filepath.Join(userDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err != nil { - return "" - } - - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - base := filepath.Join( - wsDir, entry.Name(), "chatSessions", - ) - // Prefer .jsonl over .json - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join(base, rawID+ext) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - - return "" -} diff --git a/internal/parser/positron_provider.go b/internal/parser/positron_provider.go new file mode 100644 index 000000000..81f47f287 --- /dev/null +++ b/internal/parser/positron_provider.go @@ -0,0 +1,574 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*positronProvider)(nil) + +type positronProviderFactory struct { + def AgentDef +} + +func newPositronProviderFactory(def AgentDef) ProviderFactory { + return positronProviderFactory{def: cloneAgentDef(def)} +} + +func (f positronProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f positronProviderFactory) Capabilities() Capabilities { + return positronProviderCapabilities() +} + +func (f positronProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &positronProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: positronProviderCapabilities(), + Config: cfg, + }, + sources: newPositronSourceSet(cfg.Roots), + } +} + +type positronProvider struct { + ProviderBase + sources positronSourceSet +} + +func (p *positronProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *positronProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return 
p.sources.WatchPlan(ctx) +} + +func (p *positronProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *positronProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *positronProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *positronProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, project, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("positron source path unavailable") + } + if req.Source.ProjectHint != "" { + project = req.Source.ProjectHint + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +// parseSession parses a Positron Assistant chat session file. The format is +// identical to VSCode Copilot sessions. Returns (nil, nil, nil) if the file is +// empty or contains no meaningful content. 
+func (p *positronProvider) parseSession( + path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil, nil + } + return nil, nil, fmt.Errorf("stat %s: %w", path, err) + } + + var data []byte + if strings.HasSuffix(path, ".jsonl") { + data, err = reconstructJSONL(path) + } else { + data, err = os.ReadFile(path) + } + if err != nil { + return nil, nil, fmt.Errorf("read %s: %w", path, err) + } + if len(data) == 0 { + return nil, nil, nil + } + + // Reuse VSCode Copilot parsing logic since formats are identical. + sess, msgs, err := parseVSCodeCopilotData(data, path, project, machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + + // Override agent type and ID prefix for Positron. + sess.Agent = AgentPositron + sess.ID = "positron:" + sess.ID + + sess.File = FileInfo{ + Path: path, + Size: info.Size(), + Mtime: info.ModTime().UnixNano(), + } + + return sess, msgs, nil +} + +type positronSource struct { + Root string + Path string + Project string +} + +type positronSourceSet struct { + roots []string +} + +func newPositronSourceSet(roots []string) positronSourceSet { + return positronSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s positronSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range s.discoverSessions(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessions finds all chat session files under a Positron User +// directory. The structure mirrors VSCode: +// /workspaceStorage//chatSessions/.{json,jsonl}. 
When a +// .jsonl and .json sibling exist for the same UUID, the .jsonl is preferred. +func (s positronSourceSet) discoverSessions(userDir string) []DiscoveredFile { + if userDir == "" { + return nil + } + + var files []DiscoveredFile + + // Scan workspaceStorage//chatSessions/*.{json,jsonl}. + wsDir := filepath.Join(userDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err != nil { + return nil + } + + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + + hashPath := filepath.Join(wsDir, entry.Name()) + chatDir := filepath.Join(hashPath, "chatSessions") + sessionFiles, err := os.ReadDir(chatDir) + if err != nil { + continue + } + + project := positronWorkspaceProject(userDir, entry.Name()) + files = append(files, + discoverVSCodeSessionFiles( + chatDir, sessionFiles, project, AgentPositron, + )..., + ) + } + + return files +} + +// findSourceFile locates a Positron session file by its raw ID (prefix already +// stripped), preferring .jsonl over .json. Returns "" when no matching file +// exists. +func (s positronSourceSet) findSourceFile(userDir, rawID string) string { + if userDir == "" || !IsValidSessionID(rawID) { + return "" + } + + wsDir := filepath.Join(userDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err != nil { + return "" + } + + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + base := filepath.Join(wsDir, entry.Name(), "chatSessions") + // Prefer .jsonl over .json. 
+ for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join(base, rawID+ext) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + + return "" +} + +func (s positronSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + workspace := filepath.Join(root, "workspaceStorage") + roots = append(roots, WatchRoot{ + Path: workspace, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentPositron) + ":workspace:" + workspace, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s positronSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + sources := s.sourcesForWorkspaceManifest(root, req.Path) + if len(sources) > 0 { + return sources, nil + } + source, ok := s.sourceRefForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s positronSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (s positronSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return 
SourceFingerprint{}, err + } + path, _, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("positron source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + workspacePath := s.workspaceManifestForSource(path) + if workspacePath != "" { + if workspaceInfo, err := os.Stat(workspacePath); err == nil { + fingerprint.Size += workspaceInfo.Size() + if mtime := workspaceInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + } + } + fingerprint.Hash, err = vscodeCopilotSourceHash(path, workspacePath) + if err != nil { + return SourceFingerprint{}, err + } + return fingerprint, nil +} + +func (s positronSourceSet) pathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case positronSource: + return src.Path, src.Project, src.Path != "" + case *positronSource: + if src != nil && src.Path != "" { + return src.Path, src.Project, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(positronSource) + return src.Path, src.Project, true + } + } + } + return "", "", false +} + +func (s positronSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 4 || + parts[0] != "workspaceStorage" || + parts[2] != "chatSessions" || + 
!isVSCodeCopilotSessionPath(parts[3]) { + return SourceRef{}, false + } + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + return SourceRef{}, false + } + project := positronWorkspaceProject(root, parts[1]) + return s.newSourceRef(root, path, project), true +} + +func (s positronSourceSet) sourceRefForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if req.EventKind != "remove" && vscodeCopilotJSONLPreferredOver(path) { + return SourceRef{}, false + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + return s.syntheticSourceRef(root, path) +} + +func (s positronSourceSet) syntheticSourceRef( + root, path string, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 4 || + parts[0] != "workspaceStorage" || + parts[2] != "chatSessions" || + !isVSCodeCopilotSessionPath(parts[3]) { + return SourceRef{}, false + } + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + project := positronWorkspaceProject(root, parts[1]) + return s.newSourceRef(root, path, project), true +} + +func (s positronSourceSet) sourcesForWorkspaceManifest( + root, path string, +) []SourceRef { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return nil + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 3 || + parts[0] != "workspaceStorage" || + parts[2] != "workspace.json" { + return nil + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + chatDir := filepath.Join(hashDir, "chatSessions") + entries, err := os.ReadDir(chatDir) + if err != nil { + return nil + } + project := positronWorkspaceProject(root, parts[1]) + files := 
discoverVSCodeSessionFiles(chatDir, entries, project, AgentPositron) + sources := make([]SourceRef, 0, len(files)) + seen := make(map[string]struct{}, len(files)) + for _, file := range files { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.Provider = AgentPositron + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources +} + +func (s positronSourceSet) workspaceManifestForSource(path string) string { + for _, root := range s.roots { + root = filepath.Clean(root) + rel, ok := relUnder(root, path) + if !ok { + continue + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + workspacePath := filepath.Join( + root, + "workspaceStorage", + parts[1], + "workspace.json", + ) + if IsRegularFile(workspacePath) { + return workspacePath + } + } + } + return "" +} + +func (s positronSourceSet) newSourceRef(root, path, project string) SourceRef { + return SourceRef{ + Provider: AgentPositron, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: positronSource{ + Root: root, + Path: path, + Project: project, + }, + } +} + +func positronWorkspaceProject(root, hash string) string { + hashDir := filepath.Join(root, "workspaceStorage", hash) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return project +} + +func positronProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + 
ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + Thinking: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/positron_provider_test.go b/internal/parser/positron_provider_test.go new file mode 100644 index 000000000..24ace99e2 --- /dev/null +++ b/internal/parser/positron_provider_test.go @@ -0,0 +1,134 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPositronProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentPositron) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestPositronProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "positron-provider" + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + sourcePath := filepath.Join(chatDir, sessionID+".jsonl") + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-app"}`) + writeSourceFile(t, sourcePath, + vscodeCopilotProviderJSONL(sessionID, "Hello Positron")) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, "workspaceStorage"), plan.Roots[0].Path) + assert.True(t, 
plan.Roots[0].Recursive) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "positron-app", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~positron:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + require.False(t, outcome.ForceReplace) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "positron:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentPositron, result.Result.Session.Agent) + assert.Equal(t, "positron-app", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestPositronProviderClassifiesDeletedAndMetadataPaths(t *testing.T) { + root := t.TempDir() + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := 
filepath.Join(hashDir, "workspace.json") + sourcePath := filepath.Join(chatDir, "metadata.jsonl") + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-app"}`) + writeSourceFile(t, sourcePath, + vscodeCopilotProviderJSONL("metadata", "Hello metadata")) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + metadataChanged, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, metadataChanged, 1) + assert.Equal(t, sourcePath, metadataChanged[0].DisplayPath) + + beforeMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-renamed-app"}`) + afterMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + assert.NotEqual(t, beforeMetadata.Hash, afterMetadata.Hash) + + require.NoError(t, os.Remove(sourcePath)) + deleted, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, deleted, 1) + assert.Equal(t, sourcePath, deleted[0].DisplayPath) +} diff --git a/internal/parser/positron_test.go b/internal/parser/positron_test.go index 8a93ffadb..50b51b724 100644 --- a/internal/parser/positron_test.go +++ b/internal/parser/positron_test.go @@ -9,7 +9,11 @@ import ( "github.com/stretchr/testify/require" ) -func TestParsePositronSession(t *testing.T) { +func newPositronTestSourceSet(roots ...string) positronSourceSet { + return newPositronSourceSet(roots) +} + +func TestPositronProviderParseSession(t *testing.T) { // Create a minimal Positron session JSON sessionJSON := `{ "version": 3, @@ -60,10 +64,11 @@ func TestParsePositronSession(t *testing.T) { sessionPath, []byte(sessionJSON), 
0644, )) - sess, msgs, err := ParsePositronSession( + p := &positronProvider{} + sess, msgs, err := p.parseSession( sessionPath, "test-project", "test-machine", ) - require.NoError(t, err, "ParsePositronSession failed") + require.NoError(t, err, "parseSession failed") require.NotNil(t, sess, "expected session, got nil") // Verify session metadata @@ -86,7 +91,7 @@ func TestParsePositronSession(t *testing.T) { assert.True(t, msgs[3].HasToolUse, "msgs[3] should have tool use") } -func TestDiscoverPositronSessions(t *testing.T) { +func TestPositronSourceSetDiscoverSessions(t *testing.T) { tmpDir := t.TempDir() // Create directory structure: @@ -106,11 +111,13 @@ func TestDiscoverPositronSessions(t *testing.T) { 0644, )) - // Create session files + // Create session files. The .json file with a .jsonl sibling must be + // deduped so full discovery matches changed-path sync precedence. sessionJSON := `{"version": 3, "requests": []}` for _, name := range []string{ "session-1.json", "session-2.jsonl", + "session-2.json", } { require.NoError(t, os.WriteFile( filepath.Join(chatDir, name), @@ -126,16 +133,20 @@ func TestDiscoverPositronSessions(t *testing.T) { 0644, )) - files := DiscoverPositronSessions(tmpDir) + set := newPositronTestSourceSet(tmpDir) + files := set.discoverSessions(tmpDir) require.Len(t, files, 2) + paths := make([]string, 0, len(files)) for _, f := range files { + paths = append(paths, filepath.Base(f.Path)) assert.Equal(t, AgentPositron, f.Agent) assert.Equal(t, "myproject", f.Project) } + assert.ElementsMatch(t, []string{"session-1.json", "session-2.jsonl"}, paths) } -func TestFindPositronSourceFile(t *testing.T) { +func TestPositronSourceSetFindSourceFile(t *testing.T) { tmpDir := t.TempDir() // Create directory structure @@ -151,11 +162,13 @@ func TestFindPositronSourceFile(t *testing.T) { sessionPath, []byte(`{}`), 0644, )) + set := newPositronTestSourceSet(tmpDir) + // Test finding existing session - found := FindPositronSourceFile(tmpDir, 
"test-uuid") + found := set.findSourceFile(tmpDir, "test-uuid") assert.Equal(t, sessionPath, found) // Test finding non-existent session - notFound := FindPositronSourceFile(tmpDir, "nonexistent") + notFound := set.findSourceFile(tmpDir, "nonexistent") assert.Empty(t, notFound) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 85f4653a7..fa5d214a3 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -394,10 +394,14 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newOpenClawProviderFactory(def) case AgentOMP, AgentPi: return newPiProviderFactory(def) - case AgentQwenPaw: - return newQwenPawProviderFactory(def) + case AgentPositron: + return newPositronProviderFactory(def) case AgentQClaw: return newQClawProviderFactory(def) + case AgentQwen: + return newQwenProviderFactory(def) + case AgentQwenPaw: + return newQwenPawProviderFactory(def) case AgentVSCopilot: return newVisualStudioCopilotProviderFactory(def) case AgentVSCodeCopilot: @@ -406,8 +410,6 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newVibeProviderFactory(def) case AgentWorkBuddy: return newWorkBuddyProviderFactory(def) - case AgentQwen: - return newQwenProviderFactory(def) case AgentZencoder: return newZencoderProviderFactory(def) default: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 90c65afb2..bc8033e17 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -50,7 +50,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentForge: ProviderMigrationLegacyOnly, AgentPiebald: ProviderMigrationLegacyOnly, AgentWarp: ProviderMigrationLegacyOnly, - AgentPositron: ProviderMigrationLegacyOnly, + AgentPositron: ProviderMigrationProviderAuthoritative, AgentAntigravity: ProviderMigrationLegacyOnly, AgentAntigravityCLI: ProviderMigrationLegacyOnly, AgentVibe: ProviderMigrationProviderAuthoritative, diff 
--git a/internal/parser/types.go b/internal/parser/types.go index 0dd3f42c4..bd16c1274 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -477,11 +477,9 @@ var Registry = []AgentDef{ DefaultDirs: []string{ "Library/Application Support/Positron/User", }, - IDPrefix: "positron:", - WatchSubdirs: []string{"workspaceStorage"}, - FileBased: true, - DiscoverFunc: DiscoverPositronSessions, - FindSourceFunc: FindPositronSourceFile, + IDPrefix: "positron:", + WatchSubdirs: []string{"workspaceStorage"}, + FileBased: true, }, { Type: AgentZed, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 8198c185c..288590bd2 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -2852,12 +2852,33 @@ func (e *Engine) filterFilesByMtime( return out } +// discoveredFileEffectiveMtime returns the freshness timestamp used to filter a +// discovered file against an incremental-sync cutoff. For provider-sourced +// files it consults the provider's Fingerprint so composite/sibling-file +// freshness (for example a Positron session whose workspace.json changed while +// the chat transcript did not) is honored without a per-agent legacy helper. +// Files without a provider source fall back to the legacy mtime computation. func (e *Engine) discoveredFileEffectiveMtime( - ctx context.Context, - file parser.DiscoveredFile, + ctx context.Context, file parser.DiscoveredFile, ) (int64, error) { + // Codex is excluded from the provider-Fingerprint path on purpose. Its + // Fingerprint folds the shared session_index.jsonl mtime into every + // session's freshness (see CodexEffectiveMtime). That shared signal is + // correct for the skip cache but wrong for the incremental-sync cutoff: + // when the index changes, both the live and archived copies of a UUID + // would look fresh, defeating the per-copy mtime discrimination that + // expandCodexProviderDuplicates relies on to preserve a changed archived + // duplicate. 
Index refreshes are handled separately by the codexIndexRefresh + // pass in filterFilesByMtime, so codex uses its raw per-file mtime here. + if file.Agent == parser.AgentCodex { + return discoveredFileMtime(file) + } + // Only provider-authoritative sources resolve freshness through the + // provider Fingerprint. Shadow-compare files keep the legacy mtime path so + // agent-specific incremental-sync behavior (for example the Codex index + // refresh below) is unchanged while a provider is still shadowed. if file.ProviderSource != nil && file.ProviderProcess { - if mtime, ok, err := e.providerFingerprintMtime(ctx, file); err != nil { + if mtime, ok, err := e.providerSourceMtime(ctx, file); err != nil { return 0, err } else if ok { return mtime, nil @@ -2866,9 +2887,12 @@ func (e *Engine) discoveredFileEffectiveMtime( return discoveredFileMtime(file) } -func (e *Engine) providerFingerprintMtime( - ctx context.Context, - file parser.DiscoveredFile, +// providerSourceMtime resolves a provider-sourced file's effective mtime through +// the owning provider's Fingerprint. The boolean reports whether the provider +// runtime produced a usable timestamp; a false result tells the caller to fall +// back to the legacy mtime path. 
+func (e *Engine) providerSourceMtime( + ctx context.Context, file parser.DiscoveredFile, ) (int64, bool, error) { if file.ProviderSource == nil { return 0, false, nil @@ -3855,8 +3879,6 @@ func (e *Engine) processFile( res = e.processKiro(file, info) case parser.AgentKiroIDE: res = e.processKiroIDE(file, info) - case parser.AgentPositron: - res = e.processPositron(file, info) case parser.AgentZed: res = e.processZed(file, info) case parser.AgentShelley: @@ -6093,35 +6115,6 @@ func vibeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -func (e *Engine) processPositron( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParsePositronSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - // aiderFileUnchanged reports whether a physical aider history file is // unchanged since the last sync. 
Aider sessions are stored under virtual // "#" paths, so the generic shouldSkipByPath (which looks the @@ -6289,6 +6282,8 @@ func (e *Engine) processAntigravityCLI( if sess == nil { return processResult{} } + sess.File.Size = effectiveInfo.Size() + sess.File.Mtime = effectiveInfo.ModTime().UnixNano() hash, err := ComputeFileHash(file.Path) if err == nil { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index d33e15328..9aec4e947 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -4744,6 +4744,8 @@ func TestSyncAllSinceOpenCodeStoragePicksUpUsagePartUpdate(t *testing.T) { require.NoError(t, os.Chtimes(usagePartPath, future, future), "chtimes usage part") require.NoError(t, os.Chtimes(sessionPath, sessionMtime, sessionMtime), "restore session mtime") + // Composite freshness includes the part file, so the part-only edit is + // fresh relative to the cutoff and re-syncs the updated reply. stats := env.engine.SyncAllSince(context.Background(), cutoff, nil) require.Equal(t, 1, stats.Synced, "SyncAllSince synced = %d, want 1", stats.Synced) @@ -7252,6 +7254,265 @@ func TestSyncPathsVSCodeCopilotPersistsUsageEvents(t *testing.T) { assert.Equal(t, "claude-opus-4-8", events[0].Model) } +func TestSyncPathsPositronJSONLPriority(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + chatDir := filepath.Join( + positronDir, "workspaceStorage", "abc123", + "chatSessions", + ) + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-aaaaaaaaaaaa" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + 
`"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + + jsonPath := filepath.Join(chatDir, uuid+".json") + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + dbtest.WriteTestFile(t, jsonPath, []byte(session)) + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonPath}) + + page, err := database.ListSessions( + context.Background(), db.SessionFilter{Limit: 10}, + ) + require.NoError(t, err) + assert.Equal(t, 0, len(page.Sessions), "expected 0 sessions (.json skipped), got %d", len(page.Sessions)) +} + +func TestSyncAllPositronJSONLPriority(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-bbbbbbbbbbbb" + jsonSession := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"json fallback"},`+ + `"response":[{"value":"json response"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlSession := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"jsonl preferred"},`+ + `"response":[{"value":"jsonl response"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + + jsonPath := filepath.Join(chatDir, uuid+".json") + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + dbtest.WriteTestFile(t, jsonPath, 
[]byte(jsonSession)) + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+jsonlSession+`}`), + ) + + stats := engine.SyncAll(context.Background(), nil) + assert.Equal(t, 1, stats.Synced, "synced = %d, want 1", stats.Synced) + + sess, err := database.GetSession(context.Background(), "positron:"+uuid) + require.NoError(t, err) + require.NotNil(t, sess) + assertSessionMessageCount(t, database, "positron:"+uuid, 2) + msgs := fetchMessages(t, database, "positron:"+uuid) + require.NotEmpty(t, msgs) + assert.Equal(t, "jsonl preferred", msgs[0].Content) +} + +func TestSyncPathsPositronWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "dddddddd-eeee-ffff-aaaa-bbbbbbbbbbbb" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonlPath}) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "one", 
sess.Project) + }, + ) + + info, err := os.Stat(jsonlPath) + require.NoError(t, err, "stat positron session") + engine.InjectSkipCache(map[string]int64{ + jsonlPath: info.ModTime().UnixNano(), + }) + + writeWorkspace("two") + engine.SyncPaths([]string{workspacePath}) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeWorkspace("three") + engine.SyncPaths([]string{jsonlPath, workspacePath}) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) +} + +func TestSyncAllSincePositronWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "dddddddd-eeee-ffff-aaaa-cccccccccccc" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncAll(context.Background(), nil) + assertSessionState( + t, database, "positron:"+uuid, + 
func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + oldTime := time.Now().Add(-48 * time.Hour) + require.NoError(t, os.Chtimes(jsonlPath, oldTime, oldTime), "chtimes session") + require.NoError(t, os.Chtimes(workspacePath, oldTime, oldTime), "chtimes workspace") + cutoff := time.Now().Add(-1 * time.Hour) + + writeWorkspace("two") + stats := engine.SyncAllSince(context.Background(), cutoff, nil) + assert.Equal(t, 1, stats.Synced, "synced = %d, want 1", stats.Synced) + + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) +} + func TestPiSessionIntegration(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") From d8ee6bc4763203fa8084656c0056c0b126fa8e9f Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 21:07:32 -0400 Subject: [PATCH 05/10] test(sync): update Codex incremental fingerprint expectation Codex does not advertise incremental append, so re-syncing an appended transcript is a full re-parse that stores the raw file size and hash, including the ignored partial trailing line. The parsed-snapshot versus partial-tail distinction is enforced at parse-diff time via CodexTranscriptConsumedSize, not in the stored fingerprint. Align the regression with the provider-folded behavior. 
--- internal/sync/engine_integration_test.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 9aec4e947..826deb083 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -8310,7 +8310,7 @@ func TestIncrementalSync_CodexStoresEffectiveMtime(t *testing.T) { "effective mtime exceeds the plain rollout mtime") } -func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { +func TestIncrementalSync_CodexAppendFullReparseStoresRawFileSize(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") } @@ -8352,18 +8352,17 @@ func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { live, err := os.ReadFile(path) require.NoError(t, err, "read live transcript") - // Codex is provider-authoritative and always full-parses (the provider does - // not advertise incremental append), so the stored file_size is the full - // transcript length and file_hash covers the whole file. The partial - // trailing JSON line is skipped during parsing but still counts toward the - // fingerprinted size/hash; once it is completed the file grows and the next - // sync re-parses it. + // Codex does not advertise incremental append, so re-syncing the appended + // transcript is a full re-parse that stores the raw file size and hash + // (including the ignored partial trailing line). The parsed-snapshot vs + // partial-tail distinction is enforced at parse-diff time via + // CodexTranscriptConsumedSize, not in the stored fingerprint. 
require.Equal(t, int64(len(live)), *sess.FileSize, - "full parse stores the full transcript size") + "full Codex re-parse stores the raw file size") sum := sha256.Sum256(live) wantHash := fmt.Sprintf("%x", sum[:]) assert.Equal(t, wantHash, *sess.FileHash, - "Codex full-parse hash must match the full transcript") + "stored Codex hash matches the whole-file fingerprint") } func TestIncrementalSync_CodexExecAppendRetainsEvents(t *testing.T) { From ecccbdb47ea3a687533642424ebe8d6e81b96b0d Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Fri, 26 Jun 2026 15:29:23 -0400 Subject: [PATCH 06/10] test(parser): drop migrated providers from the pending shim list This branch folds the positron, Visual Studio Copilot, and VS Code Copilot providers onto their provider-owned source sets, so their *_provider.go files no longer reference legacy free functions. The anti-shim gate (TestProviderFilesDoNotReferenceLegacyEntrypoints) requires a provider be removed from pendingShimProviderFiles on the same branch it stops being a shim, so leaving these three entries fails the gate here and on every branch up to where they were previously removed. Remove them now; the remaining entries fold in on later branches. --- internal/parser/provider_shim_scan_test.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index c9d3a483c..8bef3b59d 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -47,16 +47,13 @@ var providerNeutralEntrypoints = map[string]bool{ // tip (the zero-legacy gate) asserts this list is empty, so a provider cannot // remain a permanent shim. 
var pendingShimProviderFiles = map[string]bool{ - "antigravity_cli_provider.go": true, - "antigravity_provider.go": true, - "db_backed_provider.go": true, - "kiro_ide_provider.go": true, - "kiro_provider.go": true, - "positron_provider.go": true, - "shelley_provider.go": true, - "visualstudio_copilot_provider.go": true, - "vscode_copilot_provider.go": true, - "zed_provider.go": true, + "antigravity_cli_provider.go": true, + "antigravity_provider.go": true, + "db_backed_provider.go": true, + "kiro_ide_provider.go": true, + "kiro_provider.go": true, + "shelley_provider.go": true, + "zed_provider.go": true, } // collectLegacyFreeFuncs returns the set of package-level free functions in the From 745d998284fa89ca07395cd824fddd254b57a0ba Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sat, 27 Jun 2026 10:36:48 -0400 Subject: [PATCH 07/10] fix(sync): reparse gemini sessions when project metadata changes The pre-fingerprint fast skip compared only the Gemini session transcript's size and mtime, but the Gemini provider's fingerprint is composite: it folds in projects.json and trustedFolders.json, which resolve a session's project. A scheduled SyncAll could therefore skip a session whose transcript was unchanged while its project metadata had changed, leaving a stale project until the transcript itself was rewritten. Drop Gemini from the fast skip so it computes the composite fingerprint and relies on the post-fingerprint skip cache, whose mtime folds the metadata in, exactly as the other non-Codex providers already do. The live-watcher metadata refresh path was already correct; this closes the periodic-sync gap. The obsolete shadow-caller test that asserted the old skip is replaced by a behavioral SyncAll reparse test. 
--- internal/sync/engine.go | 10 ++-- internal/sync/engine_integration_test.go | 53 ++++++++++++++++++++ internal/sync/provider_shadow_caller_test.go | 49 ------------------ 3 files changed, 59 insertions(+), 53 deletions(-) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 288590bd2..b2c837d98 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -4843,10 +4843,12 @@ func (e *Engine) providerSourceFreshBeforeFingerprint( if e.shouldSkipByPath(path, effectiveInfo) { return mtime, true } - case parser.AgentGemini: - if e.shouldSkipByPath(path, info) { - return info.ModTime().UnixNano(), true - } + // Gemini is deliberately absent here. Its fingerprint is composite (the + // session file plus projects.json and trustedFolders.json), so a + // pre-fingerprint skip keyed only on the session file's size and mtime + // would skip a session whose project metadata changed while the transcript + // did not, leaving a stale project on scheduled syncs. Gemini relies on the + // post-fingerprint skip cache instead, whose mtime folds in the composite. case parser.AgentCopilot: mtime := copilotEffectiveMtime(path, info) effectiveInfo := fakeSnapshotInfo{ diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 826deb083..31c28450e 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2219,6 +2219,59 @@ func TestSyncPathsGeminiProjectMetadataEventRefreshesProject(t *testing.T) { ) } +// TestSyncAllGeminiProjectMetadataChangeReparsesProject guards that a scheduled +// SyncAll is not fooled by the pre-fingerprint fast skip when only Gemini's +// projects.json changed. 
The session transcript's own size and mtime are left +// untouched, so the removed AgentGemini fast skip (which compared just the +// session stat) would have kept the stale project; the composite fingerprint, +// which folds in projects.json, must drive a reparse on the periodic full sync. +func TestSyncAllGeminiProjectMetadataChangeReparsesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-syncall-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, os.WriteFile( + projectsPath, + fmt.Appendf(nil, + `{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + env.writeGeminiSession( + t, + filepath.Join("tmp", "alias", "chats", "session-001.json"), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("m1", tsEarly, "Hello Gemini"), + testjsonl.GeminiAssistantMsg("m2", tsEarlyS5, "Hi there!", nil), + }, + ), + ) + + env.engine.SyncAll(context.Background(), nil) + assertSessionState(t, env.db, "gemini:"+sessionID, func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }) + + // Change only the project metadata, then advance its mtime so the composite + // fingerprint moves forward. The session transcript is left untouched, so a + // fast skip keyed on the transcript stat alone would keep the stale "one". 
+ writeProject("two") + later := time.Now().Add(48 * time.Hour) + require.NoError(t, os.Chtimes(projectsPath, later, later), "bump projects mtime") + env.engine.SyncAll(context.Background(), nil) + + assertSessionState(t, env.db, "gemini:"+sessionID, func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }) +} + func TestSyncPathsCodexAcceptsFlatArchived(t *testing.T) { env := setupTestEnv(t) diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index ac1d7070f..0090f76b7 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -649,55 +649,6 @@ func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *te assert.Empty(t, provider.calls) } -func TestProcessFileProviderAuthoritativeSkipsFreshGeminiBeforeFingerprint(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join( - root, "tmp", "alias", "chats", "session-001.json", - ) - sourceMtime := writeFreshProviderDBSession( - t, sourcePath, nil, - ) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentGemini, - DisplayName: "Gemini CLI", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentGemini, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentGemini: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentGemini: parser.ProviderMigrationProviderAuthoritative, - }, - }) - requireFreshProviderSession(t, engine.db, parser.AgentGemini, sourcePath, sourceMtime) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: 
sourcePath, - Agent: parser.AgentGemini, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.Equal(t, sourceMtime, result.mtime) - assert.Empty(t, provider.calls) -} - func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { root := t.TempDir() database := dbtest.OpenTestDB(t) From 4e7507037440bd49a7f15c7d4bcbc264f7e28110 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 00:28:41 -0400 Subject: [PATCH 08/10] feat(parser): discover Codex S3 sessions through the provider facade This branch migrates Codex to a provider-authoritative source set, making discoverProviderSources its sole on-disk discovery path. codexSourceSet.discover only walked a local session-directory layout, so a migrated Codex pointed at an s3:// rollout root enumerated nothing -- a regression against the pre-migration DiscoverFunc, which enumerated remote objects directly. Add an s3:// branch to discover: it lists rollout objects via discoverCodexS3 and builds an S3 SourceRef per object, carrying the durable metadata in the Opaque payload for the engine to thread back into the DiscoveredFile. Each object is its own URI-keyed session, so the local live-over-archived preference does not apply. s3:// objects keep processing on the dedicated S3 sync path. --- internal/parser/codex_provider.go | 15 +++++++++++ internal/parser/s3_discovery_test.go | 39 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/internal/parser/codex_provider.go b/internal/parser/codex_provider.go index cd63eca70..eafa51d94 100644 --- a/internal/parser/codex_provider.go +++ b/internal/parser/codex_provider.go @@ -208,6 +208,21 @@ func (s codexSourceSet) discover( if !includeRoot(root) { continue } + if strings.HasPrefix(root, "s3://") { + // s3:// roots have no local layout to walk: enumerate the objects + // directly and carry each one's durable metadata in the Opaque + // payload. 
Each object is its own session keyed by URI, so the + // live-over-archived preference (which inspects a local codexSource + // layout) does not apply here. + for _, file := range discoverCodexS3(root) { + source := s3SourceRefFromDiscoveredFile(file) + if _, ok := byKey[source.Key]; ok { + continue + } + byKey[source.Key] = source + } + continue + } for _, path := range s.discoverSessionPaths(root) { source, ok := s.sourceRef(root, path, true) if !ok { diff --git a/internal/parser/s3_discovery_test.go b/internal/parser/s3_discovery_test.go index f1f5322f3..2137f6010 100644 --- a/internal/parser/s3_discovery_test.go +++ b/internal/parser/s3_discovery_test.go @@ -107,3 +107,42 @@ func TestClaudeSourceSetMixedLocalAndS3Roots(t *testing.T) { assert.Equal(t, 1, s3Count, "exactly one remote source") assert.Equal(t, 1, localCount, "exactly one local source") } + +// TestCodexSourceSetDiscoversS3Sessions verifies the Codex source set enumerates +// rollout objects under an s3:// sessions root through its provider Discover +// path and carries the object metadata in the S3DiscoveredSource opaque. 
+func TestCodexSourceSetDiscoversS3Sessions(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + root := "s3://bucket/coder/raw/codex" + rolloutURI := root + "/2026/06/24/rollout-2026-06-24T00-00-00-" + + "11111111-1111-4111-8111-111111111111.jsonl" + mtime := time.Unix(100, 0) + listS3Objects = func(got string) ([]S3Object, error) { + require.Equal(t, root, got) + return []S3Object{{ + URI: rolloutURI, + Size: 11, + LastModified: mtime, + Fingerprint: "s3-meta:rollout", + }}, nil + } + + sources, err := newCodexSourceSet([]string{root}).Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + src := sources[0] + assert.Equal(t, AgentCodex, src.Provider) + assert.Equal(t, rolloutURI, src.DisplayPath) + assert.Equal(t, rolloutURI, src.Key) + + s3, ok := src.Opaque.(S3DiscoveredSource) + require.True(t, ok, "s3 source carries S3DiscoveredSource opaque") + assert.Equal(t, rolloutURI, s3.URI) + assert.Equal(t, "coder", s3.Machine) + assert.Equal(t, int64(11), s3.Size) + assert.Equal(t, mtime.UnixNano(), s3.MtimeNS) + assert.Contains(t, s3.Fingerprint, "rollout") +} From cce34e64b887063bcf47538f39da842a0ac359ca Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 08:31:57 -0400 Subject: [PATCH 09/10] test(sync): cover S3 discovery+sync against a real object store The provider migration once silently dropped s3:// discovery because every S3 test stubbed the listS3Objects/fetchS3Object seam or injected already-formed s3:// paths into processing -- nothing ran real discovery against an object store, so the regression passed the whole suite. Add a testcontainers integration test (s3test build tag + Docker, mirroring the pgtest setup) that boots a MinIO container, points the production env-driven s3Client at it, uploads a Claude session and a Codex rollout under the {bucket}/{machine}/raw/{agent} layout, and runs a full provider-authoritative SyncAll against s3:// roots. 
It asserts both remote sessions are discovered, fetched, parsed, and persisted machine-namespaced from the s3 root -- the end-to-end path with no other real-store coverage. testcontainers manages the container lifecycle, so make test-s3 needs only a working Docker daemon. Any S3-compatible image (e.g. rustfs) works by swapping s3ContainerImage. --- Makefile | 9 +- internal/sync/s3_container_test.go | 178 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 internal/sync/s3_container_test.go diff --git a/Makefile b/Makefile index fe2619fa9..c200b8281 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ AIR_BIN := $(shell if command -v air >/dev/null 2>&1; then command -v air; \ elif [ -x "$(GOPATH_FIRST)/bin/air" ]; then printf "%s" "$(GOPATH_FIRST)/bin/air"; \ fi) -.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends test-postgres test-postgres-ci postgres-up postgres-down test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help +.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends test-postgres test-postgres-ci test-s3 postgres-up postgres-down 
test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help # Ensure go:embed has at least one file (no-op if frontend is built) ensure-embed-dir: @@ -259,6 +259,12 @@ test-postgres: pricing-snapshot ensure-embed-dir postgres-up test-postgres-ci: pricing-snapshot ensure-embed-dir CGO_ENABLED=1 go test -tags "fts5,pgtest" -v ./internal/postgres/... ./internal/activity/... -count=1 +# S3 discovery integration tests. testcontainers starts and tears down an +# S3-compatible (MinIO) container automatically, so only a working Docker +# daemon is required. +test-s3: pricing-snapshot ensure-embed-dir + CGO_ENABLED=1 go test -tags "fts5,s3test" -v ./internal/sync/... -run TestS3 -count=1 + # Start test SSH container ssh-up: docker compose -f docker-compose.test.yml up -d --build --wait sshd @@ -453,6 +459,7 @@ help: @echo " test-short - Run fast tests only" @echo " bench-backends - Benchmark SQLite, DuckDB, and PostgreSQL stores" @echo " test-postgres - Run PostgreSQL integration tests" + @echo " test-s3 - Run S3 discovery integration tests (Docker)" @echo " postgres-up - Start test PostgreSQL container" @echo " postgres-down - Stop test PostgreSQL container" @echo " test-ssh - Run SSH integration tests" diff --git a/internal/sync/s3_container_test.go b/internal/sync/s3_container_test.go new file mode 100644 index 000000000..86e189a70 --- /dev/null +++ b/internal/sync/s3_container_test.go @@ -0,0 +1,178 @@ +//go:build s3test + +// Package sync's S3 container integration test. It exercises the real S3 +// discovery+sync path -- listS3Objects -> s3Client -> a live S3-compatible +// object store -- rather than stubbing the seam, so a regression that silently +// drops s3:// discovery (as the provider migration once did) cannot pass. 
+// +// Gated behind the s3test build tag and Docker, mirroring the pgtest setup. Run +// with: +// +// CGO_ENABLED=1 go test -tags "fts5,s3test" ./internal/sync/ -run TestS3 -v +// +// The container image is MinIO (a widely-deployed S3-compatible server); any +// S3-compatible store (e.g. rustfs) works by swapping s3ContainerImage. +package sync + +import ( + "bytes" + "context" + "fmt" + "testing" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" + + "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +const ( + s3ContainerImage = "minio/minio:latest" + s3TestAccessKey = "minioadmin" + s3TestSecretKey = "minioadmin" + s3TestBucket = "agentsview" + // s3TestMachine is the machine segment of the //raw/ + // layout; discovery derives the session machine namespace from it. + s3TestMachine = "laptop" +) + +// startS3Container boots a throwaway S3-compatible object store and returns its +// host:port endpoint. The container is terminated on test cleanup. +func startS3Container(ctx context.Context, t *testing.T) string { + t.Helper() + req := testcontainers.ContainerRequest{ + Image: s3ContainerImage, + Cmd: []string{"server", "/data"}, + ExposedPorts: []string{"9000/tcp"}, + Env: map[string]string{ + "MINIO_ROOT_USER": s3TestAccessKey, + "MINIO_ROOT_PASSWORD": s3TestSecretKey, + }, + WaitingFor: wait.ForHTTP("/minio/health/live"). + WithPort("9000/tcp"). 
+ WithStartupTimeout(90 * time.Second), + } + container, err := testcontainers.GenericContainer( + ctx, testcontainers.GenericContainerRequest{ + ContainerRequest: req, + Started: true, + }, + ) + require.NoError(t, err, "start S3 container") + t.Cleanup(func() { + // Use a fresh context so cleanup runs even if the test ctx is done. + _ = container.Terminate(context.Background()) + }) + + host, err := container.Host(ctx) + require.NoError(t, err, "container host") + port, err := container.MappedPort(ctx, "9000") + require.NoError(t, err, "container mapped port") + return fmt.Sprintf("%s:%s", host, port.Port()) +} + +// putS3Object uploads body to s3://{bucket}/{key} using a direct client. +func putS3Object( + ctx context.Context, t *testing.T, cl *minio.Client, key, body string, +) { + t.Helper() + _, err := cl.PutObject( + ctx, s3TestBucket, key, + bytes.NewReader([]byte(body)), int64(len(body)), + minio.PutObjectOptions{ContentType: "application/jsonl"}, + ) + require.NoError(t, err, "put object %s", key) +} + +// TestS3DiscoverySyncAgainstContainer uploads a Claude session and a Codex +// rollout to a live S3-compatible store, points the production s3Client at it +// via the standard AWS_* env vars, and runs a full provider-authoritative +// SyncAll against s3:// roots. It asserts both remote sessions are discovered, +// fetched, parsed, and persisted machine-namespaced under the s3 root's machine +// segment -- the end-to-end path that has no other real-store coverage. +func TestS3DiscoverySyncAgainstContainer(t *testing.T) { + ctx := context.Background() + endpoint := startS3Container(ctx, t) + + // Point the production s3Client (env-driven) at the container. The endpoint + // is loopback (127.0.0.1), so http is allowed without the insecure override. 
+ t.Setenv("AWS_S3_ENDPOINT", "http://"+endpoint) + t.Setenv("AWS_ACCESS_KEY_ID", s3TestAccessKey) + t.Setenv("AWS_SECRET_ACCESS_KEY", s3TestSecretKey) + t.Setenv("AWS_REGION", "us-east-1") + + uploadClient, err := minio.New(endpoint, &minio.Options{ + Creds: credentials.NewStaticV4(s3TestAccessKey, s3TestSecretKey, ""), + Secure: false, + }) + require.NoError(t, err, "build upload client") + require.NoError(t, uploadClient.MakeBucket( + ctx, s3TestBucket, minio.MakeBucketOptions{}, + ), "make bucket") + + const ( + claudeID = "11111111-1111-4111-8111-111111111111" + codexID = "22222222-2222-4222-8222-222222222222" + ) + claudeKey := fmt.Sprintf( + "%s/raw/claude/myproj/%s.jsonl", s3TestMachine, claudeID, + ) + claudeBody := testjsonl.NewSessionBuilder(). + AddClaudeUser("2024-01-01T00:00:00Z", "hello from claude s3"). + AddClaudeAssistant("2024-01-01T00:00:05Z", "hi there"). + String() + putS3Object(ctx, t, uploadClient, claudeKey, claudeBody) + + codexKey := fmt.Sprintf( + "%s/raw/codex/2026/06/24/rollout-2026-06-24T00-00-00-%s.jsonl", + s3TestMachine, codexID, + ) + codexBody := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + codexID, "/home/coder/project", "user", "2026-06-24T00:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this from s3", "2026-06-24T00:00:01Z"), + ) + putS3Object(ctx, t, uploadClient, codexKey, codexBody) + + database := openTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {"s3://" + s3TestBucket + "/" + s3TestMachine + "/raw/claude"}, + parser.AgentCodex: {"s3://" + s3TestBucket + "/" + s3TestMachine + "/raw/codex"}, + }, + Machine: "central", + }) + + stats := engine.SyncAll(ctx, nil) + require.GreaterOrEqual(t, stats.Synced, 2, + "both s3 sessions discovered and synced") + + // The Claude session ID is the object's filename stem, namespaced by the s3 + // root's machine ("laptop"), not the host machine ("central"). 
+ claudeSess, err := database.GetSessionFull(ctx, s3TestMachine+"~"+claudeID) + require.NoError(t, err) + require.NotNil(t, claudeSess, "claude s3 session persisted") + assert.Equal(t, s3TestMachine, claudeSess.Machine) + assert.Equal(t, "s3://"+s3TestBucket+"/"+claudeKey, derefString(claudeSess.FilePath)) + + // Codex namespaces its ID differently (machine~codex:); assert via the + // persisted set so the exact format is not hard-coded. + page, err := database.ListSessions(ctx, db.SessionFilter{Limit: 100}) + require.NoError(t, err) + agents := map[string]bool{} + for _, s := range page.Sessions { + assert.Equal(t, s3TestMachine, s.Machine, + "every synced s3 session is namespaced under the s3 root machine") + agents[s.Agent] = true + } + assert.True(t, agents["claude"], "claude s3 session discovered") + assert.True(t, agents["codex"], "codex s3 session discovered") +} From 47dd52c4524055246683c02e7f497fb78378115f Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 08:46:44 -0400 Subject: [PATCH 10/10] test(sync): run the S3 integration container on rustfs MinIO is no longer maintained, so the S3 discovery integration test now boots rustfs -- an actively maintained S3-compatible object store -- instead. rustfs takes credentials via RUSTFS_ACCESS_KEY/RUSTFS_SECRET_KEY, serves the S3 API on :9000, and answers /health with 200 once ready, which drives the testcontainers wait strategy. The discovery+sync assertions are unchanged; only the backing container image and its credential/health wiring move. --- Makefile | 4 ++-- internal/sync/s3_container_test.go | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index c200b8281..397c1986c 100644 --- a/Makefile +++ b/Makefile @@ -259,8 +259,8 @@ test-postgres: pricing-snapshot ensure-embed-dir postgres-up test-postgres-ci: pricing-snapshot ensure-embed-dir CGO_ENABLED=1 go test -tags "fts5,pgtest" -v ./internal/postgres/... 
./internal/activity/... -count=1 -# S3 discovery integration tests. testcontainers starts and tears down an -# S3-compatible (MinIO) container automatically, so only a working Docker +# S3 discovery integration tests. testcontainers starts and tears down a +# rustfs (S3-compatible) container automatically, so only a working Docker # daemon is required. test-s3: pricing-snapshot ensure-embed-dir CGO_ENABLED=1 go test -tags "fts5,s3test" -v ./internal/sync/... -run TestS3 -count=1 diff --git a/internal/sync/s3_container_test.go b/internal/sync/s3_container_test.go index 86e189a70..ebfed5c70 100644 --- a/internal/sync/s3_container_test.go +++ b/internal/sync/s3_container_test.go @@ -10,8 +10,9 @@ // // CGO_ENABLED=1 go test -tags "fts5,s3test" ./internal/sync/ -run TestS3 -v // -// The container image is MinIO (a widely-deployed S3-compatible server); any -// S3-compatible store (e.g. rustfs) works by swapping s3ContainerImage. +// The container image is rustfs, an actively maintained S3-compatible object +// store; any S3-compatible image works by swapping s3ContainerImage and its +// credential env vars. package sync import ( @@ -34,9 +35,9 @@ import ( ) const ( - s3ContainerImage = "minio/minio:latest" - s3TestAccessKey = "minioadmin" - s3TestSecretKey = "minioadmin" + s3ContainerImage = "rustfs/rustfs:latest" + s3TestAccessKey = "rustfsadmin" + s3TestSecretKey = "rustfsadmin" s3TestBucket = "agentsview" // s3TestMachine is the machine segment of the //raw/ // layout; discovery derives the session machine namespace from it. 
@@ -49,13 +50,15 @@ func startS3Container(ctx context.Context, t *testing.T) string { t.Helper() req := testcontainers.ContainerRequest{ Image: s3ContainerImage, - Cmd: []string{"server", "/data"}, ExposedPorts: []string{"9000/tcp"}, Env: map[string]string{ - "MINIO_ROOT_USER": s3TestAccessKey, - "MINIO_ROOT_PASSWORD": s3TestSecretKey, + "RUSTFS_ACCESS_KEY": s3TestAccessKey, + "RUSTFS_SECRET_KEY": s3TestSecretKey, }, - WaitingFor: wait.ForHTTP("/minio/health/live"). + // rustfs serves the S3 API on :9000 and answers /health with 200 once + // the object store is ready; the image's default entrypoint starts the + // server, so no Cmd override is needed. + WaitingFor: wait.ForHTTP("/health"). WithPort("9000/tcp"). WithStartupTimeout(90 * time.Second), }