diff --git a/Makefile b/Makefile index fe2619fa9..397c1986c 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ AIR_BIN := $(shell if command -v air >/dev/null 2>&1; then command -v air; \ elif [ -x "$(GOPATH_FIRST)/bin/air" ]; then printf "%s" "$(GOPATH_FIRST)/bin/air"; \ fi) -.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends test-postgres test-postgres-ci postgres-up postgres-down test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help +.PHONY: build build-release install frontend frontend-dev dev check-air air-install desktop-dev desktop-build desktop-macos-app desktop-macos-dmg desktop-windows-installer desktop-linux-appimage desktop-app docs-install docs-build docs-serve docs-check docs-screenshots docs-assets-branch docs-generated-assets-branch docs-deploy-staging docs-deploy test test-short bench-backends test-postgres test-postgres-ci test-s3 postgres-up postgres-down test-ssh test-ssh-ci ssh-up ssh-down e2e e2e-duckdb vet lint lint-ci lint-golangci lint-golangci-ci nilaway nilaway-golangci-build lint-tools tidy clean release release-darwin-arm64 release-darwin-amd64 release-linux-amd64 install-hooks ensure-embed-dir pricing-snapshot dev-snapshot help # Ensure go:embed has at least one file (no-op if frontend is built) ensure-embed-dir: @@ -259,6 +259,12 @@ test-postgres: pricing-snapshot ensure-embed-dir postgres-up test-postgres-ci: pricing-snapshot ensure-embed-dir CGO_ENABLED=1 go test -tags 
"fts5,pgtest" -v ./internal/postgres/... ./internal/activity/... -count=1 +# S3 discovery integration tests. testcontainers starts and tears down a +# rustfs (S3-compatible) container automatically, so only a working Docker +# daemon is required. +test-s3: pricing-snapshot ensure-embed-dir + CGO_ENABLED=1 go test -tags "fts5,s3test" -v ./internal/sync/... -run TestS3 -count=1 + # Start test SSH container ssh-up: docker compose -f docker-compose.test.yml up -d --build --wait sshd @@ -453,6 +459,7 @@ help: @echo " test-short - Run fast tests only" @echo " bench-backends - Benchmark SQLite, DuckDB, and PostgreSQL stores" @echo " test-postgres - Run PostgreSQL integration tests" + @echo " test-s3 - Run S3 discovery integration tests (Docker)" @echo " postgres-up - Start test PostgreSQL container" @echo " postgres-down - Stop test PostgreSQL container" @echo " test-ssh - Run SSH integration tests" diff --git a/cmd/agentsview/session_export.go b/cmd/agentsview/session_export.go index 21ab5aa96..18b42bb1a 100644 --- a/cmd/agentsview/session_export.go +++ b/cmd/agentsview/session_export.go @@ -114,7 +114,7 @@ func newSessionExportCommand() *cobra.Command { // conversations, so streaming the whole file would disclose // unrelated conversations. Filter to the requested conversation. 
if tracePath, conversationID, ok := - parser.ParseVisualStudioCopilotVirtualPath(storedPath); ok { + parser.SplitVisualStudioCopilotVirtualPath(storedPath); ok { err := parser.WriteVisualStudioCopilotConversationJSONL( cmd.OutOrStdout(), tracePath, conversationID, ) diff --git a/cmd/agentsview/token_use.go b/cmd/agentsview/token_use.go index bccd5e55d..8d3799603 100644 --- a/cmd/agentsview/token_use.go +++ b/cmd/agentsview/token_use.go @@ -86,11 +86,11 @@ func resolveRawSessionID( // Canonical disk probe: if the input starts with a known // agent prefix, trust that interpretation first and strip - // before calling FindSourceFunc (which rejects IDs with + // before resolving the source (which rejects IDs with // colons via IsValidSessionID). for _, def := range parser.Registry { if def.IDPrefix == "" || !def.FileBased || - def.FindSourceFunc == nil { + !agentHasDiskSourceLookup(def) { continue } if !strings.HasPrefix(input, def.IDPrefix) { @@ -98,7 +98,7 @@ func resolveRawSessionID( } bareID := strings.TrimPrefix(input, def.IDPrefix) for _, dir := range agentDirs[def.Type] { - if def.FindSourceFunc(dir, bareID) != "" { + if findAgentSourceFile(def, dir, bareID) != "" { return input, true } } @@ -110,11 +110,11 @@ func resolveRawSessionID( // colon-bearing raw IDs (Kimi, OpenClaw, Kiro IDE) may // match. 
for _, def := range parser.Registry { - if !def.FileBased || def.FindSourceFunc == nil { + if !def.FileBased || !agentHasDiskSourceLookup(def) { continue } for _, dir := range agentDirs[def.Type] { - if def.FindSourceFunc(dir, input) != "" { + if findAgentSourceFile(def, dir, input) != "" { return def.IDPrefix + input, true } } @@ -123,6 +123,63 @@ func resolveRawSessionID( return input, false } +// agentHasDiskSourceLookup reports whether a session source can be located on +// disk by raw ID for the agent: via the legacy AgentDef FindSourceFunc hook, or +// via FindSource on a registered provider in provider-authoritative mode, for +// agents whose lookup was folded onto the provider (e.g. Codex). +func agentHasDiskSourceLookup(def parser.AgentDef) bool { + if def.FindSourceFunc != nil { + return true + } + if parser.ProviderMigrationModes()[def.Type] != + parser.ProviderMigrationProviderAuthoritative { + return false + } + _, ok := parser.ProviderFactoryByType(def.Type) + return ok +} + +// findAgentSourceFile resolves a raw agent session ID to an on-disk source path +// under dir, using the legacy FindSourceFunc when present and otherwise the +// provider's FindSource (RawSessionID lookup). Returns "" when no source +// resolves or the agent has no on-disk lookup. +func findAgentSourceFile(def parser.AgentDef, dir, rawID string) string { + if def.FindSourceFunc != nil { + return def.FindSourceFunc(dir, rawID) + } + factory, ok := parser.ProviderFactoryByType(def.Type) + if !ok { + return "" + } + provider := factory.NewProvider(parser.ProviderConfig{Roots: []string{dir}}) + source, found, err := provider.FindSource( + context.Background(), + parser.FindSourceRequest{RawSessionID: rawID}, + ) + if err != nil || !found { + return "" + } + if path, ok := providerSourcePath(source); ok { + return path + } + return "" +} + +// providerSourcePath returns the first non-empty location on a provider +// SourceRef: the display path, then the fingerprint key, then the key.
+func providerSourcePath(source parser.SourceRef) (string, bool) { + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if candidate != "" { + return candidate, true + } + } + return "", false +} + // usageExitCode classifies a SessionUsage into an exit code: 2 when // the session is not in the DB, 0 when token data OR cost is present, // 3 when the session exists but has neither. Cost-only sessions diff --git a/internal/parser/codex.go b/internal/parser/codex.go index af763aada..d104b73ef 100644 --- a/internal/parser/codex.go +++ b/internal/parser/codex.go @@ -1305,12 +1305,31 @@ func IsCodexExecSessionFile(path string) bool { return false } -// ParseCodexSession parses a Codex JSONL session file. -// The includeExec parameter is retained for backward -// compatibility; exec-originated sessions are now always -// parsed and imported. +// ParseCodexSession and ParseCodexSessionFrom are the exported seam used by the +// S3 sync path (internal/sync), which buffers an s3:// Codex object to a temp +// file and parses it through the legacy processCodex. The Codex provider owns +// these bodies as receiver methods that use no receiver state, so the wrappers +// invoke them on a zero-value provider. They are removed once S3 support folds +// into the JSONL source sets. func ParseCodexSession( path, machine string, includeExec bool, +) (*ParsedSession, []ParsedMessage, error) { + return (&codexProvider{}).parseSession(path, machine, includeExec) +} + +func ParseCodexSessionFrom( + path string, offset int64, startOrdinal int, includeExec bool, +) ([]ParsedMessage, time.Time, int64, error) { + return (&codexProvider{}).parseSessionFrom(path, offset, startOrdinal, includeExec) +} + +// parseSession parses a Codex JSONL session file into a session and its +// messages. The includeExec parameter is retained for backward compatibility; +// exec-originated sessions are now always parsed and imported. 
This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. +func (p *codexProvider) parseSession( + path, machine string, includeExec bool, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) if err != nil { @@ -1675,12 +1694,13 @@ func CodexTranscriptConsumedSize(path string) (int64, error) { return readJSONLFrom(path, 0, func(line string) {}) } -// ParseCodexSessionFrom parses only new lines from a Codex -// JSONL file starting at the given byte offset. Returns only -// the newly parsed messages (with ordinals starting at -// startOrdinal) and the latest timestamp seen. Used for -// incremental re-parsing of large append-only session files. -func ParseCodexSessionFrom( +// parseSessionFrom parses only new lines from a Codex JSONL file starting at +// the given byte offset. Returns only the newly parsed messages (with ordinals +// starting at startOrdinal) and the latest timestamp seen. Used for incremental +// re-parsing of large append-only session files. This is the provider-owned +// incremental parse entrypoint; the package-level free function was folded onto +// the provider. 
+func (p *codexProvider) parseSessionFrom( path string, offset int64, startOrdinal int, diff --git a/internal/parser/codex_parser_test.go b/internal/parser/codex_parser_test.go index cc2acf282..d01fff3df 100644 --- a/internal/parser/codex_parser_test.go +++ b/internal/parser/codex_parser_test.go @@ -19,11 +19,74 @@ func runCodexParserTest(t *testing.T, fileName, content string, includeExec bool fileName = "test.jsonl" } path := createTestFile(t, fileName, content) - sess, msgs, err := ParseCodexSession(path, "local", includeExec) + sess, msgs, err := parseCodexTestSession(t, path, "local", includeExec) require.NoError(t, err) return sess, msgs } +// newCodexTestProvider builds a concrete codexProvider so package tests can +// exercise the folded parse, discovery, and source-lookup behavior directly +// through provider methods. DiscoverCodexSessions and FindCodexSourceFile are +// gone; ParseCodexSession and ParseCodexSessionFrom survive only as thin +// wrappers over these methods for the S3 sync path. +func newCodexTestProvider(t *testing.T, roots ...string) *codexProvider { + t.Helper() + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*codexProvider) + require.True(t, ok) + return cp +} + +// parseCodexTestSession parses a Codex session through the provider-owned +// parseSession method rather than the ParseCodexSession compatibility wrapper. +func parseCodexTestSession( + t *testing.T, path, machine string, includeExec bool, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newCodexTestProvider(t).parseSession(path, machine, includeExec) +} + +// parseCodexTestSessionFrom parses appended Codex lines through the +// provider-owned parseSessionFrom method rather than the package-level +// ParseCodexSessionFrom compatibility wrapper.
+func parseCodexTestSessionFrom( + t *testing.T, path string, offset int64, startOrdinal int, includeExec bool, +) ([]ParsedMessage, time.Time, int64, error) { + t.Helper() + return newCodexTestProvider(t).parseSessionFrom(path, offset, startOrdinal, includeExec) +} + +// discoverCodexTestSessions discovers Codex session paths under root through +// the provider source set, returning the legacy DiscoveredFile shape the tests +// assert against, replacing the removed DiscoverCodexSessions. +func discoverCodexTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCodexTestProvider(t, root) + paths := provider.sources.discoverSessionPaths(root) + if len(paths) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(paths)) + for _, path := range paths { + files = append(files, DiscoveredFile{ + Path: path, + Agent: AgentCodex, + }) + } + return files +} + +// findCodexTestSourceFile resolves a Codex session UUID to a transcript path +// through the provider source set, replacing the removed FindCodexSourceFile. 
+func findCodexTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newCodexTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func assertToolResultEvents( t *testing.T, got []ParsedToolResultEvent, @@ -65,7 +128,7 @@ func TestParseCodexSession_UsesThreadNameFromSessionIndex(t *testing.T) { index := `{"id":"abc-123","thread_name":"Renamed from Codex","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, msgs, err := ParseCodexSession(sessionPath, "local", false) + sess, msgs, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "Renamed from Codex", sess.SessionName) @@ -86,7 +149,7 @@ func TestParseCodexSession_LeavesSessionNameEmptyWithoutThreadName(t *testing.T) index := `{"id":"abc-123","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, _, err := ParseCodexSession(sessionPath, "local", false) + sess, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Empty(t, sess.SessionName) @@ -106,7 +169,7 @@ func TestParseCodexSession_UsesThreadNameFromArchivedSessions(t *testing.T) { index := `{"id":"abc-123","thread_name":"Archived title","updated_at":"2026-06-11T17:34:20.3755243Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - sess, _, err := ParseCodexSession(sessionPath, "local", false) + sess, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "Archived title", sess.SessionName) @@ -125,7 +188,7 @@ func TestParseCodexSession_MtimeIncludesSessionIndex(t *testing.T) { index := `{"id":"abc-123","thread_name":"Original","updated_at":"2026-06-11T17:34:20Z"}` + "\n" require.NoError(t, os.WriteFile(indexPath, []byte(index), 0o644)) - 
sess1, _, err := ParseCodexSession(sessionPath, "local", false) + sess1, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) mtime1 := sess1.File.Mtime @@ -135,7 +198,7 @@ func TestParseCodexSession_MtimeIncludesSessionIndex(t *testing.T) { require.NoError(t, os.WriteFile(indexPath, []byte(renamed), 0o644)) require.NoError(t, os.Chtimes(indexPath, future, future)) - sess2, _, err := ParseCodexSession(sessionPath, "local", false) + sess2, _, err := parseCodexTestSession(t, sessionPath, "local", false) require.NoError(t, err) assert.Greater(t, sess2.File.Mtime, mtime1, "mtime must advance when session_index.jsonl is updated") assert.Equal(t, "Renamed", sess2.SessionName) @@ -1345,7 +1408,7 @@ func TestParseCodexSessionFrom_ForkReplaySpansOffset(t *testing.T) { ) path := createTestFile(t, "fork-incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "codex:"+forkID, sess.ID) @@ -1371,7 +1434,7 @@ func TestParseCodexSessionFrom_ForkReplaySpansOffset(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, 0, false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 0, false) require.NoError(t, err) // Only the genuine turn survives; the replayed assistant answer @@ -1732,7 +1795,7 @@ func TestParseCodexSessionFrom_Incremental(t *testing.T) { path := createTestFile(t, "incremental.jsonl", initial) // Full parse to get baseline. 
- sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "codex:inc-1", sess.ID) @@ -1762,7 +1825,7 @@ func TestParseCodexSessionFrom_Incremental(t *testing.T) { require.NoError(t, f.Close()) // Incremental parse from the offset. - newMsgs, endedAt, _, err := ParseCodexSessionFrom( + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 1, false, ) require.NoError(t, err) @@ -1814,7 +1877,7 @@ func TestParseCodexSessionFrom_LateTokenCountRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -1849,7 +1912,7 @@ func TestParseCodexSessionFrom_FunctionCallOutputRequiresFullParse(t *testing.T) require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -1879,7 +1942,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "looking", tsEarlyS5), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 1, sess.UserMessageCount) require.Len(t, msgs, 2) @@ -1893,7 +1956,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "No issues found.", tsLateS5), )) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 
len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 2) assert.Equal(t, RoleUser, newMsgs[0].Role) @@ -1908,7 +1971,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "looking", tsEarlyS5), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 1, sess.UserMessageCount) require.Len(t, msgs, 2) @@ -1923,7 +1986,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "No issues found.", tsLateS5), )) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) assert.Equal(t, RoleAssistant, newMsgs[0].Role) @@ -1940,7 +2003,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { testjsonl.CodexMsgJSON("user", "something else entirely", "2024-01-01T10:00:03Z"), ) path := createTestFile(t, "incremental.jsonl", initial) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err) require.Equal(t, 2, sess.UserMessageCount) @@ -1950,7 +2013,7 @@ func TestParseCodexSessionFrom_DedupsReemittedPrompt(t *testing.T) { appendLines(t, path, testjsonl.CodexMsgJSON("user", prompt, tsLate)) - newMsgs, _, _, err := ParseCodexSessionFrom(path, offset, len(msgs), false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) assert.Equal(t, RoleUser, newMsgs[0].Role) @@ -1989,7 +2052,7 @@ func TestParseCodexSessionFrom_SkipsSessionMeta(t *testing.T) { f.WriteString(extra) f.Close() - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := 
parseCodexTestSessionFrom(t, path, offset, 5, false, ) require.NoError(t, err) @@ -2013,7 +2076,7 @@ func TestParseCodexSessionFrom_NoNewData(t *testing.T) { offset := info.Size() // Parse from end of file — no new data. - newMsgs, endedAt, _, err := ParseCodexSessionFrom( + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 10, false, ) require.NoError(t, err) @@ -2046,7 +2109,7 @@ func TestParseCodexSessionFrom_SubagentOutputRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2077,7 +2140,7 @@ func TestParseCodexSessionFrom_CollabAgentSpawnEndRequiresFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 2, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 2, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2115,7 +2178,7 @@ func TestParseCodexSessionFrom_WaitCallRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 4, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 4, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2153,7 +2216,7 @@ func TestParseCodexSessionFrom_WaitAgentCallRequiresFullParse(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 4, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 4, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2179,7 +2242,7 @@ func TestParseCodexSessionFrom_SystemMessageDoesNotRequireFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, endedAt, _, err := 
ParseCodexSessionFrom(path, offset, 1, false) + newMsgs, endedAt, _, err := parseCodexTestSessionFrom(t, path, offset, 1, false) require.NoError(t, err) assert.Equal(t, 0, len(newMsgs)) assert.False(t, endedAt.IsZero()) @@ -2210,7 +2273,7 @@ func TestParseCodexSessionFrom_RunningNotificationRequiresFullParse(t *testing.T require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 1, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 1, false) require.Error(t, err) assert.Contains(t, err.Error(), "full parse") } @@ -2236,7 +2299,7 @@ func TestParseCodexSessionFrom_NonSubagentFunctionOutputRequiresFullParse(t *tes require.NoError(t, err) require.NoError(t, f.Close()) - _, _, _, err = ParseCodexSessionFrom(path, offset, 1, false) + _, _, _, err = parseCodexTestSessionFrom(t, path, offset, 1, false) require.Error(t, err) assert.True(t, IsIncrementalFullParseFallback(err)) } @@ -2277,7 +2340,7 @@ func TestParseCodexSessionFrom_SeedsModelFromTurnContext( require.NoError(t, err) require.NoError(t, f2.Close()) - newMsgs2, _, _, err := ParseCodexSessionFrom( + newMsgs2, _, _, err := parseCodexTestSessionFrom(t, path, offset, 2, false, ) require.NoError(t, err) @@ -2324,7 +2387,7 @@ func TestParseCodexSessionFrom_SeedsBoundaryAfterTurnContext( require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 0, false, ) require.NoError(t, err) @@ -2373,7 +2436,7 @@ func TestParseCodexSessionFrom_EmptyModelReset( require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom( + newMsgs, _, _, err := parseCodexTestSessionFrom(t, path, offset, 2, false, ) require.NoError(t, err) diff --git a/internal/parser/codex_provider.go b/internal/parser/codex_provider.go new file mode 100644 index 000000000..eafa51d94 --- /dev/null +++ b/internal/parser/codex_provider.go @@ -0,0 +1,642 @@ 
+package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "slices" + "strings" +) + +var _ Provider = (*codexProvider)(nil) + +type codexProviderFactory struct { + def AgentDef +} + +func newCodexProviderFactory(def AgentDef) ProviderFactory { + return codexProviderFactory{def: cloneAgentDef(def)} +} + +func (f codexProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f codexProviderFactory) Capabilities() Capabilities { + return codexProviderCapabilities() +} + +func (f codexProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &codexProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: codexProviderCapabilities(), + Config: cfg, + }, + sources: newCodexSourceSet(cfg.Roots), + } +} + +type codexProvider struct { + ProviderBase + sources codexSourceSet +} + +func (p *codexProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *codexProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *codexProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *codexProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +// AllSourcePathsForUUID returns every on-disk Codex transcript path under the +// provider's roots whose filename carries the given session UUID, without the +// live-over-archived deduplication Discover applies. A UUID can exist as both a +// live dated copy and a flat archived copy under the same root; the sync engine +// uses the full set so an mtime cutoff can judge each copy independently. 
+func (p *codexProvider) AllSourcePathsForUUID(uuid string) []string { + if uuid == "" { + return nil + } + seen := make(map[string]struct{}) + var paths []string + for _, root := range p.sources.roots { + for _, path := range p.sources.discoverSessionPaths(root) { + if CodexSessionUUIDFromFilename(filepath.Base(path)) != uuid { + continue + } + clean := filepath.Clean(path) + if _, ok := seen[clean]; ok { + continue + } + seen[clean] = struct{}{} + paths = append(paths, path) + } + } + return paths +} + +// SourceRefForPath builds a SourceRef pinned to the exact transcript path, +// without live-over-archived canonicalization. Discovery, raw-ID lookup, and +// fresh stored-source lookup still prefer the live dated transcript, but +// changed-path events and non-fresh stored paths preserve the already-selected +// on-disk copy. The sync engine uses this when its DB-aware or mtime-aware +// logic has chosen a duplicated Codex UUID source (e.g. a stored archived copy +// or the cutoff-newer copy), so that choice is honored instead of being flipped +// back to the preferred dated layout. Returns false when the path is not a +// recognizable Codex source. 
+func (p *codexProvider) SourceRefForPath(path string) (SourceRef, bool) { + for _, root := range p.sources.roots { + if source, ok := p.sources.sourceRef(root, path, true); ok { + return source, true + } + if source, ok := p.sources.directPathSource(root, path, true); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (p *codexProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *codexProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("codex source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine, false) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + // Codex transcripts are append-only and the provider always emits a full + // parse (it does not advertise incremental append). A full parse is the + // authoritative message set, so force-replace the stored rows; this + // preserves the legacy behavior where a late token_count line appended to + // an existing turn rewrites the stored message instead of being dropped by + // an append-only write. 
+ ForceReplace: true, + }, nil +} + +func (p *codexProvider) ParseIncremental( + ctx context.Context, + _ IncrementalRequest, +) (IncrementalOutcome, IncrementalStatus, error) { + if err := ctx.Err(); err != nil { + return IncrementalOutcome{}, IncrementalUnsupported, err + } + return IncrementalOutcome{}, IncrementalUnsupported, nil +} + +type codexSource struct { + Root string + Path string + UUID string + Layout CodexLayout +} + +type codexSourceSet struct { + roots []string +} + +func newCodexSourceSet(roots []string) codexSourceSet { + return codexSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s codexSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + return s.discover(ctx, func(string) bool { return true }) +} + +func (s codexSourceSet) discover( + ctx context.Context, + includeRoot func(string) bool, +) ([]SourceRef, error) { + var sources []SourceRef + byKey := make(map[string]SourceRef) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + if !includeRoot(root) { + continue + } + if strings.HasPrefix(root, "s3://") { + // s3:// roots have no local layout to walk: enumerate the objects + // directly and carry each one's durable metadata in the Opaque + // payload. Each object is its own session keyed by URI, so the + // live-over-archived preference (which inspects a local codexSource + // layout) does not apply here. 
+ for _, file := range discoverCodexS3(root) { + source := s3SourceRefFromDiscoveredFile(file) + if _, ok := byKey[source.Key]; ok { + continue + } + byKey[source.Key] = source + } + continue + } + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path, true) + if !ok { + source, ok = s.directPathSource(root, path, true) + } + if !ok { + continue + } + if current, ok := byKey[source.Key]; ok && + !preferCodexSource(source, current) { + continue + } + byKey[source.Key] = source + } + } + for _, source := range byKey { + sources = append(sources, source) + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Codex JSONL session file paths under +// sessionsDir, covering both the standard year/month/day layout and a flat +// archived directory. Paths are returned sorted for deterministic discovery, +// matching the behavior the package-level entrypoint provided before the fold. +func (s codexSourceSet) discoverSessionPaths(sessionsDir string) []string { + var paths []string + + entries, err := os.ReadDir(sessionsDir) + if err != nil { + return nil + } + for _, entry := range entries { + if entry.IsDir() { + continue + } + if !isCodexSessionFilename(entry.Name()) { + continue + } + paths = append(paths, filepath.Join(sessionsDir, entry.Name())) + } + + walkCodexDayDirs(sessionsDir, func(dayPath string) bool { + dayEntries, err := os.ReadDir(dayPath) + if err != nil { + return true + } + for _, sf := range dayEntries { + if sf.IsDir() { + continue + } + if !isCodexSessionFilename(sf.Name()) { + continue + } + paths = append(paths, filepath.Join(dayPath, sf.Name())) + } + return true + }) + + slices.Sort(paths) + return paths +} + +// findSourceFile resolves a Codex session file by UUID under sessionsDir. 
+// It prefers the standard year/month/day live path when present, then falls +// back to a flat archived directory entry, matching the lookup precedence the +// package-level entrypoint provided before the fold. +func (s codexSourceSet) findSourceFile(sessionsDir, sessionID string) string { + if !IsValidSessionID(sessionID) { + return "" + } + + var archived string + entries, err := os.ReadDir(sessionsDir) + if err == nil { + for _, f := range entries { + if f.IsDir() { + continue + } + name := f.Name() + if !isCodexSessionFilename(name) { + continue + } + if extractUUIDFromRollout(name) == sessionID { + archived = filepath.Join(sessionsDir, name) + break + } + } + } + + var live string + walkCodexDayDirs(sessionsDir, func(dayPath string) bool { + if live != "" { + return false + } + dayEntries, err := os.ReadDir(dayPath) + if err != nil { + return true + } + for _, f := range dayEntries { + if f.IsDir() { + continue + } + name := f.Name() + if !isCodexSessionFilename(name) { + continue + } + if extractUUIDFromRollout(name) == sessionID { + live = filepath.Join(dayPath, name) + return false + } + } + return true + }) + if live != "" { + return live + } + return archived +} + +func (s codexSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)*2) + seenShallow := make(map[string]struct{}) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"*.jsonl"}, + DebounceKey: string(AgentCodex) + ":sessions:" + root, + }) + for _, shallow := range ResolveCodexShallowWatchRoots(root) { + shallow = filepath.Clean(shallow) + if _, ok := seenShallow[shallow]; ok { + continue + } + seenShallow[shallow] = struct{}{} + roots = append(roots, WatchRoot{ + Path: shallow, + Recursive: false, + IncludeGlobs: []string{CodexSessionIndexFilename}, + DebounceKey: string(AgentCodex) + ":index:" + shallow, + }) + } + } + return WatchPlan{Roots: roots}, nil +} + +func (s 
codexSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if filepath.Base(req.Path) == CodexSessionIndexFilename { + return s.sourcesForIndexPath(ctx, req.Path) + } + for _, root := range s.roots { + source, ok := s.sourceRef(root, req.Path, true) + if !ok { + source, ok = s.directPathSource(root, req.Path, true) + } + if ok { + return []SourceRef{source}, nil + } + if !jsonlMissingPathFallbackAllowed(req) { + continue + } + source, ok = s.sourceRef(root, req.Path, false) + if !ok { + source, ok = s.directPathSource(root, req.Path, false) + } + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s codexSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path, true); ok { + if !req.RequireFreshSource || req.PreferStoredSource { + return source, true, nil + } + return s.canonicalSource(ctx, source) + } + if source, ok := s.directPathSource(root, path, true); ok { + if !req.RequireFreshSource || req.PreferStoredSource { + return source, true, nil + } + return s.canonicalSource(ctx, source) + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path, true); ok { + return s.canonicalSource(ctx, source) + } + } + return SourceRef{}, false, nil +} + +func (s codexSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := 
s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("codex source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: CodexEffectiveMtime(path, info.ModTime().UnixNano()), + Hash: hash, + }, nil +} + +func (s codexSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case codexSource: + return src.Path, src.Path != "" + case *codexSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate, true); ok { + src := ref.Opaque.(codexSource) + return src.Path, true + } + if ref, ok := s.directPathSource(root, candidate, true); ok { + src := ref.Opaque.(codexSource) + return src.Path, true + } + } + } + return "", false +} + +func (s codexSourceSet) sourcesForIndexPath( + ctx context.Context, + indexPath string, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + indexDir := filepath.Dir(indexPath) + return s.discover(ctx, func(root string) bool { + return filepath.Dir(root) == indexDir + }) +} + +func (s codexSourceSet) sourceRef( + root string, + path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + layout, uuid, ok := CodexSessionPathInfo(root, path) + if !ok || uuid == "" { + return SourceRef{}, false + } + if requireRegular && !IsRegularFile(path) { + return SourceRef{}, false + } + 
return SourceRef{ + Provider: AgentCodex, + Key: codexSourceKey(uuid), + DisplayPath: path, + FingerprintKey: path, + Opaque: codexSource{ + Root: root, + Path: path, + UUID: uuid, + Layout: layout, + }, + }, true +} + +func (s codexSourceSet) directPathSource( + root string, + path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !strings.HasSuffix(path, ".jsonl") || !pathUnderRoot(root, path) { + return SourceRef{}, false + } + if requireRegular && !IsRegularFile(path) { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentCodex, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: codexSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s codexSourceSet) canonicalSource( + ctx context.Context, + source SourceRef, +) (SourceRef, bool, error) { + src, ok := source.Opaque.(codexSource) + if !ok || src.UUID == "" { + return source, true, nil + } + best := source + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + path := s.findSourceFile(root, src.UUID) + if path == "" { + continue + } + candidate, ok := s.sourceRef(root, path, true) + if !ok { + continue + } + if preferCodexSource(candidate, best) { + best = candidate + } + } + return best, true, nil +} + +func codexSourceKey(uuid string) string { + return string(AgentCodex) + ":" + uuid +} + +func preferCodexSource(candidate, current SourceRef) bool { + cand := candidate.Opaque.(codexSource) + curr := current.Opaque.(codexSource) + if cand.Layout != curr.Layout { + return cand.Layout == CodexLayoutDated + } + return candidate.DisplayPath < current.DisplayPath +} + +func codexProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: 
CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + ToolResultEvents: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + TerminationStatus: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/codex_provider_test.go b/internal/parser/codex_provider_test.go new file mode 100644 index 000000000..f86267e43 --- /dev/null +++ b/internal/parser/codex_provider_test.go @@ -0,0 +1,368 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestCodexProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCodex) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCodexProviderSourceMethods(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e1" + sourcePath := writeCodexProviderSession(t, root, uuid, "Rename me") + indexPath := filepath.Join(base, CodexSessionIndexFilename) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+uuid+`","thread_name":"Renamed title","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 
0o644)) + newer := time.Now().Add(time.Hour) + require.NoError(t, os.Chtimes(indexPath, newer, newer)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, base, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{CodexSessionIndexFilename}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + source := discovered[0] + assert.Equal(t, AgentCodex, source.Provider) + assert.Equal(t, sourcePath, source.DisplayPath) + assert.Equal(t, sourcePath, source.FingerprintKey) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + for _, path := range []string{sourcePath, indexPath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + } + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Equal(t, info.Size(), fingerprint.Size) + assert.Equal(t, newer.UnixNano(), fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) 
+ require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "codex:"+uuid, result.Result.Session.ID) + assert.Equal(t, AgentCodex, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, "api", result.Result.Session.Project) + assert.Equal(t, "Renamed title", result.Result.Session.SessionName) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestCodexProviderDoesNotAdvertiseIncrementalAppend(t *testing.T) { + root := t.TempDir() + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e2" + writeCodexProviderSession(t, root, uuid, "hello") + + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + assert.Equal(t, + CapabilityNotApplicable, + provider.Capabilities().Source.IncrementalAppend, + ) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{}, + SessionID: "codex:" + uuid, + Offset: 0, + StartOrdinal: 1, + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalUnsupported, status) + assert.Empty(t, outcome.Messages) +} + +func TestCodexProviderDiscoverDedupesLiveAndArchivedByUUID(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e5" + livePath := writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: 
[]string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, livePath, discovered[0].DisplayPath) + assert.NotEqual(t, archivedPath, discovered[0].DisplayPath) +} + +func TestCodexProviderFindSourcePinsExactArchivedDuplicate(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e6" + livePath := writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: archivedPath, + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, archivedPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "codex:" + uuid, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, livePath, found.DisplayPath) +} + +func TestCodexProviderFindSourceAcceptsLegacyShapedStoredPath(t *testing.T) { + root := t.TempDir() + sessionID := "test-uuid" + sourcePath := filepath.Join( + root, + "2024", + "01", + "15", + "rollout-20240115-"+sessionID+".jsonl", + ) + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + sessionID, + "/home/user/code/api", + "codex_cli_rs", + tsEarly, + ), + testjsonl.CodexMsgJSON("user", "Add tests", tsEarlyS1), + ) + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte(content), 0o644)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{root}, + Machine: 
"devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sourcePath, + FingerprintKey: sourcePath, + }) + require.NoError(t, err) + require.True(t, found) + assert.Equal(t, AgentCodex, source.Provider) + assert.Equal(t, sourcePath, source.DisplayPath) + assert.Equal(t, sourcePath, source.FingerprintKey) + + fingerprint, err := provider.Fingerprint(context.Background(), source) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: fingerprint, + Machine: "devbox", + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, "codex:"+sessionID, result.Result.Session.ID) + assert.Equal(t, "api", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestCodexProviderChangedPathPinsArchivedDuplicate(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, "archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e7" + _ = writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok 
:= NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: archivedPath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, archivedPath, changed[0].DisplayPath) +} + +func TestCodexProviderChangedPathClassifiesRemovedTranscript(t *testing.T) { + root := t.TempDir() + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e8" + sourcePath := writeCodexProviderSession(t, root, uuid, "remove") + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + require.NoError(t, os.Remove(sourcePath)) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestCodexProviderIndexPathClassifiesAllSiblingSources(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + firstUUID := "019eb791-cf7d-75c1-8439-9ed74c1229e9" + secondUUID := "019eb791-cf7d-75c1-8439-9ed74c1229ea" + firstPath := writeCodexProviderSession(t, root, firstUUID, "first") + secondPath := writeCodexProviderSession(t, root, secondUUID, "second") + indexPath := filepath.Join(base, CodexSessionIndexFilename) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+firstUUID+`","thread_name":"Only first remains","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 0o644)) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: indexPath, EventKind: "write"}, + ) + require.NoError(t, err) + assert.Equal(t, []string{firstPath, secondPath}, sourceDisplayPaths(changed)) +} + +func writeCodexProviderSession( + 
t *testing.T, + root, uuid, prompt string, +) string { + t.Helper() + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON(uuid, "/home/user/code/api", "codex_cli_rs", tsEarly), + testjsonl.CodexMsgJSON("user", prompt, tsEarlyS1), + ) + return writeCodexProviderSessionContent(t, root, uuid, content) +} + +func writeCodexProviderArchivedSession( + t *testing.T, + root, uuid, prompt string, +) string { + t.Helper() + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON(uuid, "/home/user/code/archive", "codex_cli_rs", tsEarly), + testjsonl.CodexMsgJSON("user", prompt, tsEarlyS1), + ) + path := filepath.Join(root, "rollout-2026-06-11T12-44-06-"+uuid+".jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + return path +} + +func writeCodexProviderSessionContent( + t *testing.T, + root, uuid, content string, +) string { + t.Helper() + path := filepath.Join( + root, + "2026", + "06", + "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + return path +} diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 57ee77d36..0c4c35be2 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -333,10 +333,12 @@ func readCopilotWorkspaceName(eventsPath string) string { return "" } -// ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil, nil) if the file doesn't exist or -// contains no user/assistant messages. -func ParseCopilotSession( +// parseSession parses a Copilot JSONL session file into the session, messages, +// and usage events the provider consumes. Returns (nil, nil, nil, nil) if the +// file doesn't exist or contains no user/assistant messages. This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. 
+func (p *copilotProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) diff --git a/internal/parser/copilot_ide_provider_test.go b/internal/parser/copilot_ide_provider_test.go new file mode 100644 index 000000000..1d7aa9ce7 --- /dev/null +++ b/internal/parser/copilot_ide_provider_test.go @@ -0,0 +1,416 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestCopilotIDEProvidersOwnLegacyEntrypoints guards the fold: the +// provider-specific Discover/Find/Parse free functions (and the Visual Studio +// virtual-path splitter) must stay deleted, and neither the provider files nor +// their legacy source files may reach back into them as a shim. Discovery and +// lookup live on the provider source sets; parse lives on the provider methods; +// the Visual Studio virtual-path resolution is reproduced via the +// provider-neutral ParseVirtualSourcePath helper. 
+func TestCopilotIDEProvidersOwnLegacyEntrypoints(t *testing.T) { + files := map[string]string{} + for _, name := range []string{ + "discovery.go", + "vscode_copilot.go", + "vscode_copilot_provider.go", + "visualstudio_copilot.go", + "visualstudio_copilot_provider.go", + } { + data, err := os.ReadFile(name) + require.NoError(t, err) + files[name] = string(data) + } + + deletedSymbols := []string{ + "func DiscoverVSCodeCopilotSessions", + "func FindVSCodeCopilotSourceFile", + "func ParseVSCodeCopilotSession", + "func DiscoverVisualStudioCopilotSessions", + "func FindVisualStudioCopilotSourceFile", + "func ParseVisualStudioCopilotConversation", + "func ParseVisualStudioCopilotSession", + "func ParseVisualStudioCopilotVirtualPath", + } + for name, src := range files { + for _, symbol := range deletedSymbols { + assert.NotContainsf(t, src, symbol, "%s still defines %s", name, symbol) + } + } + + deletedCalls := []string{ + "DiscoverVSCodeCopilotSessions(", + "FindVSCodeCopilotSourceFile(", + "ParseVSCodeCopilotSession(", + "DiscoverVisualStudioCopilotSessions(", + "FindVisualStudioCopilotSourceFile(", + "ParseVisualStudioCopilotConversation(", + "ParseVisualStudioCopilotSession(", + "ParseVisualStudioCopilotVirtualPath(", + } + for _, providerFile := range []string{ + "vscode_copilot_provider.go", + "visualstudio_copilot_provider.go", + } { + for _, call := range deletedCalls { + assert.NotContainsf( + t, files[providerFile], call, + "%s references removed legacy entrypoint %s", providerFile, call, + ) + } + } +} + +func TestCopilotIDEProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentVSCodeCopilot, AgentVSCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } 
+} + +func TestVSCodeCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "vscode-provider" + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + jsonPath := filepath.Join(chatDir, sessionID+".json") + jsonlPath := filepath.Join(chatDir, sessionID+".jsonl") + writeSourceFile(t, filepath.Join(hashDir, "workspace.json"), + `{"folder":"file:///Users/alice/code/copilot-app"}`) + writeSourceFile(t, jsonPath, `{"version":3,"sessionId":"`+sessionID+`","requests":[]}`) + writeSourceFile(t, jsonlPath, strings.Join([]string{ + `{"kind":0,"v":{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[]}}`, + `{"kind":2,"k":["requests"],"v":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"Hello VS Code","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/claude-opus-4.8","result":{"metadata":{"promptTokens":42,"outputTokens":7,"resolvedModel":"claude-opus-4-8"}}}]}`, + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "workspaceStorage"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, filepath.Join(root, "globalStorage"), plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, jsonlPath, discovered[0].DisplayPath) + assert.Equal(t, "copilot-app", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~vscode-copilot:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonlPath, 
found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "write", WatchRoot: filepath.Join(root, "workspaceStorage")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, jsonlPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + require.False(t, outcome.ForceReplace) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "vscode-copilot:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentVSCodeCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot-app", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "vscode-copilot", result.Result.UsageEvents[0].Source) + assert.Equal(t, "claude-opus-4-8", result.Result.UsageEvents[0].Model) + assert.Equal(t, 42, result.Result.UsageEvents[0].InputTokens) + assert.Equal(t, 7, result.Result.UsageEvents[0].OutputTokens) +} + +func TestVSCodeCopilotProviderClassifiesDeletedAndMetadataPaths(t *testing.T) { + root := t.TempDir() + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, 
"chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + jsonlPath := filepath.Join(chatDir, "deleted-jsonl.jsonl") + jsonPath := filepath.Join(chatDir, "fallback-json.json") + globalPath := filepath.Join( + root, + "globalStorage", + "emptyWindowChatSessions", + "deleted-global.json", + ) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/copilot-app"}`) + writeSourceFile(t, jsonlPath, vscodeCopilotProviderJSONL("deleted-jsonl", "Hello deleted")) + writeSourceFile(t, jsonPath, vscodeCopilotProviderJSON("fallback-json", "Hello fallback")) + writeSourceFile(t, globalPath, vscodeCopilotProviderJSON("deleted-global", "Hello global")) + + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + metadataChanged, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "write"}, + ) + require.NoError(t, err) + assert.ElementsMatch(t, + []string{jsonlPath, jsonPath}, + sourceDisplayPaths(metadataChanged), + ) + require.Len(t, metadataChanged, 2) + beforeMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/copilot-renamed-app"}`) + afterMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + assert.NotEqual(t, beforeMetadata.Hash, afterMetadata.Hash) + + require.NoError(t, os.Remove(jsonlPath)) + deletedJSONL, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, deletedJSONL, 1) + assert.Equal(t, jsonlPath, deletedJSONL[0].DisplayPath) + + require.NoError(t, os.Remove(globalPath)) + deletedGlobal, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: globalPath, EventKind: "remove"}, + ) + 
require.NoError(t, err) + require.Len(t, deletedGlobal, 1) + assert.Equal(t, globalPath, deletedGlobal[0].DisplayPath) +} + +func TestVisualStudioCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + conversationID := "4a8f63f6-7626-4416-a874-fc7bd2c3f005" + tracePath := filepath.Join( + root, + "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", + ) + writeSourceFile(t, tracePath, strings.Join([]string{ + vsCopilotTraceLineJSON(conversationID, + "execute_tool run_command_in_terminal", + "1781293588624985000", "1781293588769581200", + map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_123", + "gen_ai.tool.call.arguments": `{"command":"go test ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + vsCopilotTraceLineJSON(conversationID, + "invoke_agent GitHub Copilot", + "1781293600000000000", "1781293610000000000", + map[string]string{ + "gen_ai.agent.name": "GitHub Copilot", + "gen_ai.request.model": "gpt-5.5", + "copilot_chat.mode": "Agent", + "copilot_chat.turn_count": "1", + }), + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*_VSGitHubCopilot_traces.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + virtualPath := VisualStudioCopilotVirtualPath(tracePath, conversationID) + assert.Equal(t, virtualPath, discovered[0].DisplayPath) + assert.Equal(t, "visualstudio", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: conversationID, + }) + require.NoError(t, err) + 
require.True(t, ok) + assert.Equal(t, virtualPath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tracePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, virtualPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + foundWithProject := found + foundWithProject.ProjectHint = "stored-solution" + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: foundWithProject, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "visualstudio-copilot:"+conversationID, result.Result.Session.ID) + assert.Equal(t, AgentVSCopilot, result.Result.Session.Agent) + assert.Equal(t, "stored-solution", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 1) +} + +func TestVisualStudioCopilotProviderClassifiesDeletedTraceAndFansOutPhysicalTrace( + t *testing.T, +) { + root := t.TempDir() + firstConversationID := "4a8f63f6-7626-4416-a874-fc7bd2c3f005" + secondConversationID := "5b9f63f6-7626-4416-a874-fc7bd2c3f006" + tracePath := filepath.Join( + root, + "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", + ) + writeSourceFile(t, tracePath, strings.Join([]string{ + vsCopilotTraceLineJSON(firstConversationID, + "execute_tool run_command_in_terminal", + "1781293588624985000", "1781293588769581200", + 
map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_123", + "gen_ai.tool.call.arguments": `{"command":"go test ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + vsCopilotTraceLineJSON(secondConversationID, + "execute_tool run_command_in_terminal", + "1781293688624985000", "1781293688769581200", + map[string]string{ + "gen_ai.tool.name": "run_command_in_terminal", + "gen_ai.tool.call.id": "call_456", + "gen_ai.tool.call.arguments": `{"command":"go vet ./..."}`, + "gen_ai.tool.call.result": `{"Value":"ok"}`, + }), + }, "\n")+"\n") + + provider, ok := NewProvider(AgentVSCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{ + VisualStudioCopilotVirtualPath(tracePath, firstConversationID), + VisualStudioCopilotVirtualPath(tracePath, secondConversationID), + }, sourceDisplayPaths(discovered)) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tracePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: changed[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 2) + assert.ElementsMatch(t, []string{ + "visualstudio-copilot:" + firstConversationID, + "visualstudio-copilot:" + secondConversationID, + }, parseOutcomeSessionIDs(outcome)) + + require.NoError(t, os.Remove(tracePath)) + deleted, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tracePath, EventKind: "remove"}, + ) + 
require.NoError(t, err) + require.Len(t, deleted, 1) + assert.Equal(t, tracePath, deleted[0].DisplayPath) +} + +func vscodeCopilotProviderJSON(sessionID, prompt string) string { + return `{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"` + prompt + `","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/gpt-4o"}]}` +} + +func vscodeCopilotProviderJSONL(sessionID, prompt string) string { + return strings.Join([]string{ + `{"kind":0,"v":{"version":3,"sessionId":"` + sessionID + `","creationDate":1770650022790,"requests":[]}}`, + `{"kind":2,"k":["requests"],"v":[{"requestId":"req1","timestamp":1770650031889,"message":{"text":"` + prompt + `","parts":[]},"response":[{"value":"Hi from VS Code"}],"modelId":"copilot/gpt-4o"}]}`, + }, "\n") + "\n" +} + +func parseOutcomeSessionIDs(outcome ParseOutcome) []string { + ids := make([]string, 0, len(outcome.Results)) + for _, result := range outcome.Results { + ids = append(ids, result.Result.Session.ID) + } + return ids +} diff --git a/internal/parser/copilot_ide_test_helpers_test.go b/internal/parser/copilot_ide_test_helpers_test.go new file mode 100644 index 000000000..3dad2913f --- /dev/null +++ b/internal/parser/copilot_ide_test_helpers_test.go @@ -0,0 +1,113 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// newVSCodeCopilotTestProvider builds a concrete vscodeCopilotProvider for the +// given roots so package tests can exercise the folded parse, discovery, and +// source-lookup behavior directly through provider-owned methods. 
+func newVSCodeCopilotTestProvider( + t *testing.T, roots ...string, +) *vscodeCopilotProvider { + t.Helper() + provider, ok := NewProvider(AgentVSCodeCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + p, ok := provider.(*vscodeCopilotProvider) + require.True(t, ok) + return p +} + +// parseVSCodeCopilotTestSession parses a VSCode Copilot session file through +// the provider-owned parse method, replacing the removed package-level +// ParseVSCodeCopilotSession entrypoint. +func parseVSCodeCopilotTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newVSCodeCopilotTestProvider(t).parseSession(path, project, machine) +} + +// discoverVSCodeCopilotTestSessions discovers VSCode Copilot session files +// under root through the provider source set, returning the legacy +// DiscoveredFile shape the tests assert against. +func discoverVSCodeCopilotTestSessions( + t *testing.T, root string, +) []DiscoveredFile { + t.Helper() + return newVSCodeCopilotTestProvider(t, root).sources.discoverSessionFiles(root) +} + +// findVSCodeCopilotTestSourceFile resolves a raw VSCode Copilot session ID to a +// session file through the provider source set, replacing the removed +// FindVSCodeCopilotSourceFile. +func findVSCodeCopilotTestSourceFile( + t *testing.T, root, rawID string, +) string { + t.Helper() + return newVSCodeCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + +// parseVisualStudioCopilotTestConversation parses one Visual Studio Copilot +// conversation through the folded free function, replacing the removed +// package-level ParseVisualStudioCopilotConversation entrypoint. 
+func parseVisualStudioCopilotTestConversation( + t *testing.T, tracePath, conversationID, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return parseVisualStudioCopilotConversation( + tracePath, conversationID, project, machine, + ) +} + +// parseVisualStudioCopilotTestSession reproduces the removed package-level +// ParseVisualStudioCopilotSession entrypoint. The path may be a real trace file +// or a # virtual path; a real trace file resolves to +// its first conversation. +func parseVisualStudioCopilotTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + if tracePath, conversationID, ok := + splitVisualStudioCopilotVirtualPath(path); ok { + return parseVisualStudioCopilotConversation( + tracePath, conversationID, project, machine, + ) + } + if !IsVisualStudioCopilotTraceFile(path) { + return nil, nil, nil + } + ids, err := VisualStudioCopilotFileConversationIDs(path) + if err != nil { + return nil, nil, err + } + if len(ids) == 0 { + return nil, nil, nil + } + return parseVisualStudioCopilotConversation(path, ids[0], project, machine) +} + +// discoverVisualStudioCopilotTestSessions discovers Visual Studio Copilot +// session work items under root, replacing the removed +// DiscoverVisualStudioCopilotSessions. +func discoverVisualStudioCopilotTestSessions( + t *testing.T, root string, +) []DiscoveredFile { + t.Helper() + return discoverVisualStudioCopilotSessionFilesUnderRoot(root) +} + +// findVisualStudioCopilotTestSourceFile resolves a raw Visual Studio Copilot +// conversation ID to a conversation-scoped virtual path, replacing the removed +// FindVisualStudioCopilotSourceFile. 
+func findVisualStudioCopilotTestSourceFile( + t *testing.T, root, rawID string, +) string { + t.Helper() + return findVisualStudioCopilotSourceFile(root, rawID) +} diff --git a/internal/parser/copilot_provider.go b/internal/parser/copilot_provider.go new file mode 100644 index 000000000..2d7d4b5ca --- /dev/null +++ b/internal/parser/copilot_provider.go @@ -0,0 +1,489 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*copilotProvider)(nil) + +type copilotProviderFactory struct { + def AgentDef +} + +func newCopilotProviderFactory(def AgentDef) ProviderFactory { + return copilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f copilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f copilotProviderFactory) Capabilities() Capabilities { + return copilotProviderCapabilities() +} + +func (f copilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &copilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: copilotProviderCapabilities(), + Config: cfg, + }, + sources: newCopilotSourceSet(cfg.Roots), + } +} + +type copilotProvider struct { + ProviderBase + sources copilotSourceSet +} + +func (p *copilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *copilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *copilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *copilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *copilotProvider) Fingerprint( + ctx context.Context, + 
source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *copilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("copilot source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, usage, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + sess.UsageEvents = usage + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: usage, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type copilotSource struct { + Root string + Path string +} + +type copilotSourceSet struct { + roots []string +} + +func newCopilotSourceSet(roots []string) copilotSourceSet { + return copilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s copilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Copilot session file paths under +// /session-state/. 
It supports both the bare layout (.jsonl) and +// the directory layout (/events.jsonl); when both exist for the same +// session, the directory layout wins and the bare file is dropped so a session +// is not discovered twice. +func (s copilotSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + stateDir := filepath.Join(root, copilotStateDir) + entries, err := os.ReadDir(stateDir) + if err != nil { + return nil + } + + dirs := make(map[string]struct{}) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + eventsPath := filepath.Join(stateDir, entry.Name(), "events.jsonl") + if _, err := os.Stat(eventsPath); err == nil { + dirs[entry.Name()] = struct{}{} + } + } + + var paths []string + for _, entry := range entries { + name := entry.Name() + if entry.IsDir() { + candidate := filepath.Join(stateDir, name, "events.jsonl") + if _, err := os.Stat(candidate); err == nil { + paths = append(paths, candidate) + } + continue + } + if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { + if _, dup := dirs[stem]; dup { + continue + } + paths = append(paths, filepath.Join(stateDir, name)) + } + } + return paths +} + +func (s copilotSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + stateDir := filepath.Join(root, copilotStateDir) + roots = append(roots, WatchRoot{ + Path: stateDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "workspace.yaml"}, + DebounceKey: string(AgentCopilot) + ":state:" + stateDir, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s copilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s copilotSourceSet) 
FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Copilot session file by UUID under root. It checks +// the directory layout (/events.jsonl) first, then the bare layout +// (.jsonl), so the richer directory form takes precedence. Returns "" for +// invalid IDs or when no file resolves. +func (s copilotSourceSet) findSourceFile(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + stateDir := filepath.Join(root, copilotStateDir) + + dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") + if _, err := os.Stat(dirFmt); err == nil { + return dirFmt + } + + bare := filepath.Join(stateDir, rawID+".jsonl") + if _, err := os.Stat(bare); err == nil { + return bare + } + + return "" +} + +func (s copilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("copilot source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + size := info.Size() + mtime := 
info.ModTime().UnixNano() + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil { + size += wsInfo.Size() + if wsMtime := wsInfo.ModTime().UnixNano(); wsMtime > mtime { + mtime = wsMtime + } + } + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: size, + MTimeNS: mtime, + } + h := sha256.New() + if err := addCopilotFingerprintPart(h, "events", path, info); err != nil { + return SourceFingerprint{}, err + } + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil && !wsInfo.IsDir() { + if err := addCopilotFingerprintPart(h, "workspace", workspace, wsInfo); err != nil { + return SourceFingerprint{}, err + } + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s copilotSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case copilotSource: + return src.Path, src.Path != "" + case *copilotSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(copilotSource) + return src.Path, true + } + } + } + return "", false +} + +func (s copilotSourceSet) sourceForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if filepath.Base(path) == "workspace.yaml" { + return s.sourceRef(root, filepath.Join(filepath.Dir(path), "events.jsonl")) + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + if !jsonlMissingPathFallbackAllowed(req) { + return SourceRef{}, false + } + if filepath.Base(path) == "events.jsonl" { + barePath := filepath.Join( + root, + copilotStateDir, + filepath.Base(filepath.Dir(path))+".jsonl", + ) + if 
source, ok := s.sourceRef(root, barePath); ok { + return source, true + } + } + return s.sourceRefForPath(root, path, false) +} + +func (s copilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s copilotSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 3 && + parts[0] == copilotStateDir && + parts[2] == "events.jsonl" { + return s.newSourceRef(root, path), true + } + if len(parts) == 2 && + parts[0] == copilotStateDir && + strings.HasSuffix(parts[1], ".jsonl") { + stem := strings.TrimSuffix(parts[1], ".jsonl") + if dirPath := s.findSourceFile(root, stem); dirPath != "" && + dirPath != path { + return s.sourceRef(root, dirPath) + } + return s.newSourceRef(root, path), true + } + return SourceRef{}, false +} + +func (s copilotSourceSet) newSourceRef(root, path string) SourceRef { + return SourceRef{ + Provider: AgentCopilot, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: copilotSource{ + Root: root, + Path: path, + }, + } +} + +func copilotWorkspacePath(eventsPath string) string { + if filepath.Base(eventsPath) != "events.jsonl" { + return "" + } + return filepath.Join(filepath.Dir(eventsPath), "workspace.yaml") +} + +func addCopilotFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + 
return nil +} + +func copilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 26a6d2f82..c5dc9cc72 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -10,6 +11,60 @@ import ( "github.com/stretchr/testify/require" ) +// newCopilotTestProvider builds a concrete copilotProvider for the given roots +// so package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newCopilotTestProvider(t *testing.T, roots ...string) *copilotProvider { + t.Helper() + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*copilotProvider) + require.True(t, ok) + return cp +} + +// parseCopilotTestSession parses a Copilot JSONL session file at path through +// the provider-owned parse method, replacing the removed package-level +// ParseCopilotSession entrypoint. 
+func parseCopilotTestSession( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { + t.Helper() + return newCopilotTestProvider(t).parseSession(path, machine) +} + +// discoverCopilotTestSessions discovers Copilot sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path) the tests assert +// against. +func discoverCopilotTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCopilotTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Agent: AgentCopilot, + }) + } + return files +} + +// findCopilotTestSourceFile resolves a Copilot session ID to a session file +// path through the provider, replacing the removed FindCopilotSourceFile. +func findCopilotTestSourceFile(t *testing.T, root, rawID string) string { + t.Helper() + return newCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + // writeCopilotJSONL writes JSONL lines to a temp file and // returns the file path. func writeCopilotJSONL( @@ -28,7 +83,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. 
func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, _, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +404,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +413,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected nil messages for nonexistent file") @@ -588,7 +643,7 @@ func parseCopilotFull( t *testing.T, path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { t.Helper() - sess, msgs, usage, err := ParseCopilotSession(path, machine) + sess, msgs, usage, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) return sess, msgs, usage } diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 09cdc1495..77484729e 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -23,6 +23,11 @@ var uuidRe = regexp.MustCompile( `[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})$`, ) +const ( + copilotStateDir = "session-state" + geminiChatsDir = "chats" +) + // isDirOrSymlink reports whether 
the entry is a directory or a // symlink that resolves to a directory. parentDir is needed to // build the full path for symlink resolution. @@ -524,6 +529,10 @@ func ClaudeProjectSessionFiles(projectsDir string) []DiscoveredFile { // DiscoverCodexSessions finds all Codex JSONL session files under // either the standard year/month/day layout or a flat archived dir. +// +// Local Codex discovery is owned by the Codex provider source set; this entry +// is retained as the s3:// discovery path (via discoverCodexS3), which the +// legacy S3 sync path consumes until S3 support folds into the source sets. func DiscoverCodexSessions(sessionsDir string) []DiscoveredFile { if strings.HasPrefix(sessionsDir, "s3://") { return discoverCodexS3(sessionsDir) @@ -645,62 +654,6 @@ func claudeFindSourceFile( return "" } -// FindCodexSourceFile finds a Codex session file by UUID. -// Prefers the standard year/month/day live path when present, -// then falls back to a flat archived dir entry. -func FindCodexSourceFile(sessionsDir, sessionID string) string { - if !IsValidSessionID(sessionID) { - return "" - } - - var archived string - entries, err := os.ReadDir(sessionsDir) - if err == nil { - for _, f := range entries { - if f.IsDir() { - continue - } - name := f.Name() - if !isCodexSessionFilename(name) { - continue - } - if extractUUIDFromRollout(name) == sessionID { - archived = filepath.Join(sessionsDir, name) - break - } - } - } - - var live string - walkCodexDayDirs(sessionsDir, func(dayPath string) bool { - if live != "" { - return false - } - entries, err := os.ReadDir(dayPath) - if err != nil { - return true - } - for _, f := range entries { - if f.IsDir() { - continue - } - name := f.Name() - if !isCodexSessionFilename(name) { - continue - } - if extractUUIDFromRollout(name) == sessionID { - live = filepath.Join(dayPath, name) - return false - } - } - return true - }) - if live != "" { - return live - } - return archived -} - func isCodexSessionFilename(name string) bool { 
return strings.HasPrefix(name, "rollout-") && strings.HasSuffix(name, ".jsonl") @@ -883,117 +836,6 @@ func isGeminiSessionFilename(name string) bool { strings.HasSuffix(name, ".jsonl")) } -// DiscoverGeminiSessions finds all Gemini session files under -// the Gemini directory (~/.gemini/tmp/*/chats/session-*). -func DiscoverGeminiSessions( - geminiDir string, -) []DiscoveredFile { - if geminiDir == "" { - return nil - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return nil - } - - projectMap := BuildGeminiProjectMap(geminiDir) - - var files []DiscoveredFile - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - hash := hd.Name() - chatsDir := filepath.Join(tmpDir, hash, "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - - project := ResolveGeminiProject(hash, projectMap) - - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatsDir, name), - Project: project, - Agent: AgentGemini, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindGeminiSourceFile locates a Gemini session file by its -// session UUID. Searches all project hash directories. 
-func FindGeminiSourceFile( - geminiDir, sessionID string, -) string { - if geminiDir == "" || !IsValidSessionID(sessionID) || - len(sessionID) < 8 { - return "" - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return "" - } - - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - chatsDir := filepath.Join(tmpDir, hd.Name(), "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - if strings.Contains(name, sessionID[:8]) { - path := filepath.Join(chatsDir, name) - if confirmGeminiSessionID( - path, sessionID, - ) { - return path - } - } - } - } - return "" -} - -// confirmGeminiSessionID reads the sessionId field from a -// Gemini file to confirm it matches the expected ID. -func confirmGeminiSessionID( - path, sessionID string, -) bool { - data, err := os.ReadFile(path) - if err != nil { - return false - } - return GeminiSessionID(data) == sessionID -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { @@ -1109,92 +951,6 @@ func ResolveGeminiProject( return NormalizeName(dirName) } -// DiscoverCopilotSessions finds all JSONL files under -// /session-state/. Supports both bare format -// (.jsonl) and directory format (/events.jsonl). 
-func DiscoverCopilotSessions( - copilotDir string, -) []DiscoveredFile { - if copilotDir == "" { - return nil - } - - stateDir := filepath.Join(copilotDir, "session-state") - entries, err := os.ReadDir(stateDir) - if err != nil { - return nil - } - - dirs := make(map[string]struct{}) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - eventsPath := filepath.Join( - stateDir, entry.Name(), "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - dirs[entry.Name()] = struct{}{} - } - } - - var files []DiscoveredFile - for _, entry := range entries { - name := entry.Name() - if entry.IsDir() { - candidate := filepath.Join( - stateDir, name, "events.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - files = append(files, DiscoveredFile{ - Path: candidate, - Agent: AgentCopilot, - }) - } - continue - } - if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { - if _, dup := dirs[stem]; dup { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(stateDir, name), - Agent: AgentCopilot, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCopilotSourceFile locates a Copilot session file by -// UUID. Checks both bare (.jsonl) and directory -// (/events.jsonl) layouts. -func FindCopilotSourceFile( - copilotDir, rawID string, -) string { - if copilotDir == "" || !IsValidSessionID(rawID) { - return "" - } - - stateDir := filepath.Join(copilotDir, "session-state") - - dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") - if _, err := os.Stat(dirFmt); err == nil { - return dirFmt - } - - bare := filepath.Join(stateDir, rawID+".jsonl") - if _, err := os.Stat(bare); err == nil { - return bare - } - - return "" -} - // IsPiSessionFile reads the first non-blank line of path and returns true // when the JSON type field equals "session". The scanner buffer grows up to // 64 MiB to match parser.maxLineSize. 
Leading blank lines are skipped to @@ -1249,82 +1005,11 @@ func isContainedIn(child, root string) bool { !strings.HasPrefix(rel, ".."+string(filepath.Separator)) } -// DiscoverVSCodeCopilotSessions traverses the VSCode -// workspaceStorage directory to find chatSessions/*.json -// and *.jsonl files. When both formats exist for the same -// session UUID, the .jsonl file takes priority. -// It also checks globalStorage/emptyWindowChatSessions. -// The vscodeUserDir should point to e.g. -// -// ~/Library/Application Support/Code/User (macOS) -// ~/.config/Code/User (Linux) -func DiscoverVSCodeCopilotSessions( - vscodeUserDir string, -) []DiscoveredFile { - if vscodeUserDir == "" { - return nil - } - - var files []DiscoveredFile - - // 1. Scan workspaceStorage//chatSessions/*.{json,jsonl} - wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err == nil { - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - - hashPath := filepath.Join(wsDir, entry.Name()) - chatDir := filepath.Join(hashPath, "chatSessions") - sessionFiles, err := os.ReadDir(chatDir) - if err != nil { - continue - } - - // Read workspace.json to get project name - project := ReadVSCodeWorkspaceManifest(hashPath) - if project == "" { - project = "unknown" - } - - files = append(files, - discoverVSCodeSessionFiles( - chatDir, sessionFiles, project, - )..., - ) - } - } - - // 2. 
Scan globalStorage/emptyWindowChatSessions/*.{json,jsonl} - for _, subdir := range []string{ - "globalStorage/emptyWindowChatSessions", - "globalStorage/transferredChatSessions", - } { - globalDir := filepath.Join(vscodeUserDir, subdir) - globalFiles, err := os.ReadDir(globalDir) - if err != nil { - continue - } - files = append(files, - discoverVSCodeSessionFiles( - globalDir, globalFiles, "empty-window", - )..., - ) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - // discoverVSCodeSessionFiles collects .json and .jsonl // session files from a directory, preferring .jsonl when // both exist for the same UUID. func discoverVSCodeSessionFiles( - dir string, entries []os.DirEntry, project string, + dir string, entries []os.DirEntry, project string, agent AgentType, ) []DiscoveredFile { // Collect UUIDs that have .jsonl files hasJSONL := make(map[string]bool) @@ -1350,7 +1035,7 @@ func discoverVSCodeSessionFiles( files = append(files, DiscoveredFile{ Path: filepath.Join(dir, name), Project: project, - Agent: AgentVSCodeCopilot, + Agent: agent, }) } else if uuid, ok := strings.CutSuffix(name, ".json"); ok { // Skip .json if a .jsonl exists for the same UUID @@ -1360,77 +1045,13 @@ func discoverVSCodeSessionFiles( files = append(files, DiscoveredFile{ Path: filepath.Join(dir, name), Project: project, - Agent: AgentVSCodeCopilot, + Agent: agent, }) } } return files } -// FindVSCodeCopilotSourceFile locates a VSCode Copilot -// session file by UUID (.jsonl preferred over .json). 
-func FindVSCodeCopilotSourceFile( - vscodeUserDir, rawID string, -) string { - if vscodeUserDir == "" || !IsValidSessionID(rawID) { - return "" - } - - // Search through workspaceStorage - wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err == nil { - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - base := filepath.Join( - wsDir, entry.Name(), "chatSessions", - ) - // Prefer .jsonl - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join( - base, rawID+ext, - ) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - } - - // Check global dirs - for _, subdir := range []string{ - "globalStorage/emptyWindowChatSessions", - "globalStorage/transferredChatSessions", - } { - base := filepath.Join(vscodeUserDir, subdir) - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join(base, rawID+ext) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - - return "" -} - -// DiscoverVisualStudioCopilotSessions finds Visual Studio Copilot -// trace files under the configured traces directory. -func DiscoverVisualStudioCopilotSessions(vsRoot string) []DiscoveredFile { - if vsRoot == "" { - return nil - } - entries, err := os.ReadDir(vsRoot) - if err != nil { - return nil - } - files := discoverVisualStudioCopilotSessionFiles(vsRoot, entries) - sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) - return files -} - // discoverVisualStudioCopilotSessionFiles emits one work item per conversation // found across the trace files in a directory. A single physical trace file // can hold spans for several conversations, and one conversation can be split @@ -1490,15 +1111,6 @@ func discoverVisualStudioCopilotSessionFiles( return files } -// FindVisualStudioCopilotSourceFile locates a Visual Studio Copilot -// trace file by conversation UUID. 
-func FindVisualStudioCopilotSourceFile(vsRoot, rawID string) string { - if vsRoot == "" || !IsValidSessionID(rawID) { - return "" - } - return findVisualStudioCopilotTraceSourceFile(vsRoot, rawID) -} - func findVisualStudioCopilotTraceSourceFile( dir, rawID string, ) string { diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index b44bb3c4b..3170a9b20 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -11,11 +11,6 @@ import ( "github.com/stretchr/testify/require" ) -const ( - copilotStateDir = "session-state" - geminiChatsDir = "chats" -) - // setupFileSystem creates a temporary directory and populates // it with the given relative file paths and contents. func setupFileSystem(t *testing.T, dir string, files map[string]string) { @@ -199,7 +194,7 @@ func TestDiscoverCodexSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCodexSessions(dir) + files := discoverCodexTestSessions(t, dir) assertDiscoveredFiles(t, files, tt.wantFiles, AgentCodex) }) } @@ -439,7 +434,7 @@ func TestFindCodexSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCodexSourceFile(dir, tt.targetID) + got := findCodexTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -583,7 +578,7 @@ func TestDiscoverGeminiSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -603,17 +598,17 @@ func TestDiscoverGeminiSessions(t *testing.T) { t.Run("EmptyChatDir", func(t *testing.T) { dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "tmp", "hash1", geminiChatsDir), 0o755), "mkdir") - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) assert.Nil(t, 
files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverGeminiSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverGeminiTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverGeminiSessions("") + files := discoverGeminiTestSessions(t, "") assert.Nil(t, files, "expected nil") }) } @@ -656,7 +651,7 @@ func TestFindGeminiSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindGeminiSourceFile(dir, tt.targetID) + got := findGeminiTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -669,13 +664,13 @@ func TestFindGeminiSourceFile(t *testing.T) { t.Run("ShortID", func(t *testing.T) { dir := t.TempDir() for _, id := range []string{"", "a", "abc", "1234567"} { - got := FindGeminiSourceFile(dir, id) + got := findGeminiTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindGeminiSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindGeminiSourceFile("", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") + got := findGeminiTestSourceFile(t, "", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") assert.Empty(t, got, "expected empty") }) } @@ -893,7 +888,7 @@ func TestDiscoverCopilotSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCopilotSessions(dir) + files := discoverCopilotTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -911,12 +906,12 @@ func TestDiscoverCopilotSessions(t *testing.T) { } t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverCopilotSessions("") + files := discoverCopilotTestSessions(t, "") assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverCopilotSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverCopilotTestSessions(t, filepath.Join(t.TempDir(), 
"does-not-exist")) assert.Nil(t, files, "expected nil") }) } @@ -962,7 +957,7 @@ func TestFindCopilotSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCopilotSourceFile(dir, tt.targetID) + got := findCopilotTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -975,13 +970,13 @@ func TestFindCopilotSourceFile(t *testing.T) { t.Run("InvalidID", func(t *testing.T) { dir := t.TempDir() for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindCopilotSourceFile(dir, id) + got := findCopilotTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindCopilotSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindCopilotSourceFile("", "abc-123") + got := findCopilotTestSourceFile(t, "", "abc-123") assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/gemini.go b/internal/parser/gemini.go index 432fad42b..60bb98519 100644 --- a/internal/parser/gemini.go +++ b/internal/parser/gemini.go @@ -53,10 +53,12 @@ func normalizedGeminiTokenUsage(tok geminiTokens) json.RawMessage { return raw } -// ParseGeminiSession parses a Gemini CLI session JSON file. -// Unlike Claude/Codex JSONL, each Gemini file is a single JSON -// document containing all messages. -func ParseGeminiSession( +// parseSession parses a Gemini CLI session JSON file into the session and +// messages the provider consumes. Unlike Claude/Codex JSONL, each Gemini file +// is a single JSON document containing all messages. This is the provider-owned +// parse entrypoint; the package-level free function was folded onto the +// provider. 
+func (p *geminiProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/gemini_copilot_provider_test.go b/internal/parser/gemini_copilot_provider_test.go new file mode 100644 index 000000000..c2d9d1344 --- /dev/null +++ b/internal/parser/gemini_copilot_provider_test.go @@ -0,0 +1,321 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestGeminiCopilotProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentGemini, AgentCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } +} + +func TestGeminiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "tmp"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"session-*.json", 
"session-*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, root, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"projects.json", "trustedFolders.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "my_project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "my_project", changed[0].ProjectHint) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.Error(t, err) + require.Empty(t, fingerprint) +} + +func TestGeminiProviderProjectMetadataChangesClassifyAndFingerprint(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-project-metadata" + projectsPath := filepath.Join(root, "projects.json") + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/one":"alias"}}`) + sourcePath := filepath.Join( + root, + "tmp", + "alias", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-project-metadata.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( 
+ sessionID, + "alias", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, "one", found.ProjectHint) + + fingerprintOne, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/two":"alias"}}`) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: projectsPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "two", changed[0].ProjectHint) + + fingerprintTwo, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + assert.NotEqual(t, fingerprintOne.Hash, fingerprintTwo.Hash) +} + +func TestGeminiProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, 
+ }) + require.NoError(t, err) + require.True(t, ok) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "gemini:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentGemini, result.Result.Session.Agent) + assert.Equal(t, "my_project", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + barePath := filepath.Join(root, copilotStateDir, "copilot-provider.jsonl") + dirEvents := filepath.Join(root, copilotStateDir, "copilot-provider", "events.jsonl") + workspacePath := filepath.Join(root, copilotStateDir, "copilot-provider", "workspace.yaml") + content := strings.Join([]string{ + `{"type":"session.start","data":{"sessionId":"copilot-provider","context":{"cwd":"/home/user/code/copilot-app","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"hello copilot"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"hi"},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"gpt-5":{"usage":{"inputTokens":100,"outputTokens":20,"cacheReadTokens":30,"cacheWriteTokens":10,"reasoningTokens":5}}}},"timestamp":"2025-01-15T10:00:03Z"}`, + }, "\n") + "\n" + writeSourceFile(t, barePath, 
content) + writeSourceFile(t, dirEvents, content) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, copilotStateDir), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "workspace.yaml"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, dirEvents, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "copilot-provider", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, dirEvents, found.DisplayPath) + + for _, path := range []string{dirEvents, workspacePath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + } + + require.NoError(t, os.Remove(dirEvents)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: dirEvents, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, barePath, changed[0].DisplayPath) + writeSourceFile(t, dirEvents, content) + + require.NoError(t, os.Remove(workspacePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + 
assert.Equal(t, dirEvents, changed[0].DisplayPath) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, dirEvents, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + writeSourceFile(t, workspacePath, "name: Workspace other\n") + renamedFingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.NotEqual(t, fingerprint.Hash, renamedFingerprint.Hash) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + fingerprint, err = provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "copilot:copilot-provider", result.Result.Session.ID) + assert.Equal(t, AgentCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot_app", result.Result.Session.Project) + assert.Equal(t, "Workspace title", result.Result.Session.FirstMessage) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "gpt-5", result.Result.UsageEvents[0].Model) +} diff --git a/internal/parser/gemini_parser_test.go b/internal/parser/gemini_parser_test.go index 547f80d86..5178f4db4 100644 --- a/internal/parser/gemini_parser_test.go +++ 
b/internal/parser/gemini_parser_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "strings" "testing" "time" @@ -11,10 +12,65 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) +// newGeminiTestProvider builds a concrete geminiProvider for the given roots so +// package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newGeminiTestProvider(t *testing.T, roots ...string) *geminiProvider { + t.Helper() + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + gp, ok := provider.(*geminiProvider) + require.True(t, ok) + return gp +} + +// parseGeminiTestSession parses a Gemini session file at path through the +// provider-owned parse method, replacing the removed package-level +// ParseGeminiSession entrypoint. +func parseGeminiTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newGeminiTestProvider(t).parseSession(path, project, machine) +} + +// discoverGeminiTestSessions discovers Gemini sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path + project) the +// tests assert against. +func discoverGeminiTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newGeminiTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: AgentGemini, + }) + } + return files +} + +// findGeminiTestSourceFile resolves a Gemini session ID to a session file path +// through the provider, replacing the removed FindGeminiSourceFile. 
+func findGeminiTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newGeminiTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func runGeminiParserTest(t *testing.T, content string) (*ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, "session.json", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) return sess, msgs } @@ -45,7 +101,7 @@ func TestParseGeminiSession_JSONLStream(t *testing.T) { `{"$set":{"lastUpdated":"2026-04-23T16:12:50.158Z"}}`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -74,7 +130,7 @@ func TestParseGeminiSession_JSONLStreamLargeRecord(t *testing.T) { `{"id":"u1","timestamp":"2026-04-23T16:12:43.085Z","type":"user","content":[{"text":"` + largeContent + `"}]}`, }, "\n") path := createTestFile(t, "large-session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -91,7 +147,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { `{"id":"a1","timestamp":"2026-04-23T16:12:50.158Z","type":"gemini","content":"reply"`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -108,7 +164,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { "", }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := 
ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -452,12 +508,12 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("malformed JSON", func(t *testing.T) { path := createTestFile(t, "session.json", "not valid json {{{") - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) t.Run("missing file", func(t *testing.T) { - _, _, err := ParseGeminiSession("/nonexistent.json", "my_project", "local") + _, _, err := parseGeminiTestSession(t, "/nonexistent.json", "my_project", "local") assert.Error(t, err) }) @@ -500,7 +556,7 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("missing sessionId", func(t *testing.T) { content := `{"projectHash":"abc","startTime":"2024-01-01T00:00:00Z","lastUpdated":"2024-01-01T00:00:00Z","messages":[]}` path := createTestFile(t, "session.json", content) - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) } diff --git a/internal/parser/gemini_provider.go b/internal/parser/gemini_provider.go new file mode 100644 index 000000000..4433f4d76 --- /dev/null +++ b/internal/parser/gemini_provider.go @@ -0,0 +1,509 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*geminiProvider)(nil) + +type geminiProviderFactory struct { + def AgentDef +} + +func newGeminiProviderFactory(def AgentDef) ProviderFactory { + return geminiProviderFactory{def: cloneAgentDef(def)} +} + +func (f geminiProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f geminiProviderFactory) Capabilities() Capabilities { + return geminiProviderCapabilities() +} + +func (f geminiProviderFactory) NewProvider(cfg 
ProviderConfig) Provider { + cfg = cfg.Clone() + return &geminiProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: geminiProviderCapabilities(), + Config: cfg, + }, + sources: newGeminiSourceSet(cfg.Roots), + } +} + +type geminiProvider struct { + ProviderBase + sources geminiSourceSet +} + +func (p *geminiProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *geminiProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *geminiProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *geminiProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *geminiProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *geminiProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("gemini source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, 
nil +} + +type geminiSource struct { + Root string + Path string +} + +type geminiSourceSet struct { + roots []string +} + +func newGeminiSourceSet(roots []string) geminiSourceSet { + return geminiSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s geminiSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + rootSources, err := s.discoverRoot(ctx, root) + if err != nil { + return nil, err + } + for _, source := range rootSources { + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s geminiSourceSet) discoverRoot( + ctx context.Context, + root string, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + sources := make([]SourceRef, 0) + seen := make(map[string]struct{}) + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Gemini session file paths under the Gemini +// directory (/tmp//chats/session-*.json[l]). is either a +// SHA-256 project hash (old layout) or a project name (new layout); symlinked +// hash directories are followed (matching the watcher). Project resolution is +// applied by sourceRef via BuildGeminiProjectMap/ResolveGeminiProject, so this +// helper only enumerates source paths. 
+func (s geminiSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return nil + } + + var paths []string + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + paths = append(paths, filepath.Join(chatsDir, name)) + } + } + return paths +} + +func (s geminiSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + tmp := filepath.Join(root, "tmp") + roots = append(roots, WatchRoot{ + Path: tmp, + Recursive: true, + IncludeGlobs: []string{"session-*.json", "session-*.jsonl"}, + DebounceKey: string(AgentGemini) + ":tmp:" + tmp, + }) + roots = append(roots, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{"projects.json", "trustedFolders.json"}, + DebounceKey: string(AgentGemini) + ":projects:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s geminiSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + if geminiProjectMetadataPath(root, req.Path) { + return s.discoverRoot(ctx, root) + } + source, ok := s.sourceRef(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + if jsonlMissingPathFallbackAllowed(req) { + source, ok = s.sourceRefForPath(root, req.Path, false) + if ok { + return []SourceRef{source}, nil + } + } + } + return nil, nil +} + +func (s geminiSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if 
err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Gemini session file by its session UUID under root, +// searching all project hash directories. The session filename embeds the first +// eight characters of the UUID, so candidates are pre-filtered on that prefix +// before confirming the recorded sessionId matches. +func (s geminiSourceSet) findSourceFile(root, sessionID string) string { + if root == "" || !IsValidSessionID(sessionID) || + len(sessionID) < 8 { + return "" + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return "" + } + + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + if strings.Contains(name, sessionID[:8]) { + path := filepath.Join(chatsDir, name) + if confirmGeminiSessionID(path, sessionID) { + return path + } + } + } + } + return "" +} + +// confirmGeminiSessionID reads the sessionId field from a Gemini file to +// confirm it matches the expected ID. 
+func confirmGeminiSessionID(path, sessionID string) bool { + data, err := os.ReadFile(path) + if err != nil { + return false + } + return GeminiSessionID(data) == sessionID +} + +func (s geminiSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + root, path, ok := s.rootPathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("gemini source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + h := sha256.New() + if err := addGeminiFingerprintPart(h, "session", path, info); err != nil { + return SourceFingerprint{}, err + } + for _, metadataPath := range geminiProjectMetadataPaths(root) { + metadataInfo, err := os.Stat(metadataPath) + if err != nil || metadataInfo.IsDir() { + continue + } + fingerprint.Size += metadataInfo.Size() + if mtime := metadataInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addGeminiFingerprintPart(h, "project", metadataPath, metadataInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s geminiSourceSet) pathFromSource(source SourceRef) (string, bool) { + _, path, ok := s.rootPathFromSource(source) + return path, ok +} + +func (s geminiSourceSet) rootPathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case geminiSource: + return src.Root, src.Path, src.Path != "" + case *geminiSource: + if src != nil && src.Path != "" { + return src.Root, 
src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(geminiSource) + return src.Root, src.Path, true + } + } + } + return "", "", false +} + +func (s geminiSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s geminiSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + sepParts := strings.Split(filepath.ToSlash(rel), "/") + if len(sepParts) != 4 || + sepParts[0] != "tmp" || + sepParts[2] != geminiChatsDir || + !isGeminiSessionFilename(sepParts[3]) { + return SourceRef{}, false + } + project := ResolveGeminiProject(sepParts[1], BuildGeminiProjectMap(root)) + return SourceRef{ + Provider: AgentGemini, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: geminiSource{ + Root: root, + Path: path, + }, + }, true +} + +func geminiProjectMetadataPaths(root string) []string { + return []string{ + filepath.Join(root, "projects.json"), + filepath.Join(root, "trustedFolders.json"), + } +} + +func geminiProjectMetadataPath(root, path string) bool { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return false + } + rel = filepath.ToSlash(rel) + return rel == "projects.json" || rel == "trustedFolders.json" +} + +func addGeminiFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return 
fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func geminiProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index adff7c24a..de249d22f 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -969,7 +969,7 @@ func TestCodexUserMessageCount(t *testing.T) { ) path := createTestFile(t, "codex-umc.jsonl", content) - sess, msgs, err := ParseCodexSession(path, "local", false) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") require.NotNil(t, sess, "session") require.Len(t, msgs, 4, "messages") @@ -983,9 +983,7 @@ func TestCodexSessionTimestampSemantics(t *testing.T) { path := createTestFile(t, "codex-ts-invalid.jsonl", content) buf := captureLog(t) - sess, msgs, err := ParseCodexSession( - path, "local", false, - ) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") assertZeroTimestamp(t, sess.StartedAt, "StartedAt") @@ -1002,9 +1000,7 @@ func TestCodexSessionTimestampSemantics(t *testing.T) { path := 
createTestFile(t, "codex-ts-long-invalid.jsonl", content) buf := captureLog(t) - _, _, err := ParseCodexSession( - path, "local", false, - ) + _, _, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") assertLogContains(t, buf, @@ -1038,9 +1034,7 @@ func TestParseCodexSessionOversizedLineSkipped(t *testing.T) { // skipping from the re-emitted-prompt dedup. content := meta + firstLine + oversizedLine + secondLine path := createTestFile(t, "oversized.jsonl", content) - sess, msgs, err := ParseCodexSession( - path, "local", false, - ) + sess, msgs, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "unexpected error") require.NotNil(t, sess, "session") require.Len(t, msgs, 2, "messages (oversized skipped)") @@ -1083,7 +1077,7 @@ func TestParseCodexSession_WorktreeBranchFallback(t *testing.T) { `{"type":"response_item","timestamp":"2024-01-01T00:00:01Z","payload":{"role":"user","content":[{"type":"input_text","text":"hello"}]}}` + "\n" path := createTestFile(t, "codex-worktree.jsonl", content) - sess, _, err := ParseCodexSession(path, "local", false) + sess, _, err := parseCodexTestSession(t, path, "local", false) require.NoError(t, err, "ParseCodexSession") require.NotNil(t, sess, "session") assert.Equal(t, "agentsview", sess.Project, "project") @@ -1230,10 +1224,10 @@ func TestGeminiUserMessageCount(t *testing.T) { ) path := createTestFile(t, "gemini-umc.json", content) - sess, msgs, err := ParseGeminiSession( - path, "my_project", "local", + sess, msgs, err := parseGeminiTestSession( + t, path, "my_project", "local", ) - require.NoError(t, err, "ParseGeminiSession") + require.NoError(t, err, "parseGeminiTestSession") require.NotNil(t, sess, "session") require.Len(t, msgs, 4, "messages") assert.Equal(t, 2, sess.UserMessageCount, "UserMessageCount") diff --git a/internal/parser/positron.go b/internal/parser/positron.go deleted file mode 100644 index 25d3739b4..000000000 --- 
a/internal/parser/positron.go +++ /dev/null @@ -1,148 +0,0 @@ -package parser - -import ( - "fmt" - "os" - "path/filepath" - "strings" -) - -// ParsePositronSession parses a Positron Assistant chat session -// file. The format is identical to VSCode Copilot sessions. -// Returns (nil, nil, nil) if the file is empty or contains no -// meaningful content. -func ParsePositronSession( - path, project, machine string, -) (*ParsedSession, []ParsedMessage, error) { - info, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil, nil, nil - } - return nil, nil, fmt.Errorf("stat %s: %w", path, err) - } - - var data []byte - if strings.HasSuffix(path, ".jsonl") { - data, err = reconstructJSONL(path) - } else { - data, err = os.ReadFile(path) - } - if err != nil { - return nil, nil, fmt.Errorf("read %s: %w", path, err) - } - if len(data) == 0 { - return nil, nil, nil - } - - // Reuse VSCode Copilot parsing logic since formats are identical - sess, msgs, err := parseVSCodeCopilotData( - data, path, project, machine, - ) - if err != nil { - return nil, nil, err - } - if sess == nil { - return nil, nil, nil - } - - // Override agent type and ID prefix for Positron - sess.Agent = AgentPositron - sess.ID = "positron:" + sess.ID - - sess.File = FileInfo{ - Path: path, - Size: info.Size(), - Mtime: info.ModTime().UnixNano(), - } - - return sess, msgs, nil -} - -// DiscoverPositronSessions finds all chat session files under the -// Positron User directory. 
The structure mirrors VSCode: -// /workspaceStorage//chatSessions/.json -func DiscoverPositronSessions(userDir string) []DiscoveredFile { - if userDir == "" { - return nil - } - - var files []DiscoveredFile - - // Scan workspaceStorage//chatSessions/*.{json,jsonl} - wsDir := filepath.Join(userDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err != nil { - return nil - } - - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - - hashPath := filepath.Join(wsDir, entry.Name()) - chatDir := filepath.Join(hashPath, "chatSessions") - sessionFiles, err := os.ReadDir(chatDir) - if err != nil { - continue - } - - // Read workspace.json to get project name - project := ReadVSCodeWorkspaceManifest(hashPath) - if project == "" { - project = "unknown" - } - - for _, f := range sessionFiles { - if f.IsDir() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl") { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatDir, name), - Project: project, - Agent: AgentPositron, - }) - } - } - - return files -} - -// FindPositronSourceFile locates a Positron session file by its -// raw ID (prefix already stripped). 
-func FindPositronSourceFile(userDir, rawID string) string { - if userDir == "" || !IsValidSessionID(rawID) { - return "" - } - - // Search through workspaceStorage - wsDir := filepath.Join(userDir, "workspaceStorage") - hashDirs, err := os.ReadDir(wsDir) - if err != nil { - return "" - } - - for _, entry := range hashDirs { - if !entry.IsDir() { - continue - } - base := filepath.Join( - wsDir, entry.Name(), "chatSessions", - ) - // Prefer .jsonl over .json - for _, ext := range []string{".jsonl", ".json"} { - candidate := filepath.Join(base, rawID+ext) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - } - - return "" -} diff --git a/internal/parser/positron_provider.go b/internal/parser/positron_provider.go new file mode 100644 index 000000000..81f47f287 --- /dev/null +++ b/internal/parser/positron_provider.go @@ -0,0 +1,574 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*positronProvider)(nil) + +type positronProviderFactory struct { + def AgentDef +} + +func newPositronProviderFactory(def AgentDef) ProviderFactory { + return positronProviderFactory{def: cloneAgentDef(def)} +} + +func (f positronProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f positronProviderFactory) Capabilities() Capabilities { + return positronProviderCapabilities() +} + +func (f positronProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &positronProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: positronProviderCapabilities(), + Config: cfg, + }, + sources: newPositronSourceSet(cfg.Roots), + } +} + +type positronProvider struct { + ProviderBase + sources positronSourceSet +} + +func (p *positronProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *positronProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return 
p.sources.WatchPlan(ctx) +} + +func (p *positronProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *positronProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *positronProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *positronProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, project, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("positron source path unavailable") + } + if req.Source.ProjectHint != "" { + project = req.Source.ProjectHint + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +// parseSession parses a Positron Assistant chat session file. The format is +// identical to VSCode Copilot sessions. Returns (nil, nil, nil) if the file is +// empty or contains no meaningful content. 
+func (p *positronProvider) parseSession( + path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil, nil + } + return nil, nil, fmt.Errorf("stat %s: %w", path, err) + } + + var data []byte + if strings.HasSuffix(path, ".jsonl") { + data, err = reconstructJSONL(path) + } else { + data, err = os.ReadFile(path) + } + if err != nil { + return nil, nil, fmt.Errorf("read %s: %w", path, err) + } + if len(data) == 0 { + return nil, nil, nil + } + + // Reuse VSCode Copilot parsing logic since formats are identical. + sess, msgs, err := parseVSCodeCopilotData(data, path, project, machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + + // Override agent type and ID prefix for Positron. + sess.Agent = AgentPositron + sess.ID = "positron:" + sess.ID + + sess.File = FileInfo{ + Path: path, + Size: info.Size(), + Mtime: info.ModTime().UnixNano(), + } + + return sess, msgs, nil +} + +type positronSource struct { + Root string + Path string + Project string +} + +type positronSourceSet struct { + roots []string +} + +func newPositronSourceSet(roots []string) positronSourceSet { + return positronSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s positronSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range s.discoverSessions(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessions finds all chat session files under a Positron User +// directory. The structure mirrors VSCode: +// /workspaceStorage//chatSessions/.{json,jsonl}. 
When a +// .jsonl and .json sibling exist for the same UUID, the .jsonl is preferred. +func (s positronSourceSet) discoverSessions(userDir string) []DiscoveredFile { + if userDir == "" { + return nil + } + + var files []DiscoveredFile + + // Scan workspaceStorage//chatSessions/*.{json,jsonl}. + wsDir := filepath.Join(userDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err != nil { + return nil + } + + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + + hashPath := filepath.Join(wsDir, entry.Name()) + chatDir := filepath.Join(hashPath, "chatSessions") + sessionFiles, err := os.ReadDir(chatDir) + if err != nil { + continue + } + + project := positronWorkspaceProject(userDir, entry.Name()) + files = append(files, + discoverVSCodeSessionFiles( + chatDir, sessionFiles, project, AgentPositron, + )..., + ) + } + + return files +} + +// findSourceFile locates a Positron session file by its raw ID (prefix already +// stripped), preferring .jsonl over .json. Returns "" when no matching file +// exists. +func (s positronSourceSet) findSourceFile(userDir, rawID string) string { + if userDir == "" || !IsValidSessionID(rawID) { + return "" + } + + wsDir := filepath.Join(userDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err != nil { + return "" + } + + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + base := filepath.Join(wsDir, entry.Name(), "chatSessions") + // Prefer .jsonl over .json. 
+ for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join(base, rawID+ext) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + + return "" +} + +func (s positronSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + workspace := filepath.Join(root, "workspaceStorage") + roots = append(roots, WatchRoot{ + Path: workspace, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentPositron) + ":workspace:" + workspace, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s positronSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + sources := s.sourcesForWorkspaceManifest(root, req.Path) + if len(sources) > 0 { + return sources, nil + } + source, ok := s.sourceRefForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s positronSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (s positronSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return 
SourceFingerprint{}, err + } + path, _, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("positron source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + workspacePath := s.workspaceManifestForSource(path) + if workspacePath != "" { + if workspaceInfo, err := os.Stat(workspacePath); err == nil { + fingerprint.Size += workspaceInfo.Size() + if mtime := workspaceInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + } + } + fingerprint.Hash, err = vscodeCopilotSourceHash(path, workspacePath) + if err != nil { + return SourceFingerprint{}, err + } + return fingerprint, nil +} + +func (s positronSourceSet) pathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case positronSource: + return src.Path, src.Project, src.Path != "" + case *positronSource: + if src != nil && src.Path != "" { + return src.Path, src.Project, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(positronSource) + return src.Path, src.Project, true + } + } + } + return "", "", false +} + +func (s positronSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 4 || + parts[0] != "workspaceStorage" || + parts[2] != "chatSessions" || + 
!isVSCodeCopilotSessionPath(parts[3]) { + return SourceRef{}, false + } + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + return SourceRef{}, false + } + project := positronWorkspaceProject(root, parts[1]) + return s.newSourceRef(root, path, project), true +} + +func (s positronSourceSet) sourceRefForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if req.EventKind != "remove" && vscodeCopilotJSONLPreferredOver(path) { + return SourceRef{}, false + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + return s.syntheticSourceRef(root, path) +} + +func (s positronSourceSet) syntheticSourceRef( + root, path string, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 4 || + parts[0] != "workspaceStorage" || + parts[2] != "chatSessions" || + !isVSCodeCopilotSessionPath(parts[3]) { + return SourceRef{}, false + } + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + project := positronWorkspaceProject(root, parts[1]) + return s.newSourceRef(root, path, project), true +} + +func (s positronSourceSet) sourcesForWorkspaceManifest( + root, path string, +) []SourceRef { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return nil + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 3 || + parts[0] != "workspaceStorage" || + parts[2] != "workspace.json" { + return nil + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + chatDir := filepath.Join(hashDir, "chatSessions") + entries, err := os.ReadDir(chatDir) + if err != nil { + return nil + } + project := positronWorkspaceProject(root, parts[1]) + files := 
discoverVSCodeSessionFiles(chatDir, entries, project, AgentPositron) + sources := make([]SourceRef, 0, len(files)) + seen := make(map[string]struct{}, len(files)) + for _, file := range files { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.Provider = AgentPositron + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources +} + +func (s positronSourceSet) workspaceManifestForSource(path string) string { + for _, root := range s.roots { + root = filepath.Clean(root) + rel, ok := relUnder(root, path) + if !ok { + continue + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + workspacePath := filepath.Join( + root, + "workspaceStorage", + parts[1], + "workspace.json", + ) + if IsRegularFile(workspacePath) { + return workspacePath + } + } + } + return "" +} + +func (s positronSourceSet) newSourceRef(root, path, project string) SourceRef { + return SourceRef{ + Provider: AgentPositron, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: positronSource{ + Root: root, + Path: path, + Project: project, + }, + } +} + +func positronWorkspaceProject(root, hash string) string { + hashDir := filepath.Join(root, "workspaceStorage", hash) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return project +} + +func positronProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + 
ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + Thinking: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/positron_provider_test.go b/internal/parser/positron_provider_test.go new file mode 100644 index 000000000..24ace99e2 --- /dev/null +++ b/internal/parser/positron_provider_test.go @@ -0,0 +1,134 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPositronProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentPositron) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestPositronProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "positron-provider" + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + sourcePath := filepath.Join(chatDir, sessionID+".jsonl") + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-app"}`) + writeSourceFile(t, sourcePath, + vscodeCopilotProviderJSONL(sessionID, "Hello Positron")) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, "workspaceStorage"), plan.Roots[0].Path) + assert.True(t, 
plan.Roots[0].Recursive) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "positron-app", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~positron:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + require.False(t, outcome.ForceReplace) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "positron:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentPositron, result.Result.Session.Agent) + assert.Equal(t, "positron-app", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestPositronProviderClassifiesDeletedAndMetadataPaths(t *testing.T) { + root := t.TempDir() + hashDir := filepath.Join(root, "workspaceStorage", "workspace-hash") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := 
filepath.Join(hashDir, "workspace.json") + sourcePath := filepath.Join(chatDir, "metadata.jsonl") + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-app"}`) + writeSourceFile(t, sourcePath, + vscodeCopilotProviderJSONL("metadata", "Hello metadata")) + + provider, ok := NewProvider(AgentPositron, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + metadataChanged, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, metadataChanged, 1) + assert.Equal(t, sourcePath, metadataChanged[0].DisplayPath) + + beforeMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + writeSourceFile(t, workspacePath, + `{"folder":"file:///Users/alice/code/positron-renamed-app"}`) + afterMetadata, err := provider.Fingerprint(context.Background(), metadataChanged[0]) + require.NoError(t, err) + assert.NotEqual(t, beforeMetadata.Hash, afterMetadata.Hash) + + require.NoError(t, os.Remove(sourcePath)) + deleted, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove"}, + ) + require.NoError(t, err) + require.Len(t, deleted, 1) + assert.Equal(t, sourcePath, deleted[0].DisplayPath) +} diff --git a/internal/parser/positron_test.go b/internal/parser/positron_test.go index 8a93ffadb..50b51b724 100644 --- a/internal/parser/positron_test.go +++ b/internal/parser/positron_test.go @@ -9,7 +9,11 @@ import ( "github.com/stretchr/testify/require" ) -func TestParsePositronSession(t *testing.T) { +func newPositronTestSourceSet(roots ...string) positronSourceSet { + return newPositronSourceSet(roots) +} + +func TestPositronProviderParseSession(t *testing.T) { // Create a minimal Positron session JSON sessionJSON := `{ "version": 3, @@ -60,10 +64,11 @@ func TestParsePositronSession(t *testing.T) { sessionPath, []byte(sessionJSON), 
0644, )) - sess, msgs, err := ParsePositronSession( + p := &positronProvider{} + sess, msgs, err := p.parseSession( sessionPath, "test-project", "test-machine", ) - require.NoError(t, err, "ParsePositronSession failed") + require.NoError(t, err, "parseSession failed") require.NotNil(t, sess, "expected session, got nil") // Verify session metadata @@ -86,7 +91,7 @@ func TestParsePositronSession(t *testing.T) { assert.True(t, msgs[3].HasToolUse, "msgs[3] should have tool use") } -func TestDiscoverPositronSessions(t *testing.T) { +func TestPositronSourceSetDiscoverSessions(t *testing.T) { tmpDir := t.TempDir() // Create directory structure: @@ -106,11 +111,13 @@ func TestDiscoverPositronSessions(t *testing.T) { 0644, )) - // Create session files + // Create session files. The .json file with a .jsonl sibling must be + // deduped so full discovery matches changed-path sync precedence. sessionJSON := `{"version": 3, "requests": []}` for _, name := range []string{ "session-1.json", "session-2.jsonl", + "session-2.json", } { require.NoError(t, os.WriteFile( filepath.Join(chatDir, name), @@ -126,16 +133,20 @@ func TestDiscoverPositronSessions(t *testing.T) { 0644, )) - files := DiscoverPositronSessions(tmpDir) + set := newPositronTestSourceSet(tmpDir) + files := set.discoverSessions(tmpDir) require.Len(t, files, 2) + paths := make([]string, 0, len(files)) for _, f := range files { + paths = append(paths, filepath.Base(f.Path)) assert.Equal(t, AgentPositron, f.Agent) assert.Equal(t, "myproject", f.Project) } + assert.ElementsMatch(t, []string{"session-1.json", "session-2.jsonl"}, paths) } -func TestFindPositronSourceFile(t *testing.T) { +func TestPositronSourceSetFindSourceFile(t *testing.T) { tmpDir := t.TempDir() // Create directory structure @@ -151,11 +162,13 @@ func TestFindPositronSourceFile(t *testing.T) { sessionPath, []byte(`{}`), 0644, )) + set := newPositronTestSourceSet(tmpDir) + // Test finding existing session - found := FindPositronSourceFile(tmpDir, 
"test-uuid") + found := set.findSourceFile(tmpDir, "test-uuid") assert.Equal(t, sessionPath, found) // Test finding non-existent session - notFound := FindPositronSourceFile(tmpDir, "nonexistent") + notFound := set.findSourceFile(tmpDir, "nonexistent") assert.Empty(t, notFound) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 7a54cf967..fa5d214a3 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -358,6 +358,10 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newClaudeProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentCodex: + return newCodexProviderFactory(def) + case AgentCopilot: + return newCopilotProviderFactory(def) case AgentCowork: return newCoworkProviderFactory(def) case AgentCortex: @@ -372,6 +376,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentGemini: + return newGeminiProviderFactory(def) case AgentKimi: return newKimiProviderFactory(def) case AgentKilo: @@ -388,16 +394,22 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newOpenClawProviderFactory(def) case AgentOMP, AgentPi: return newPiProviderFactory(def) - case AgentQwenPaw: - return newQwenPawProviderFactory(def) + case AgentPositron: + return newPositronProviderFactory(def) case AgentQClaw: return newQClawProviderFactory(def) + case AgentQwen: + return newQwenProviderFactory(def) + case AgentQwenPaw: + return newQwenPawProviderFactory(def) + case AgentVSCopilot: + return newVisualStudioCopilotProviderFactory(def) + case AgentVSCodeCopilot: + return newVSCodeCopilotProviderFactory(def) case AgentVibe: return newVibeProviderFactory(def) case AgentWorkBuddy: return newWorkBuddyProviderFactory(def) - case AgentQwen: - return newQwenProviderFactory(def) case AgentZencoder: return newZencoderProviderFactory(def) default: diff --git 
a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 6a91112ef..bc8033e17 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -19,9 +19,9 @@ const ( var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationProviderAuthoritative, - AgentCodex: ProviderMigrationLegacyOnly, - AgentCopilot: ProviderMigrationLegacyOnly, - AgentGemini: ProviderMigrationLegacyOnly, + AgentCodex: ProviderMigrationProviderAuthoritative, + AgentCopilot: ProviderMigrationProviderAuthoritative, + AgentGemini: ProviderMigrationProviderAuthoritative, AgentOpenHands: ProviderMigrationProviderAuthoritative, AgentCursor: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, @@ -31,8 +31,8 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentIflow: ProviderMigrationProviderAuthoritative, AgentAmp: ProviderMigrationProviderAuthoritative, AgentZencoder: ProviderMigrationProviderAuthoritative, - AgentVSCodeCopilot: ProviderMigrationLegacyOnly, - AgentVSCopilot: ProviderMigrationLegacyOnly, + AgentVSCodeCopilot: ProviderMigrationProviderAuthoritative, + AgentVSCopilot: ProviderMigrationProviderAuthoritative, AgentPi: ProviderMigrationProviderAuthoritative, AgentQwen: ProviderMigrationProviderAuthoritative, AgentCommandCode: ProviderMigrationProviderAuthoritative, @@ -50,7 +50,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentForge: ProviderMigrationLegacyOnly, AgentPiebald: ProviderMigrationLegacyOnly, AgentWarp: ProviderMigrationLegacyOnly, - AgentPositron: ProviderMigrationLegacyOnly, + AgentPositron: ProviderMigrationProviderAuthoritative, AgentAntigravity: ProviderMigrationLegacyOnly, AgentAntigravityCLI: ProviderMigrationLegacyOnly, AgentVibe: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go 
b/internal/parser/provider_shim_scan_test.go index 8e4aa92c4..8bef3b59d 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -47,19 +47,13 @@ var providerNeutralEntrypoints = map[string]bool{ // tip (the zero-legacy gate) asserts this list is empty, so a provider cannot // remain a permanent shim. var pendingShimProviderFiles = map[string]bool{ - "antigravity_cli_provider.go": true, - "antigravity_provider.go": true, - "codex_provider.go": true, - "copilot_provider.go": true, - "db_backed_provider.go": true, - "gemini_provider.go": true, - "kiro_ide_provider.go": true, - "kiro_provider.go": true, - "positron_provider.go": true, - "shelley_provider.go": true, - "visualstudio_copilot_provider.go": true, - "vscode_copilot_provider.go": true, - "zed_provider.go": true, + "antigravity_cli_provider.go": true, + "antigravity_provider.go": true, + "db_backed_provider.go": true, + "kiro_ide_provider.go": true, + "kiro_provider.go": true, + "shelley_provider.go": true, + "zed_provider.go": true, } // collectLegacyFreeFuncs returns the set of package-level free functions in the diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index e1f73995b..65b689c5e 100644 --- a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -149,7 +149,10 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - provider, ok := NewProvider(AgentCodex, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + def, ok := AgentByType(legacyAgent) + require.True(t, ok) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -177,7 +180,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { source, found, err := provider.FindSource(ctx, FindSourceRequest{ RawSessionID: "session", - FullSessionID: "codex:session", + FullSessionID: 
def.IDPrefix + "session", StoredFilePath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", }) @@ -186,7 +189,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.Empty(t, source) _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: AgentCodex, + Provider: legacyAgent, Key: "session", DisplayPath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", @@ -195,9 +198,9 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - Source: SourceRef{Provider: AgentCodex, Key: "session"}, + Source: SourceRef{Provider: legacyAgent, Key: "session"}, Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: "codex:session", + SessionID: def.IDPrefix + "session", StartOrdinal: 1, Machine: "devbox", }) @@ -211,12 +214,13 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { Roots: []string{"/tmp/one", "/tmp/two"}, Machine: "devbox", } + legacyAgent := legacyProviderTestAgent(t) - factory, ok := ProviderFactoryByType(AgentCodex) + factory, ok := ProviderFactoryByType(legacyAgent) require.True(t, ok) - assert.Equal(t, AgentCodex, factory.Definition().Type) + assert.Equal(t, legacyAgent, factory.Definition().Type) - provider, ok := NewProvider(AgentCodex, cfg) + provider, ok := NewProvider(legacyAgent, cfg) require.True(t, ok) require.NotNil(t, provider) @@ -233,7 +237,8 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { } func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - provider, ok := NewProvider(AgentCodex, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -241,7 +246,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { outcome, err := provider.Parse(context.Background(), 
ParseRequest{ Source: SourceRef{ - Provider: AgentCodex, + Provider: legacyAgent, Key: "source", DisplayPath: "/tmp/source.jsonl", FingerprintKey: "/tmp/source.jsonl", @@ -257,7 +262,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) var unsupported UnsupportedProviderFeatureError require.ErrorAs(t, err, &unsupported) - assert.Equal(t, AgentCodex, unsupported.Provider) + assert.Equal(t, legacyAgent, unsupported.Provider) assert.Equal(t, ProviderFeatureParse, unsupported.Feature) } @@ -349,6 +354,18 @@ func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error return ParseOutcome{}, nil } +func legacyProviderTestAgent(t *testing.T) AgentType { + t.Helper() + for _, def := range Registry { + factory := providerFactoryForDef(def) + if _, ok := factory.(legacyProviderFactory); ok { + return def.Type + } + } + t.Fatal("expected at least one legacy provider for fallback tests") + return "" +} + func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { t.Helper() diff --git a/internal/parser/s3_discovery_test.go b/internal/parser/s3_discovery_test.go index f1f5322f3..2137f6010 100644 --- a/internal/parser/s3_discovery_test.go +++ b/internal/parser/s3_discovery_test.go @@ -107,3 +107,42 @@ func TestClaudeSourceSetMixedLocalAndS3Roots(t *testing.T) { assert.Equal(t, 1, s3Count, "exactly one remote source") assert.Equal(t, 1, localCount, "exactly one local source") } + +// TestCodexSourceSetDiscoversS3Sessions verifies the Codex source set enumerates +// rollout objects under an s3:// sessions root through its provider Discover +// path and carries the object metadata in the S3DiscoveredSource opaque. 
+func TestCodexSourceSetDiscoversS3Sessions(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + root := "s3://bucket/coder/raw/codex" + rolloutURI := root + "/2026/06/24/rollout-2026-06-24T00-00-00-" + + "11111111-1111-4111-8111-111111111111.jsonl" + mtime := time.Unix(100, 0) + listS3Objects = func(got string) ([]S3Object, error) { + require.Equal(t, root, got) + return []S3Object{{ + URI: rolloutURI, + Size: 11, + LastModified: mtime, + Fingerprint: "s3-meta:rollout", + }}, nil + } + + sources, err := newCodexSourceSet([]string{root}).Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + src := sources[0] + assert.Equal(t, AgentCodex, src.Provider) + assert.Equal(t, rolloutURI, src.DisplayPath) + assert.Equal(t, rolloutURI, src.Key) + + s3, ok := src.Opaque.(S3DiscoveredSource) + require.True(t, ok, "s3 source carries S3DiscoveredSource opaque") + assert.Equal(t, rolloutURI, s3.URI) + assert.Equal(t, "coder", s3.Machine) + assert.Equal(t, int64(11), s3.Size) + assert.Equal(t, mtime.UnixNano(), s3.MtimeNS) + assert.Contains(t, s3.Fingerprint, "rollout") +} diff --git a/internal/parser/skill_inference_test.go b/internal/parser/skill_inference_test.go index eef4df90e..50595396b 100644 --- a/internal/parser/skill_inference_test.go +++ b/internal/parser/skill_inference_test.go @@ -477,7 +477,7 @@ func TestParseCodexSessionFromInfersSkillNameFromSeededCwd(t *testing.T) { testjsonl.CodexMsgJSON("user", "use the dashboard skill", tsEarlyS1), ) file := createTestFile(t, "incremental-skill.jsonl", initial) - _, msgs, err := ParseCodexSession(file, "local", false) + _, msgs, err := parseCodexTestSession(t, file, "local", false) require.NoError(t, err) info, err := os.Stat(file) @@ -494,7 +494,7 @@ func TestParseCodexSessionFromInfersSkillNameFromSeededCwd(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - newMsgs, _, _, err := ParseCodexSessionFrom(file, offset, len(msgs), 
false) + newMsgs, _, _, err := parseCodexTestSessionFrom(t, file, offset, len(msgs), false) require.NoError(t, err) require.Len(t, newMsgs, 1) require.Len(t, newMsgs[0].ToolCalls, 1) diff --git a/internal/parser/types.go b/internal/parser/types.go index 2821dc0d3..bd16c1274 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -128,33 +128,27 @@ var Registry = []AgentDef{ }, IDPrefix: "codex:", FileBased: true, - DiscoverFunc: DiscoverCodexSessions, - FindSourceFunc: FindCodexSourceFile, ShallowWatchRootsFunc: ResolveCodexShallowWatchRoots, }, { - Type: AgentCopilot, - DisplayName: "Copilot", - EnvVar: "COPILOT_DIR", - ConfigKey: "copilot_dirs", - DefaultDirs: []string{".copilot"}, - IDPrefix: "copilot:", - WatchSubdirs: []string{"session-state"}, - FileBased: true, - DiscoverFunc: DiscoverCopilotSessions, - FindSourceFunc: FindCopilotSourceFile, + Type: AgentCopilot, + DisplayName: "Copilot", + EnvVar: "COPILOT_DIR", + ConfigKey: "copilot_dirs", + DefaultDirs: []string{".copilot"}, + IDPrefix: "copilot:", + WatchSubdirs: []string{"session-state"}, + FileBased: true, }, { - Type: AgentGemini, - DisplayName: "Gemini", - EnvVar: "GEMINI_DIR", - ConfigKey: "gemini_dirs", - DefaultDirs: []string{".gemini"}, - IDPrefix: "gemini:", - WatchSubdirs: []string{"tmp"}, - FileBased: true, - DiscoverFunc: DiscoverGeminiSessions, - FindSourceFunc: FindGeminiSourceFile, + Type: AgentGemini, + DisplayName: "Gemini", + EnvVar: "GEMINI_DIR", + ConfigKey: "gemini_dirs", + DefaultDirs: []string{".gemini"}, + IDPrefix: "gemini:", + WatchSubdirs: []string{"tmp"}, + FileBased: true, }, { Type: AgentMiMoCode, @@ -271,9 +265,7 @@ var Registry = []AgentDef{ "workspaceStorage", "globalStorage", }, - FileBased: true, - DiscoverFunc: DiscoverVSCodeCopilotSessions, - FindSourceFunc: FindVSCodeCopilotSourceFile, + FileBased: true, }, { Type: AgentVSCopilot, @@ -288,10 +280,8 @@ var Registry = []AgentDef{ // Linux ".cache/VSGitHubCopilotLogs/traces", }, - IDPrefix: 
"visualstudio-copilot:", - FileBased: true, - DiscoverFunc: DiscoverVisualStudioCopilotSessions, - FindSourceFunc: FindVisualStudioCopilotSourceFile, + IDPrefix: "visualstudio-copilot:", + FileBased: true, }, { Type: AgentPi, @@ -487,11 +477,9 @@ var Registry = []AgentDef{ DefaultDirs: []string{ "Library/Application Support/Positron/User", }, - IDPrefix: "positron:", - WatchSubdirs: []string{"workspaceStorage"}, - FileBased: true, - DiscoverFunc: DiscoverPositronSessions, - FindSourceFunc: FindPositronSourceFile, + IDPrefix: "positron:", + WatchSubdirs: []string{"workspaceStorage"}, + FileBased: true, }, { Type: AgentZed, diff --git a/internal/parser/visualstudio_copilot.go b/internal/parser/visualstudio_copilot.go index b05755dde..e1ab9e13a 100644 --- a/internal/parser/visualstudio_copilot.go +++ b/internal/parser/visualstudio_copilot.go @@ -16,60 +16,24 @@ import ( "time" ) -// ParseVisualStudioCopilotSession parses a single Visual Studio Copilot -// conversation from an OpenTelemetry trace JSONL file. The path may be a real -// trace file or a # virtual path emitted by -// discovery. A real trace file resolves to the conversation it contains; when -// a file carries spans for more than one conversation, discovery emits one -// virtual-path work item per conversation, so production does not rely on this -// entry point to choose among several. 
-func ParseVisualStudioCopilotSession( - path, project, machine string, -) (*ParsedSession, []ParsedMessage, error) { - if tracePath, conversationID, ok := - ParseVisualStudioCopilotVirtualPath(path); ok { - return ParseVisualStudioCopilotConversation( - tracePath, conversationID, project, machine, - ) - } - if !IsVisualStudioCopilotTraceFile(path) { - return nil, nil, nil - } - ids, err := VisualStudioCopilotFileConversationIDs(path) - if err != nil { - return nil, nil, err - } - if len(ids) == 0 { - return nil, nil, nil - } - return ParseVisualStudioCopilotConversation( - path, ids[0], project, machine, - ) -} - // VisualStudioCopilotVirtualPath pairs a trace file with one conversation ID. // A single physical trace file can hold spans for multiple conversations, so // each conversation is tracked as its own work item under this virtual path. func VisualStudioCopilotVirtualPath(tracePath, conversationID string) string { - return tracePath + "#" + conversationID + return VirtualSourcePath(tracePath, conversationID) } -// ParseVisualStudioCopilotVirtualPath splits a # -// virtual path. It returns ok=false for a plain trace-file path. -func ParseVisualStudioCopilotVirtualPath( +// SplitVisualStudioCopilotVirtualPath splits a tracePath#conversationID +// virtual source path into its physical trace file and conversation ID. It +// builds on the provider-neutral ParseVirtualSourcePath splitter and adds the +// Visual Studio Copilot validation that the container names a trace file and +// the source ID is a valid conversation ID. It returns ok=false for a plain +// trace-file path. Callers outside the parser package use it to detect and +// resolve the virtual paths Visual Studio Copilot stores for its sessions. 
+func SplitVisualStudioCopilotVirtualPath( sourcePath string, ) (tracePath, conversationID string, ok bool) { - idx := strings.LastIndex(sourcePath, "#") - if idx <= 0 || idx >= len(sourcePath)-1 { - return "", "", false - } - tracePath = sourcePath[:idx] - conversationID = sourcePath[idx+1:] - if !IsVisualStudioCopilotTraceFile(tracePath) || - !IsValidSessionID(conversationID) { - return "", "", false - } - return tracePath, conversationID, true + return splitVisualStudioCopilotVirtualPath(sourcePath) } // IsVisualStudioCopilotTraceFile reports whether path names a Visual Studio @@ -89,7 +53,7 @@ func IsVisualStudioCopilotTraceFile(path string) bool { // path whose runs share one physical history file; both resolve to the // physical file. Every other agent stores a real path, returned unchanged. func ResolveSourceFilePath(storedPath string) string { - if tracePath, _, ok := ParseVisualStudioCopilotVirtualPath(storedPath); ok { + if tracePath, _, ok := splitVisualStudioCopilotVirtualPath(storedPath); ok { return tracePath } if historyPath, _, ok := ParseAiderVirtualPath(storedPath); ok { @@ -133,11 +97,11 @@ type vsCopilotTraceValue struct { BoolValue bool `json:"boolValue"` } -// ParseVisualStudioCopilotConversation parses one conversation, gathering its -// spans from the given trace file and every sibling trace file in the same -// directory. File metadata is recorded against the conversation's virtual path -// so that each conversation in a shared trace file is tracked independently. -func ParseVisualStudioCopilotConversation( +// parseVisualStudioCopilotConversation parses one conversation, gathering its spans from the given +// trace file and every sibling trace file in the same directory. File metadata +// is recorded against the conversation's virtual path so that each conversation +// in a shared trace file is tracked independently. 
+func parseVisualStudioCopilotConversation( tracePath, conversationID, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { if conversationID == "" { diff --git a/internal/parser/visualstudio_copilot_provider.go b/internal/parser/visualstudio_copilot_provider.go new file mode 100644 index 000000000..9fb39ddae --- /dev/null +++ b/internal/parser/visualstudio_copilot_provider.go @@ -0,0 +1,274 @@ +package parser + +import ( + "os" + "path/filepath" + "sort" + "strings" +) + +// Visual Studio Copilot stores conversations inside shared trace files +// (*_VSGitHubCopilot_traces.jsonl). It is a multi-session container provider, +// but unlike the SQLite-backed containers it discovers one source per +// conversation (deduplicated across trace files, newest trace wins) plus a bare +// physical source for any trace whose conversation IDs could not be read, so +// the read failure surfaces instead of being silently dropped. Parse of a +// conversation virtual path yields that one session; Parse of a bare trace fans +// out every conversation in it. All behavior is wired into the shared +// multi-session-container base via options. +func newVisualStudioCopilotProviderFactory(def AgentDef) ProviderFactory { + return NewMultiSessionProviderFactory( + def, + visualStudioCopilotProviderCapabilities(), + func(cfg ProviderConfig) multiSessionContainerSourceSet { + return NewMultiSessionContainerSourceSet( + AgentVSCopilot, + cfg.Roots, + WithSourceDiscovery(vsCopilotDiscoverSources), + WithWatchRoots(vsCopilotWatchRoots), + WithChangedPathClassifier(vsCopilotClassifyPath), + WithMemberLookup(vsCopilotFindMember), + WithFingerprint(vsCopilotFingerprintSource), + WithContainerParse(vsCopilotParseContainer), + WithMemberParse(vsCopilotParseMember), + // Every conversation in a trace shares the trace's content hash. 
+ WithContainerHashStamping(), + ) + }, + ) +} + +// vsCopilotDiscoverSources emits one match per conversation (virtual path) plus +// a bare physical match for each unreadable trace, mirroring the legacy +// per-conversation discovery. +func vsCopilotDiscoverSources(root string) []multiSessionMatch { + var out []multiSessionMatch + for _, file := range discoverVisualStudioCopilotSessionFilesUnderRoot(root) { + match, ok := vsCopilotDiscoveredMatch(root, file.Path) + if !ok { + continue + } + match.ProjectHint = file.Project + out = append(out, match) + } + return out +} + +// vsCopilotDiscoveredMatch classifies a discovery path. Discovery emits either a +// tracePath#conversationID virtual path for a readable trace, or a bare +// physical trace path for one whose conversation IDs could not be read. The +// unreadable physical file must still become a source so the engine surfaces +// the read failure instead of silently dropping it; the regular-file +// requirement is therefore relaxed for the bare physical trace (which os.ReadDir +// already enumerated) while virtual paths keep validating that their backing +// trace exists. 
+func vsCopilotDiscoveredMatch(root, path string) (multiSessionMatch, bool) { + if match, ok := vsCopilotClassifyPath(root, path, false); ok { + return match, true + } + root = filepath.Clean(root) + path = filepath.Clean(path) + if _, _, ok := splitVisualStudioCopilotVirtualPath(path); ok { + return multiSessionMatch{}, false + } + if !visualStudioCopilotTraceUnderRoot(root, path, false) { + return multiSessionMatch{}, false + } + return multiSessionMatch{ + Path: path, + Container: path, + ProjectHint: "visualstudio", + }, true +} + +func discoverVisualStudioCopilotSessionFilesUnderRoot( + vsRoot string, +) []DiscoveredFile { + if vsRoot == "" { + return nil + } + entries, err := os.ReadDir(vsRoot) + if err != nil { + return nil + } + files := discoverVisualStudioCopilotSessionFiles(vsRoot, entries) + sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) + return files +} + +func vsCopilotWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{"*_VSGitHubCopilot_traces.jsonl"}, + DebounceKey: string(AgentVSCopilot) + ":traces:" + root, + }) + } + return out +} + +// vsCopilotClassifyPath maps a stored or changed path to its trace container and +// conversation. A virtual path always requires its backing trace to exist; a +// bare trace path relaxes the regular-file check under allowMissing so a deleted +// trace still classifies for changed-path tombstones. 
+func vsCopilotClassifyPath( + root, path string, allowMissing bool, +) (multiSessionMatch, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if tracePath, conversationID, ok := + splitVisualStudioCopilotVirtualPath(path); ok { + if !visualStudioCopilotTraceUnderRoot(root, tracePath, true) { + return multiSessionMatch{}, false + } + return multiSessionMatch{ + Path: path, + Container: tracePath, + MemberID: conversationID, + ProjectHint: "visualstudio", + }, true + } + if visualStudioCopilotTraceUnderRoot(root, path, !allowMissing) { + return multiSessionMatch{ + Path: path, + Container: path, + ProjectHint: "visualstudio", + }, true + } + return multiSessionMatch{}, false +} + +func vsCopilotFindMember(root, rawID string) (multiSessionMatch, bool) { + path := findVisualStudioCopilotSourceFile(root, rawID) + if path == "" { + return multiSessionMatch{}, false + } + return vsCopilotClassifyPath(root, path, false) +} + +// findVisualStudioCopilotSourceFile locates a trace file by conversation UUID +// and returns a conversation-scoped tracePath#conversationID virtual path. 
+func findVisualStudioCopilotSourceFile(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + return findVisualStudioCopilotTraceSourceFile(root, rawID) +} + +func vsCopilotFingerprintSource( + src multiSessionSource, +) (SourceFingerprint, error) { + size, mtime, err := VisualStudioCopilotTraceFingerprintStrict(src.Container) + if err != nil { + return SourceFingerprint{}, err + } + hash, err := hashJSONLSourceFile(src.Container) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: size, + MTimeNS: mtime, + Hash: hash, + }, nil +} + +func vsCopilotParseMember( + src multiSessionSource, req ParseRequest, +) (*ParseResult, error) { + project := firstNonEmptyJSONLString(req.Source.ProjectHint, "visualstudio") + sess, msgs, err := parseVisualStudioCopilotConversation( + src.Container, src.MemberID, project, req.Machine, + ) + if err != nil { + return nil, err + } + if sess == nil { + return nil, nil + } + return &ParseResult{Session: *sess, Messages: msgs}, nil +} + +func vsCopilotParseContainer( + src multiSessionSource, req ParseRequest, +) ([]ParseResult, error) { + ids, err := VisualStudioCopilotFileConversationIDs(src.Container) + if err != nil { + return nil, err + } + project := firstNonEmptyJSONLString(req.Source.ProjectHint, "visualstudio") + results := make([]ParseResult, 0, len(ids)) + for _, id := range ids { + sess, msgs, err := parseVisualStudioCopilotConversation( + src.Container, id, project, req.Machine, + ) + if err != nil { + return nil, err + } + if sess == nil { + continue + } + results = append(results, ParseResult{Session: *sess, Messages: msgs}) + } + return results, nil +} + +// splitVisualStudioCopilotVirtualPath splits a tracePath#conversationID +// virtual source path into its physical trace file and conversation ID. 
It +// builds on the provider-neutral ParseVirtualSourcePath splitter and adds the +// Visual Studio Copilot validation: the container must name a trace file and the +// source ID must be a valid conversation ID. It returns ok=false for a plain +// trace-file path. +func splitVisualStudioCopilotVirtualPath( + sourcePath string, +) (tracePath, conversationID string, ok bool) { + tracePath, conversationID, ok = ParseVirtualSourcePath(sourcePath) + if !ok { + return "", "", false + } + if !IsVisualStudioCopilotTraceFile(tracePath) || + !IsValidSessionID(conversationID) { + return "", "", false + } + return tracePath, conversationID, true +} + +func visualStudioCopilotTraceUnderRoot( + root, path string, + requireRegular bool, +) bool { + rel, ok := relUnder(root, path) + if !ok || strings.Contains(filepath.ToSlash(rel), "/") { + return false + } + if !IsVisualStudioCopilotTraceFile(path) { + return false + } + return !requireRegular || IsRegularFile(path) +} + +func visualStudioCopilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/visualstudio_copilot_test.go b/internal/parser/visualstudio_copilot_test.go index aeb1f2ab1..768d1bac7 100644 --- a/internal/parser/visualstudio_copilot_test.go +++ b/internal/parser/visualstudio_copilot_test.go @@ -36,7 +36,7 
@@ func TestDiscoverVisualStudioCopilotSessions(t *testing.T) { []byte("{}\n"), 0o644, )) - files := DiscoverVisualStudioCopilotSessions(tracesDir) + files := discoverVisualStudioCopilotTestSessions(t, tracesDir) require.Len(t, files, 1) assert.Equal(t, tracePath+"#"+conversationID, files[0].Path) @@ -55,7 +55,7 @@ func TestDiscoverVisualStudioCopilotSessions_IgnoresParentDirs(t *testing.T) { "20260612T194439_257709a3_VSGitHubCopilot_traces.jsonl", ), []byte("{}\n"), 0o644)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) assert.Empty(t, files) } @@ -80,7 +80,7 @@ func TestDiscoverVisualStudioCopilotSessions_DeduplicatesConversationTraceFiles( require.NoError(t, os.WriteFile(oldTrace, []byte(data), 0o644)) require.NoError(t, os.WriteFile(newTrace, []byte(data), 0o644)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, newTrace+"#"+conversationID, files[0].Path) @@ -100,7 +100,7 @@ func TestParseVisualStudioCopilotSession_MalformedTraceLineReturnsError(t *testi }) + "\n" + `{"resourceSpans":[` + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -159,7 +159,7 @@ func TestDiscoverVisualStudioCopilotSessions_EmitsWorkItemPerConversation(t *tes require.NoError(t, os.WriteFile(oldTrace, []byte(oldData), 0o644)) require.NoError(t, os.WriteFile(newTrace, []byte(newData), 0o644)) - files := DiscoverVisualStudioCopilotSessions(dir) + files := discoverVisualStudioCopilotTestSessions(t, dir) got := map[string]string{} for _, f := range files { @@ -185,7 +185,7 @@ func TestDiscoverVisualStudioCopilotSessions_SampleFixturesEnumerateBothConversa t.Skipf("sample dir not available: %v", err) } - files := DiscoverVisualStudioCopilotSessions(sampleDir) + 
files := discoverVisualStudioCopilotTestSessions(t, sampleDir) got := map[string]struct{}{} for _, f := range files { @@ -228,7 +228,7 @@ func TestParseVisualStudioCopilotConversation_PropagatesSiblingDirReadError(t *t require.NoError(t, os.Chmod(dir, 0o100)) t.Cleanup(func() { _ = os.Chmod(dir, 0o755) }) - _, _, err := ParseVisualStudioCopilotConversation( + _, _, err := parseVisualStudioCopilotTestConversation(t, tracePath, conversationID, "visualstudio", "local", ) require.Error(t, err, @@ -325,7 +325,7 @@ func TestParseVisualStudioCopilotConversation_PropagatesReadError(t *testing.T) ) require.NoError(t, os.Mkdir(dir, 0o755)) - sess, msgs, err := ParseVisualStudioCopilotConversation( + sess, msgs, err := parseVisualStudioCopilotTestConversation(t, dir, "4a8f63f6-7626-4416-a874-fc7bd2c3f005", "visualstudio", "local", ) @@ -349,7 +349,7 @@ func TestDiscoverVisualStudioCopilotSessions_EnqueuesUnreadableTraceFile(t *test ) require.NoError(t, os.Symlink(target, link)) - files := DiscoverVisualStudioCopilotSessions(root) + files := discoverVisualStudioCopilotTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, link, files[0].Path, @@ -394,7 +394,7 @@ func TestParseVisualStudioCopilotSession_IgnoresNonTraceFiles(t *testing.T) { }` require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -431,7 +431,7 @@ func TestParseVisualStudioCopilotTraceSession(t *testing.T) { }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -475,7 +475,7 @@ func TestParseVisualStudioCopilotTraceSession_GetFileResult(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, 
msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -521,7 +521,7 @@ func TestParseVisualStudioCopilotTraceSession_InvokeOnlyFirstMessage(t *testing. }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -553,7 +553,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatPromptFirstMessage(t *testing. }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -593,7 +593,7 @@ func TestParseVisualStudioCopilotTraceSession_PreservesPromptMarkdown(t *testing }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -637,7 +637,7 @@ func TestParseVisualStudioCopilotTraceSession_CombinesConversationTraceFiles(t * require.NoError(t, os.WriteFile(path, []byte(firstData), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(secondData), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -687,7 +687,7 @@ func TestParseVisualStudioCopilotTraceSession_PropagatesSiblingReadError(t *test ) require.NoError(t, os.Symlink(target, sibling)) - _, _, err := ParseVisualStudioCopilotSession( + _, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) require.Error(t, err, @@ -710,7 +710,7 @@ func TestParseVisualStudioCopilotTraceSession_MalformedTraceLineErrors(t *testin }) + "\n" + `{"resourceSpans":` + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - _, _, err := ParseVisualStudioCopilotSession( + _, _, err := 
parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) require.Error(t, err, @@ -746,7 +746,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesChatOutputAcrossFiles( require.NoError(t, os.WriteFile(path, []byte(chatSpan), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(chatSpan), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -805,7 +805,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteChatOutputAcrossFil require.NoError(t, os.WriteFile(path, []byte(partial), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(complete), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -865,7 +865,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteChatUsageForVisible require.NoError(t, os.WriteFile(path, []byte(richEarlier), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(leanerLater), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -913,7 +913,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesToolSpanAcrossFiles(t require.NoError(t, os.WriteFile(path, []byte(partial), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(complete), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -970,7 +970,7 @@ func TestParseVisualStudioCopilotTraceSession_PreservesOrderWhenDedupingToolSpan ) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1010,7 +1010,7 @@ func 
TestParseVisualStudioCopilotTraceSession_ChatOutputMessages(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1071,7 +1071,7 @@ func TestParseVisualStudioCopilotTraceSession_CountsUsageForToolOnlyChatTurn(t * }) + "\n" require.NoError(t, os.WriteFile(path, []byte(chatSpan+toolSpan), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1130,7 +1130,7 @@ func TestParseVisualStudioCopilotTraceSession_DoesNotDoubleCountTextPlusToolUsag }) + "\n" require.NoError(t, os.WriteFile(path, []byte(chatSpan+toolSpan), 0o644)) - sess, _, err := ParseVisualStudioCopilotSession( + sess, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1181,7 +1181,7 @@ func TestParseVisualStudioCopilotTraceSession_PrefersCompleteToolOnlyChatUsage(t require.NoError(t, os.WriteFile(path, []byte(chatSpan("200", "10")), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(chatSpan("500", "42")+toolSpan), 0o644)) - sess, _, err := ParseVisualStudioCopilotSession( + sess, _, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1214,7 +1214,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatUsage(t *testing.T) { }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1262,7 +1262,7 @@ func TestParseVisualStudioCopilotTraceSession_StandardToolInputs(t *testing.T) { }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - _, msgs, err := ParseVisualStudioCopilotSession( + _, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ 
-1313,7 +1313,7 @@ func TestParseVisualStudioCopilotTraceSession_UsesSiblingPromptSpan(t *testing.T require.NoError(t, os.WriteFile(path, []byte(primaryData), 0o644)) require.NoError(t, os.WriteFile(sibling, []byte(siblingData), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1386,7 +1386,7 @@ func TestParseVisualStudioCopilotTraceSession_DeduplicatesPromptAndToolSpans(t * }, "\n") + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1430,7 +1430,7 @@ func TestParseVisualStudioCopilotTraceSession_ChatSummaryFallback(t *testing.T) }) + "\n" require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) - sess, msgs, err := ParseVisualStudioCopilotSession( + sess, msgs, err := parseVisualStudioCopilotTestSession(t, path, "visualstudio", "local", ) @@ -1473,7 +1473,7 @@ func TestParseVisualStudioCopilotConversation_ParsesEachConversationIndependentl require.NoError(t, os.WriteFile(path, []byte(data), 0o644)) // The prompt conversation parses with its user message. - promptSess, _, err := ParseVisualStudioCopilotConversation( + promptSess, _, err := parseVisualStudioCopilotTestConversation(t, path, promptID, "visualstudio", "local", ) require.NoError(t, err) @@ -1486,7 +1486,7 @@ func TestParseVisualStudioCopilotConversation_ParsesEachConversationIndependentl // The ambient conversation in the same file is not dropped; it // parses on its own with its invoke_agent turn. 
- ambientSess, ambientMsgs, err := ParseVisualStudioCopilotConversation( + ambientSess, ambientMsgs, err := parseVisualStudioCopilotTestConversation(t, path, ambientID, "visualstudio", "local", ) require.NoError(t, err) @@ -1517,13 +1517,13 @@ func TestFindVisualStudioCopilotSourceFile(t *testing.T) { []byte(traceLine+"\n"), 0o644)) assert.Equal(t, VisualStudioCopilotVirtualPath(newTrace, uuid), - FindVisualStudioCopilotSourceFile(tracesDir, uuid), + findVisualStudioCopilotTestSourceFile(t, tracesDir, uuid), "source lookup must return a conversation-scoped virtual path so a "+ "single-session resync does not reparse the whole trace file") assert.Equal(t, "", - FindVisualStudioCopilotSourceFile(dir, uuid)) + findVisualStudioCopilotTestSourceFile(t, dir, uuid)) assert.Equal(t, "", - FindVisualStudioCopilotSourceFile(tracesDir, "../etc/passwd")) + findVisualStudioCopilotTestSourceFile(t, tracesDir, "../etc/passwd")) } // TestWriteVisualStudioCopilotConversationJSONL verifies that exporting one diff --git a/internal/parser/vscode_copilot.go b/internal/parser/vscode_copilot.go index c93274b87..84beb8b3f 100644 --- a/internal/parser/vscode_copilot.go +++ b/internal/parser/vscode_copilot.go @@ -115,10 +115,10 @@ type vscodeCopilotWorkspace struct { Workspace string `json:"workspace"` } -// ParseVSCodeCopilotSession parses a VSCode Copilot chat -// session file (.json or .jsonl). Returns (nil, nil, nil) -// if the file is empty or contains no meaningful content. -func ParseVSCodeCopilotSession( +// parseSession parses a VSCode Copilot chat session file (.json or .jsonl). +// Returns (nil, nil, nil) if the file is empty or contains no meaningful +// content. 
+func (p *vscodeCopilotProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/vscode_copilot_provider.go b/internal/parser/vscode_copilot_provider.go new file mode 100644 index 000000000..bfecc74d0 --- /dev/null +++ b/internal/parser/vscode_copilot_provider.go @@ -0,0 +1,654 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "sort" + "strings" +) + +var _ Provider = (*vscodeCopilotProvider)(nil) + +type vscodeCopilotProviderFactory struct { + def AgentDef +} + +func newVSCodeCopilotProviderFactory(def AgentDef) ProviderFactory { + return vscodeCopilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f vscodeCopilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f vscodeCopilotProviderFactory) Capabilities() Capabilities { + return vscodeCopilotProviderCapabilities() +} + +func (f vscodeCopilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &vscodeCopilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: vscodeCopilotProviderCapabilities(), + Config: cfg, + }, + sources: newVSCodeCopilotSourceSet(cfg.Roots), + } +} + +type vscodeCopilotProvider struct { + ProviderBase + sources vscodeCopilotSourceSet +} + +func (p *vscodeCopilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *vscodeCopilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *vscodeCopilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *vscodeCopilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return 
p.sources.FindSource(ctx, req) +} + +func (p *vscodeCopilotProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *vscodeCopilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, project, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("vscode copilot source path unavailable") + } + if req.Source.ProjectHint != "" { + project = req.Source.ProjectHint + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + sess.File.Size = req.Fingerprint.Size + sess.File.Mtime = req.Fingerprint.MTimeNS + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: sess.UsageEvents, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type vscodeCopilotSource struct { + Root string + Path string + Project string +} + +type vscodeCopilotSourceSet struct { + roots []string +} + +func newVSCodeCopilotSourceSet(roots []string) vscodeCopilotSourceSet { + return vscodeCopilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s vscodeCopilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range s.discoverSessionFiles(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + 
addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionFiles traverses the VSCode workspaceStorage directory to find +// chatSessions/*.json and *.jsonl files. When both formats exist for the same +// session UUID, the .jsonl file takes priority. It also checks +// globalStorage/emptyWindowChatSessions and transferredChatSessions. The root +// should point to e.g. +// +// ~/Library/Application Support/Code/User (macOS) +// ~/.config/Code/User (Linux) +func (s vscodeCopilotSourceSet) discoverSessionFiles( + vscodeUserDir string, +) []DiscoveredFile { + if vscodeUserDir == "" { + return nil + } + + var files []DiscoveredFile + + // 1. Scan workspaceStorage//chatSessions/*.{json,jsonl} + wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err == nil { + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + + hashPath := filepath.Join(wsDir, entry.Name()) + chatDir := filepath.Join(hashPath, "chatSessions") + sessionFiles, err := os.ReadDir(chatDir) + if err != nil { + continue + } + + // Read workspace.json to get project name + project := ReadVSCodeWorkspaceManifest(hashPath) + if project == "" { + project = "unknown" + } + + files = append(files, + discoverVSCodeSessionFiles( + chatDir, sessionFiles, project, + AgentVSCodeCopilot, + )..., + ) + } + } + + // 2. 
Scan globalStorage/emptyWindowChatSessions/*.{json,jsonl} + for _, subdir := range []string{ + "globalStorage/emptyWindowChatSessions", + "globalStorage/transferredChatSessions", + } { + globalDir := filepath.Join(vscodeUserDir, subdir) + globalFiles, err := os.ReadDir(globalDir) + if err != nil { + continue + } + files = append(files, + discoverVSCodeSessionFiles( + globalDir, globalFiles, "empty-window", + AgentVSCodeCopilot, + )..., + ) + } + + sort.Slice(files, func(i, j int) bool { + return files[i].Path < files[j].Path + }) + return files +} + +func (s vscodeCopilotSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)*2) + for _, root := range s.roots { + workspace := filepath.Join(root, "workspaceStorage") + roots = append(roots, WatchRoot{ + Path: workspace, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentVSCodeCopilot) + ":workspace:" + workspace, + }) + global := filepath.Join(root, "globalStorage") + roots = append(roots, WatchRoot{ + Path: global, + Recursive: true, + IncludeGlobs: []string{"*.json", "*.jsonl"}, + DebounceKey: string(AgentVSCodeCopilot) + ":global:" + global, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s vscodeCopilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + sources := s.sourcesForWorkspaceManifest(root, req.Path) + if len(sources) > 0 { + return sources, nil + } + source, ok := s.sourceRefForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s vscodeCopilotSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { 
+ continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a VSCode Copilot session file by UUID (.jsonl +// preferred over .json) across workspaceStorage and the global session dirs. +func (s vscodeCopilotSourceSet) findSourceFile( + vscodeUserDir, rawID string, +) string { + if vscodeUserDir == "" || !IsValidSessionID(rawID) { + return "" + } + + // Search through workspaceStorage + wsDir := filepath.Join(vscodeUserDir, "workspaceStorage") + hashDirs, err := os.ReadDir(wsDir) + if err == nil { + for _, entry := range hashDirs { + if !entry.IsDir() { + continue + } + base := filepath.Join( + wsDir, entry.Name(), "chatSessions", + ) + // Prefer .jsonl + for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join( + base, rawID+ext, + ) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + } + + // Check global dirs + for _, subdir := range []string{ + "globalStorage/emptyWindowChatSessions", + "globalStorage/transferredChatSessions", + } { + base := filepath.Join(vscodeUserDir, subdir) + for _, ext := range []string{".jsonl", ".json"} { + candidate := filepath.Join(base, rawID+ext) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + } + + return "" +} + +func (s vscodeCopilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, _, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("vscode copilot source path unavailable") + } + info, err 
:= os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + workspacePath := s.workspaceManifestForSource(path) + if workspacePath != "" { + if workspaceInfo, err := os.Stat(workspacePath); err == nil { + fingerprint.Size += workspaceInfo.Size() + if mtime := workspaceInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + } + } + fingerprint.Hash, err = vscodeCopilotSourceHash(path, workspacePath) + if err != nil { + return SourceFingerprint{}, err + } + return fingerprint, nil +} + +func (s vscodeCopilotSourceSet) pathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case vscodeCopilotSource: + return src.Path, src.Project, src.Path != "" + case *vscodeCopilotSource: + if src != nil && src.Path != "" { + return src.Path, src.Project, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(vscodeCopilotSource) + return src.Path, src.Project, true + } + } + } + return "", "", false +} + +func (s vscodeCopilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + 
return SourceRef{}, false + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return s.newSourceRef(root, path, project), true + } + if len(parts) == 3 && + parts[0] == "globalStorage" && + (parts[1] == "emptyWindowChatSessions" || + parts[1] == "transferredChatSessions") && + isVSCodeCopilotSessionPath(parts[2]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + if !IsRegularFile(path) { + return SourceRef{}, false + } + return s.newSourceRef(root, path, "empty-window"), true + } + return SourceRef{}, false +} + +func (s vscodeCopilotSourceSet) sourceRefForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if req.EventKind != "remove" && vscodeCopilotJSONLPreferredOver(path) { + return SourceRef{}, false + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + return s.syntheticSourceRef(root, path) +} + +func (s vscodeCopilotSourceSet) syntheticSourceRef( + root, path string, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + if promoted := vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + return s.newSourceRef(root, path, project), true + } + if len(parts) == 3 && + parts[0] == "globalStorage" && + (parts[1] == "emptyWindowChatSessions" || + parts[1] == "transferredChatSessions") && + isVSCodeCopilotSessionPath(parts[2]) { + if promoted := 
vscodeCopilotPreferredExistingPath(path); promoted != "" { + path = promoted + } + return s.newSourceRef(root, path, "empty-window"), true + } + return SourceRef{}, false +} + +func (s vscodeCopilotSourceSet) sourcesForWorkspaceManifest( + root, path string, +) []SourceRef { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return nil + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) != 3 || + parts[0] != "workspaceStorage" || + parts[2] != "workspace.json" { + return nil + } + hashDir := filepath.Join(root, "workspaceStorage", parts[1]) + chatDir := filepath.Join(hashDir, "chatSessions") + entries, err := os.ReadDir(chatDir) + if err != nil { + return nil + } + project := ReadVSCodeWorkspaceManifest(hashDir) + if project == "" { + project = "unknown" + } + files := discoverVSCodeSessionFiles( + chatDir, entries, project, AgentVSCodeCopilot, + ) + sources := make([]SourceRef, 0, len(files)) + seen := make(map[string]struct{}, len(files)) + for _, file := range files { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + source.ProjectHint = file.Project + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources +} + +func (s vscodeCopilotSourceSet) workspaceManifestForSource(path string) string { + for _, root := range s.roots { + root = filepath.Clean(root) + rel, ok := relUnder(root, path) + if !ok { + continue + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 4 && + parts[0] == "workspaceStorage" && + parts[2] == "chatSessions" && + isVSCodeCopilotSessionPath(parts[3]) { + workspacePath := filepath.Join( + root, + "workspaceStorage", + parts[1], + "workspace.json", + ) + if IsRegularFile(workspacePath) { + return workspacePath + } + } + } + return "" +} + +func (s vscodeCopilotSourceSet) newSourceRef(root, path, project string) SourceRef { + return SourceRef{ + Provider: AgentVSCodeCopilot, + Key: path, + 
DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: vscodeCopilotSource{ + Root: root, + Path: path, + Project: project, + }, + } +} + +func isVSCodeCopilotSessionPath(name string) bool { + return strings.HasSuffix(name, ".json") || strings.HasSuffix(name, ".jsonl") +} + +func vscodeCopilotPreferredExistingPath(path string) string { + if base, ok := strings.CutSuffix(path, ".json"); ok { + candidate := base + ".jsonl" + if IsRegularFile(candidate) { + return candidate + } + } + if IsRegularFile(path) { + return path + } + if base, ok := strings.CutSuffix(path, ".jsonl"); ok { + candidate := base + ".json" + if IsRegularFile(candidate) { + return candidate + } + } + return "" +} + +func vscodeCopilotJSONLPreferredOver(path string) bool { + base, ok := strings.CutSuffix(path, ".json") + if !ok { + return false + } + return IsRegularFile(base + ".jsonl") +} + +func vscodeCopilotSourceHash(path, workspacePath string) (string, error) { + hash, err := hashJSONLSourceFile(path) + if err != nil { + return "", err + } + if workspacePath == "" { + return hash, nil + } + workspaceHash, err := hashJSONLSourceFile(workspacePath) + if err != nil { + return "", err + } + h := sha256.New() + _, _ = h.Write([]byte("chat\x00" + hash + "\x00workspace\x00" + workspaceHash)) + return fmt.Sprintf("%x", h.Sum(nil)), nil +} + +func vscodeCopilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + ToolCalls: CapabilitySupported, 
+ ToolResults: CapabilitySupported, + Thinking: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/vscode_copilot_test.go b/internal/parser/vscode_copilot_test.go index bf73482ac..e7fbd3782 100644 --- a/internal/parser/vscode_copilot_test.go +++ b/internal/parser/vscode_copilot_test.go @@ -127,7 +127,7 @@ func TestParseVSCodeCopilotSession(t *testing.T) { path, []byte(tt.json), 0644, )) - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, path, "testproject", "local", ) require.NoError(t, err) @@ -166,7 +166,7 @@ func TestParseVSCodeCopilotSession(t *testing.T) { } func TestParseVSCodeCopilotSession_NonExistent(t *testing.T) { - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, "/nonexistent/path.json", "proj", "local", ) require.NoError(t, err, "expected nil error") @@ -196,7 +196,7 @@ func TestParseVSCodeCopilotSession_MixedTextAndTools(t *testing.T) { path := filepath.Join(dir, "test.json") require.NoError(t, os.WriteFile(path, []byte(data), 0644)) - _, msgs, err := ParseVSCodeCopilotSession(path, "proj", "local") + _, msgs, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) // Find assistant message @@ -242,7 +242,7 @@ func TestParseVSCodeCopilotSession_TerminalToolData(t *testing.T) { path := filepath.Join(dir, "test.json") require.NoError(t, os.WriteFile(path, []byte(data), 0644)) - _, msgs, err := ParseVSCodeCopilotSession(path, "proj", "local") + _, msgs, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) var assistant *ParsedMessage @@ -328,7 +328,7 @@ func TestDiscoverVSCodeCopilotSessions(t *testing.T) { globalPath := filepath.Join(globalDir, "global-sess.json") require.NoError(t, os.WriteFile(globalPath, []byte(sessionJSON), 0644)) - files := DiscoverVSCodeCopilotSessions(root) + files := 
discoverVSCodeCopilotTestSessions(t, root) require.Len(t, files, 2) @@ -518,7 +518,7 @@ func TestParseVSCodeCopilotSession_JSONL(t *testing.T) { path, []byte(content), 0644, )) - sess, msgs, err := ParseVSCodeCopilotSession( + sess, msgs, err := parseVSCodeCopilotTestSession(t, path, "testproject", "local", ) require.NoError(t, err) @@ -722,7 +722,7 @@ func TestDiscoverVSCodeCopilot_JSONLDedup(t *testing.T) { []byte(sessionJSON), 0644, )) - files := DiscoverVSCodeCopilotSessions(root) + files := discoverVSCodeCopilotTestSessions(t, root) // Should get 3 files: dup1.jsonl, only-jsonl.jsonl, only-json.json if !assert.Len(t, files, 3, "expected 3 files") { @@ -767,7 +767,7 @@ func TestFindVSCodeCopilotSourceFile(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := FindVSCodeCopilotSourceFile( + got := findVSCodeCopilotTestSourceFile(t, tt.dir, tt.id, ) assert.Equal(t, tt.want, got) @@ -823,7 +823,7 @@ func TestParseVSCodeCopilotSession_TokenUsage(t *testing.T) { path := filepath.Join(dir, "usage.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -871,7 +871,7 @@ func TestParseVSCodeCopilotSession_TokenUsageModelFallback(t *testing.T) { path := filepath.Join(dir, "usage2.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -898,7 +898,7 @@ func TestParseVSCodeCopilotSession_NoTokenUsage(t *testing.T) { path := filepath.Join(dir, "nousage.json") require.NoError(t, os.WriteFile(path, []byte(sessionJSON), 0644)) - sess, _, err := ParseVSCodeCopilotSession(path, "proj", "local") + sess, _, err := 
parseVSCodeCopilotTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess) diff --git a/internal/service/direct.go b/internal/service/direct.go index 5809a91b6..2cc253603 100644 --- a/internal/service/direct.go +++ b/internal/service/direct.go @@ -311,7 +311,7 @@ func (b *directBackend) Sync( // conversation lives on in a sibling. The single-session path keeps the // conversation scope and follows it across sibling trace files. if _, _, ok := - parser.ParseVisualStudioCopilotVirtualPath(storedPath); ok { + parser.SplitVisualStudioCopilotVirtualPath(storedPath); ok { if err := b.engine.SyncSingleSessionContext( ctx, in.ID, ); err != nil { diff --git a/internal/sync/classify_antigravity_cli_test.go b/internal/sync/classify_antigravity_cli_test.go index f99a7d1ea..be05aba3e 100644 --- a/internal/sync/classify_antigravity_cli_test.go +++ b/internal/sync/classify_antigravity_cli_test.go @@ -67,7 +67,6 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { parser.AgentAntigravityCLI: {dir}, }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -131,7 +130,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) + got, ok := eng.classifyOnePath(tt.path) assert.Equal(t, tt.want, ok) if ok { assert.Equal(t, parser.AgentAntigravityCLI, got.Agent) @@ -146,7 +145,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { orphanTraj := filepath.Join(convDir, orphanUUID+".trajectory.json") require.NoError(t, os.WriteFile(orphanTraj, []byte("orphan"), 0o644)) - _, ok := eng.classifyOnePath(orphanTraj, geminiMap) + _, ok := eng.classifyOnePath(orphanTraj) assert.False(t, ok, "should not classify sidecar when pb file does not exist") }) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index e39a3b86d..b2c837d98 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ 
-380,7 +380,7 @@ func IsVisualStudioCopilotSkipPath(path string) bool { if parser.IsVisualStudioCopilotTraceFile(path) { return true } - _, _, ok := parser.ParseVisualStudioCopilotVirtualPath(path) + _, _, ok := parser.SplitVisualStudioCopilotVirtualPath(path) return ok } @@ -523,7 +523,6 @@ func (e *Engine) SyncPaths(paths []string) { func (e *Engine) classifyPaths( paths []string, ) []parser.DiscoveredFile { - geminiProjectsByDir := make(map[string]map[string]string) seen := make(map[string]int, len(paths)) files := make([]parser.DiscoveredFile, 0, len(paths)) for _, p := range paths { @@ -535,9 +534,7 @@ func (e *Engine) classifyPaths( dfs = e.classifyCodexIndexPath(p) } if len(dfs) == 0 { - if df, ok := e.classifyOnePath( - p, geminiProjectsByDir, - ); ok { + if df, ok := e.classifyOnePath(p); ok { dfs = []parser.DiscoveredFile{df} } } @@ -596,6 +593,17 @@ func (e *Engine) classifyProviderChangedPath( default: continue } + // Codex index (session_index.jsonl) events are owned by the engine's + // DB-aware classifyCodexIndexPath, which fans out only to sessions whose + // stored title changed and resolves a UUID's live/archived duplicate to + // the path the DB already tracks. The provider's broad index fan-out + // would re-add every sibling and prefer the live-over-archived layout, + // resurrecting a stale duplicate over the stored copy, so suppress it + // here and let the engine method classify the index event. 
+ if agentType == parser.AgentCodex && + filepath.Base(path) == parser.CodexSessionIndexFilename { + continue + } roots := e.agentDirs[agentType] if len(roots) == 0 { continue @@ -777,6 +785,19 @@ func providerDeletedPhysicalSQLiteSource( func dedupeDiscoveredFiles( files []parser.DiscoveredFile, +) []parser.DiscoveredFile { + return dedupeDiscoveredFilesByPreference(files, preferDiscoveredFile) +} + +func dedupeDiscoveredFilesPreferNewestCodex( + files []parser.DiscoveredFile, +) []parser.DiscoveredFile { + return dedupeDiscoveredFilesByPreference(files, preferNewestCodexDiscoveredFile) +} + +func dedupeDiscoveredFilesByPreference( + files []parser.DiscoveredFile, + prefer func(candidate, current parser.DiscoveredFile) bool, ) []parser.DiscoveredFile { if len(files) < 2 { return files @@ -786,7 +807,7 @@ func dedupeDiscoveredFiles( for _, file := range files { key := discoveredFileKey(file) if current, ok := bestByKey[key]; ok { - if preferDiscoveredFile(file, current) { + if prefer(file, current) { bestByKey[key] = file } continue @@ -837,6 +858,27 @@ func preferDiscoveredFile( return false } +func preferNewestCodexDiscoveredFile( + candidate, current parser.DiscoveredFile, +) bool { + if candidate.Agent == parser.AgentCodex && current.Agent == parser.AgentCodex { + candMTime, candOK := discoveredFileMTime(candidate.Path) + currMTime, currOK := discoveredFileMTime(current.Path) + if candOK && currOK && candMTime != currMTime { + return candMTime > currMTime + } + } + return preferDiscoveredFile(candidate, current) +} + +func discoveredFileMTime(path string) (int64, bool) { + info, err := os.Stat(path) + if err != nil { + return 0, false + } + return info.ModTime().UnixNano(), true +} + func (e *Engine) expandClaudeDuplicateCandidates( files []parser.DiscoveredFile, ) []parser.DiscoveredFile { @@ -928,7 +970,6 @@ func (e *Engine) classifyContainerPath( func (e *Engine) classifyOnePath( path string, - geminiProjectsByDir map[string]map[string]string, ) 
(parser.DiscoveredFile, bool) { sep := string(filepath.Separator) pathExists := true @@ -963,165 +1004,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. - // Codex: either ////.jsonl - // or /.jsonl for archived sessions. - for _, codexDir := range e.agentDirs[parser.AgentCodex] { - if codexDir == "" { - continue - } - if _, _, ok := parser.CodexSessionPathInfo(codexDir, path); ok { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCodex, - }, true - } - } - - // Copilot: /session-state/.jsonl - // or: /session-state//events.jsonl - for _, copilotDir := range e.agentDirs[parser.AgentCopilot] { - if copilotDir == "" { - continue - } - stateDir := filepath.Join( - copilotDir, "session-state", - ) - if rel, ok := isUnder(stateDir, path); ok { - parts := strings.Split(rel, sep) - switch len(parts) { - case 1: - stem, ok := strings.CutSuffix( - parts[0], ".jsonl", - ) - if !ok { - continue - } - dirEvents := filepath.Join( - stateDir, stem, "events.jsonl", - ) - if _, err := os.Stat(dirEvents); err == nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - case 2: - if parts[1] == "events.jsonl" { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - } - // workspace.yaml changes should trigger a re-parse - // of the sibling events.jsonl. - if parts[1] == "workspace.yaml" { - eventsPath := filepath.Join( - stateDir, parts[0], "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - return parser.DiscoveredFile{ - Path: eventsPath, - Agent: parser.AgentCopilot, - }, true - } - } - continue - default: - continue - } - } - } - - // Gemini: /tmp//chats/session-*.json(.l) - // is either a SHA-256 hash (old) or project name (new). 
- for _, geminiDir := range e.agentDirs[parser.AgentGemini] { - if geminiDir == "" { - continue - } - if rel, ok := isUnder(geminiDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 4 || - parts[0] != "tmp" || - parts[2] != "chats" { - continue - } - name := parts[3] - if !strings.HasPrefix(name, "session-") || - (!strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl")) { - continue - } - dirName := parts[1] - if _, ok := geminiProjectsByDir[geminiDir]; !ok { - geminiProjectsByDir[geminiDir] = - parser.BuildGeminiProjectMap(geminiDir) - } - project := parser.ResolveGeminiProject( - dirName, geminiProjectsByDir[geminiDir], - ) - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentGemini, - }, true - } - } - - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} - // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} - for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { - if vscDir == "" { - continue - } - if rel, ok := isUnder(vscDir, path); ok { - parts := strings.Split(rel, sep) - // workspaceStorage//chatSessions/.{json,jsonl} - if len(parts) == 4 && - parts[0] == "workspaceStorage" && - parts[2] == "chatSessions" && - (strings.HasSuffix(parts[3], ".json") || - strings.HasSuffix(parts[3], ".jsonl")) { - if vscodeJSONLSiblingExists(path) { - continue - } - hashDir := filepath.Join( - vscDir, "workspaceStorage", parts[1], - ) - project := parser.ReadVSCodeWorkspaceManifest(hashDir) - if project == "" { - project = "unknown" - } - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentVSCodeCopilot, - }, true - } - // globalStorage/emptyWindowChatSessions/.{json,jsonl} - // globalStorage/transferredChatSessions/.{json,jsonl} - if len(parts) == 3 && - parts[0] == "globalStorage" && - (parts[1] == "emptyWindowChatSessions" || parts[1] == "transferredChatSessions") && - (strings.HasSuffix(parts[2], ".json") || - strings.HasSuffix(parts[2], 
".jsonl")) { - if vscodeJSONLSiblingExists(path) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: "empty-window", - Agent: parser.AgentVSCodeCopilot, - }, true - } - } - } - - // Visual Studio Copilot: /*_VSGitHubCopilot_traces.jsonl - if df, ok := e.classifyVisualStudioCopilotPath(path, sep); ok { - return df, true - } - if df, ok := e.classifyAiderPath(path); ok { return df, true } @@ -1228,37 +1110,6 @@ func (e *Engine) classifyOnePath( return parser.DiscoveredFile{}, false } -// classifyVisualStudioCopilotPath matches a top-level Visual Studio Copilot -// trace file (/*_VSGitHubCopilot_traces.jsonl) under a configured -// trace directory. Trace files live directly in the directory, so nested -// paths are rejected. Split out of classifyOnePath to keep that function -// within NilAway's per-function size limit. -func (e *Engine) classifyVisualStudioCopilotPath( - path, sep string, -) (parser.DiscoveredFile, bool) { - if !parser.IsVisualStudioCopilotTraceFile(path) { - return parser.DiscoveredFile{}, false - } - for _, vsDir := range e.agentDirs[parser.AgentVSCopilot] { - if vsDir == "" { - continue - } - rel, ok := isUnder(vsDir, path) - if !ok { - continue - } - if strings.Contains(rel, sep) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: "visualstudio", - Agent: parser.AgentVSCopilot, - }, true - } - return parser.DiscoveredFile{}, false -} - // classifyAiderPath handles Aider's rootless chat-history layout: // // /.../.aider.chat.history.md @@ -1514,18 +1365,6 @@ func (e *Engine) classifyShelleySQLitePath( return parser.DiscoveredFile{}, false } -// vscodeJSONLSiblingExists returns true when path is a .json -// file and a .jsonl sibling exists for the same UUID. This -// mirrors the dedup logic in DiscoverVSCodeCopilotSessions. 
-func vscodeJSONLSiblingExists(path string) bool { - base, ok := strings.CutSuffix(path, ".json") - if !ok { - return false - } - _, err := os.Stat(base + ".jsonl") - return err == nil -} - // resyncTempSuffix is appended to the original DB path to // form the temp database path during resync. const resyncTempSuffix = "-resync" @@ -2347,12 +2186,27 @@ func (e *Engine) syncAllLocked( } all = append(all, providerFound...) - if !since.IsZero() { + quickSyncCutoff := !since.IsZero() + if quickSyncCutoff { all = e.dedupeClaudeDiscoveredFiles(all) + // A Codex UUID can exist as both a live dated transcript and a flat + // archived copy. The provider's discovery deduplicates them to the + // preferred (live) layout, but the mtime cutoff filter runs before the + // engine's own dedup, so a changed archived copy that is newer than the + // cutoff would be lost behind an older live copy that the cutoff drops. + // Re-expand to every on-disk duplicate before filtering so the cutoff + // sees each copy's real mtime; the quick-sync dedupe below then keeps + // the newest surviving duplicate before falling back to normal layout + // preference. + all = e.expandCodexProviderDuplicates(all, scope) all = e.filterFilesByMtime(ctx, all, since) } - all = dedupeDiscoveredFiles(all) + if quickSyncCutoff { + all = dedupeDiscoveredFilesPreferNewestCodex(all) + } else { + all = dedupeDiscoveredFiles(all) + } all = e.dedupeClaudeDiscoveredFiles(all) all = e.filterShadowedLegacyKiroFiles(all) @@ -2830,6 +2684,85 @@ func (e *Engine) discoverProviderSources( return files, failures } +// expandCodexProviderDuplicates re-adds the on-disk duplicate paths of each +// discovered Codex source. The provider deduplicates a UUID's live and archived +// copies to the preferred layout at discovery time; this restores the dropped +// duplicates (scoped to the configured roots) so an mtime cutoff filter can +// judge each copy on its own mtime, matching the legacy discover-then-filter +// order. 
Non-Codex files and Codex files without a UUID-shaped name pass through +// unchanged. Duplicates are keyed by path so nothing is added twice. +func (e *Engine) expandCodexProviderDuplicates( + files []parser.DiscoveredFile, scope *rootSyncScope, +) []parser.DiscoveredFile { + pather := e.codexUUIDPathLister(scope) + if pather == nil { + return files + } + seen := make(map[string]struct{}, len(files)) + for _, f := range files { + seen[string(f.Agent)+"\x00"+filepath.Clean(f.Path)] = struct{}{} + } + out := files + for _, f := range files { + if f.Agent != parser.AgentCodex { + continue + } + uuid := parser.CodexSessionUUIDFromFilename(filepath.Base(f.Path)) + if uuid == "" { + continue + } + for _, dup := range pather(uuid) { + key := string(parser.AgentCodex) + "\x00" + filepath.Clean(dup) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, parser.DiscoveredFile{ + Path: dup, + Agent: parser.AgentCodex, + ProviderProcess: true, + ProviderSource: e.codexPinnedProviderSource(dup), + }) + } + } + return out +} + +// codexUUIDPathLister returns a function that lists every on-disk Codex +// transcript path for a UUID under the in-scope roots, or nil when the Codex +// provider is unavailable. It scopes a single provider to the in-scope roots so +// the returned paths cover both the live dated and flat archived copies of a +// duplicated UUID, including duplicates that share one root. 
+func (e *Engine) codexUUIDPathLister( + scope *rootSyncScope, +) func(string) []string { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return nil + } + roots := make([]string, 0, len(e.agentDirs[parser.AgentCodex])) + for _, root := range e.agentDirs[parser.AgentCodex] { + if root == "" || !scope.includes(root) { + continue + } + roots = append(roots, root) + } + if len(roots) == 0 { + return nil + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: roots, + Machine: e.machine, + }) + lister, ok := provider.(interface { + AllSourcePathsForUUID(string) []string + }) + if !ok { + return nil + } + return lister.AllSourcePathsForUUID +} + // recordSyncStarted persists the start time of a sync run // into pg_sync_state. Callers use this to compute mtime // cutoffs for future quick incremental syncs. @@ -2919,12 +2852,33 @@ func (e *Engine) filterFilesByMtime( return out } +// discoveredFileEffectiveMtime returns the freshness timestamp used to filter a +// discovered file against an incremental-sync cutoff. For provider-sourced +// files it consults the provider's Fingerprint so composite/sibling-file +// freshness (for example a Positron session whose workspace.json changed while +// the chat transcript did not) is honored without a per-agent legacy helper. +// Files without a provider source fall back to the legacy mtime computation. func (e *Engine) discoveredFileEffectiveMtime( - ctx context.Context, - file parser.DiscoveredFile, + ctx context.Context, file parser.DiscoveredFile, ) (int64, error) { + // Codex is excluded from the provider-Fingerprint path on purpose. Its + // Fingerprint folds the shared session_index.jsonl mtime into every + // session's freshness (see CodexEffectiveMtime). 
That shared signal is + // correct for the skip cache but wrong for the incremental-sync cutoff: + // when the index changes, both the live and archived copies of a UUID + // would look fresh, defeating the per-copy mtime discrimination that + // expandCodexProviderDuplicates relies on to preserve a changed archived + // duplicate. Index refreshes are handled separately by the codexIndexRefresh + // pass in filterFilesByMtime, so codex uses its raw per-file mtime here. + if file.Agent == parser.AgentCodex { + return discoveredFileMtime(file) + } + // Only provider-authoritative sources resolve freshness through the + // provider Fingerprint. Shadow-compare files keep the legacy mtime path so + // agent-specific incremental-sync behavior (for example the Codex index + // refresh below) is unchanged while a provider is still shadowed. if file.ProviderSource != nil && file.ProviderProcess { - if mtime, ok, err := e.providerFingerprintMtime(ctx, file); err != nil { + if mtime, ok, err := e.providerSourceMtime(ctx, file); err != nil { return 0, err } else if ok { return mtime, nil @@ -2933,9 +2887,12 @@ func (e *Engine) discoveredFileEffectiveMtime( return discoveredFileMtime(file) } -func (e *Engine) providerFingerprintMtime( - ctx context.Context, - file parser.DiscoveredFile, +// providerSourceMtime resolves a provider-sourced file's effective mtime through +// the owning provider's Fingerprint. The boolean reports whether the provider +// runtime produced a usable timestamp; a false result tells the caller to fall +// back to the legacy mtime path. 
+func (e *Engine) providerSourceMtime( + ctx context.Context, file parser.DiscoveredFile, ) (int64, bool, error) { if file.ProviderSource == nil { return 0, false, nil @@ -3845,8 +3802,6 @@ func (e *Engine) processFile( statPath = dbPath } else if dbPath, _, ok := parser.ParseShelleyVirtualPath(file.Path); ok { statPath = dbPath - } else if tracePath, _, ok := parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { - statPath = tracePath } else if historyPath, _, ok := parser.ParseAiderVirtualPath(file.Path); ok { // aider stores "#"; stat the physical file // so SyncSingleSession (live watcher / on-demand re-sync) works. @@ -3892,7 +3847,7 @@ func (e *Engine) processFile( // re-validation. if cacheSkip && !e.forceParse && !file.ForceParse { // parse-diff: ignore the skip cache if e.shouldUseCachedSkip(file, mtime, sourceFingerprint) { - if e.pathNeedsProjectReparse(file.Path) { + if e.pathNeedsCachedSkipBypass(file.Path) { e.clearSkip(file.Path) } else { res := processResult{ @@ -3914,27 +3869,16 @@ func (e *Engine) processFile( // legacy dispatch, via the S3 sync path. res = e.processS3Session(ctx, file, info) case parser.AgentCodex: - if strings.HasPrefix(file.Path, "s3://") { - res = e.processS3Session(ctx, file, info) - } else { - res = e.processCodex(file, info) - } - case parser.AgentCopilot: - res = e.processCopilot(file, info) + // Non-S3 Codex is provider-authoritative and handled earlier by + // processProviderFile; only s3:// Codex sources fall through to the + // legacy dispatch, via the S3 sync path. 
+ res = e.processS3Session(ctx, file, info) case parser.AgentReasonix: res = e.processReasonix(file, info) - case parser.AgentGemini: - res = e.processGemini(file, info) - case parser.AgentVSCodeCopilot: - res = e.processVSCodeCopilot(file, info) - case parser.AgentVSCopilot: - res = e.processVisualStudioCopilot(file, info) case parser.AgentKiro: res = e.processKiro(file, info) case parser.AgentKiroIDE: res = e.processKiroIDE(file, info) - case parser.AgentPositron: - res = e.processPositron(file, info) case parser.AgentZed: res = e.processZed(file, info) case parser.AgentShelley: @@ -3990,6 +3934,25 @@ func (e *Engine) pathNeedsProjectReparse(path string) bool { return ok && parser.NeedsProjectReparse(project) } +func (e *Engine) pathNeedsCachedSkipBypass(path string) bool { + return e.pathNeedsProjectReparse(path) || + e.pathNeedsDataVersionReparse(path) +} + +func (e *Engine) pathNeedsDataVersionReparse(path string) bool { + if e == nil || e.db == nil { + return false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + if _, _, ok := e.db.GetFileInfoByPath(lookupPath); !ok { + return false + } + return e.db.GetDataVersionByPath(lookupPath) < db.CurrentDataVersion() +} + func (e *Engine) processProviderFile( ctx context.Context, file parser.DiscoveredFile, @@ -4056,7 +4019,7 @@ func (e *Engine) processProviderFile( mtime: mtime, }, true } - if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + if freshMtime, fresh := e.providerSourceFreshBeforeFingerprint(source, file); fresh { return processResult{ skip: true, mtime: freshMtime, @@ -4074,12 +4037,20 @@ func (e *Engine) processProviderFile( cachedMtime, cached := e.skipCache[cacheKey] e.skipMu.RUnlock() if cached && cachedMtime == fingerprint.MTimeNS { - return processResult{ - skip: true, - mtime: fingerprint.MTimeNS, - cacheSkip: true, - cacheKey: cacheKey, - }, true + // A cached skip must not hide a session whose stored row needs + // 
self-healing (e.g. a parser data-version bump or generated + // roborev CI worktree project): clear the entry and fall through + // to a full reparse, mirroring the legacy process arm. + if e.pathNeedsCachedSkipBypass(file.Path) { + e.clearSkip(cacheKey) + } else { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: true, + cacheKey: cacheKey, + }, true + } } } @@ -4098,6 +4069,25 @@ func (e *Engine) processProviderFile( } incForceReplace := incRes.forceReplace + // DB-stored fingerprint skip. The provider has no database handle, so the + // engine reproduces the legacy DB-aware skip that single-session JSONL + // providers relied on: an unchanged source whose stored size and effective + // mtime already match is not reparsed, even when the in-memory skip cache + // was cleared (e.g. by SyncSingleSession) or never populated (a fresh + // engine). For Codex this also folds in the session_index.jsonl sidecar: + // a shared index mtime bump that did not change this session's title must + // not trigger a reparse. + if !e.forceParse && !file.ForceParse && + e.shouldSkipProviderSourceByDB(file, fingerprint) { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: true, + }, true + } + // DB-stored-file-info skip: a session whose persisted file_size/file_mtime // already match the source fingerprint (and whose data_version is current) // is unchanged and need not be reparsed. 
This reproduces the legacy @@ -4511,7 +4501,7 @@ func (e *Engine) shouldCacheSkip( return false } if _, _, ok := - parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { + parser.SplitVisualStudioCopilotVirtualPath(file.Path); ok { return false } } @@ -4821,11 +4811,11 @@ func (e *Engine) providerSingleSessionFresh( !parser.NeedsProjectReparse(sess.Project) } -func (e *Engine) providerCoworkSourceFresh( +func (e *Engine) providerSourceFreshBeforeFingerprint( source parser.SourceRef, file parser.DiscoveredFile, ) (int64, bool) { - if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + if e.forceParse || file.ForceParse { return 0, false } path := providerDiscoveredPath(source) @@ -4843,15 +4833,33 @@ func (e *Engine) providerCoworkSourceFresh( return 0, false } } - mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) - effectiveInfo := fakeSnapshotInfo{ - fSize: info.Size(), - fMtime: mtime, - } - if !e.shouldSkipByPath(path, effectiveInfo) { - return 0, false + switch file.Agent { + case parser.AgentCowork: + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } + // Gemini is deliberately absent here. Its fingerprint is composite (the + // session file plus projects.json and trustedFolders.json), so a + // pre-fingerprint skip keyed only on the session file's size and mtime + // would skip a session whose project metadata changed while the transcript + // did not, leaving a stale project on scheduled syncs. Gemini relies on the + // post-fingerprint skip cache instead, whose mtime folds in the composite. 
+ case parser.AgentCopilot: + mtime := copilotEffectiveMtime(path, info) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } } - return mtime, true + return 0, false } // providerSourceUnchangedInDB reports whether a provider source's persisted @@ -5258,18 +5266,41 @@ func (e *Engine) tryIncrementalJSONL( }, true } -func (e *Engine) shouldSkipCodex( - path string, info os.FileInfo, +// shouldSkipProviderSourceByDB reports whether a provider-dispatched source is +// already stored at the parsed fingerprint and can be skipped without a reparse. +// It is the engine-side replacement for the DB-aware skip the legacy +// single-session JSONL processors performed, since a provider has no database +// handle. It is scoped to Codex: Codex's effective mtime folds in the shared +// session_index.jsonl sidecar, so a size-and-effective-mtime match plus a +// per-session title check preserves the legacy "skip when only the global index +// advanced but this session's name did not" semantics. Other providers keep +// their existing in-memory skip-cache behavior unchanged. +func (e *Engine) shouldSkipProviderSourceByDB( + file parser.DiscoveredFile, fingerprint parser.SourceFingerprint, ) bool { - if e.forceParse { // parse-diff: always re-parse + if file.Agent != parser.AgentCodex { return false } + return e.shouldSkipCodexFingerprint(file.Path, fingerprint) +} + +// shouldSkipCodexFingerprint reproduces the legacy shouldSkipCodex decision in +// terms of a provider SourceFingerprint. 
The fingerprint MTimeNS already folds +// in session_index.jsonl via CodexEffectiveMtime, so: +// - a stored size mismatch or stale data version forces a reparse; +// - an exact effective-mtime match skips; +// - an effective mtime ahead of the stored mtime driven only by the index +// (the raw transcript mtime is still at or below the stored mtime) skips +// unless this session's stored title differs from the current index title. +func (e *Engine) shouldSkipCodexFingerprint( + path string, fingerprint parser.SourceFingerprint, +) bool { lookupPath := path if e.pathRewriter != nil { lookupPath = e.pathRewriter(path) } storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok || storedSize != info.Size() { + if !ok || storedSize != fingerprint.Size { return false } if project, ok := e.db.GetProjectByPath(lookupPath); ok && @@ -5280,26 +5311,17 @@ func (e *Engine) shouldSkipCodex( db.CurrentDataVersion() { return false } - // A Codex title lives in session_index.jsonl, not the transcript, so a - // title-only rename can change the title with no transcript signal. Detect - // it directly rather than inferring it from an mtime inequality: the index - // mtime is folded into the stored watermark, so a later rename whose index - // mtime lands at or below that watermark is invisible to a mtime compare, - // and the old storedMtime==effectiveMtime fast path skipped without ever - // consulting the title. codexIndexSessionNameChanged reads the live title - // (cached per index file) and the stored name; a cheaper stored-name lookup - // to keep this fully off the hot skip path is a deferred follow-up. - if e.codexIndexSessionNameChanged(path) { - return false // title changed -> re-parse to refresh metadata + effectiveMtime := fingerprint.MTimeNS + if storedMtime == effectiveMtime { + return true } - // Title verified unchanged: skip when the transcript itself is unchanged. 
- // Compare the bare file mtime, not the index-folded effective mtime -- the - // stored watermark may already include a folded index mtime, and a later - // bump of the shared session_index.jsonl (e.g. another session's rename) - // lifts every session's effective mtime; with the title confirmed - // unchanged, that rise must not force a needless reparse. - fileMtime := info.ModTime().UnixNano() - return fileMtime <= storedMtime + fileMtime := effectiveMtime + if info, err := os.Stat(path); err == nil { + fileMtime = info.ModTime().UnixNano() + } + return effectiveMtime > storedMtime && + fileMtime <= storedMtime && + !e.codexIndexSessionNameChanged(path) } // codexIndexNeedsRefreshSince reports whether a Codex session whose transcript @@ -5377,7 +5399,7 @@ func (e *Engine) classifyCodexIndexPath( } var candidates []parser.DiscoveredFile for _, root := range sessionRoots { - if src := parser.FindCodexSourceFile(root, uuid); src != "" { + if src := e.codexSourceFileForUUID(root, uuid); src != "" { candidates = append(candidates, parser.DiscoveredFile{ Path: src, Agent: parser.AgentCodex, @@ -5390,11 +5412,72 @@ func (e *Engine) classifyCodexIndexPath( // A UUID can exist in both sessions/ and archived_sessions/. // Prefer the path the DB already tracks so a title rename does // not reparse a stale duplicate over the stored copy. - out = append(out, pickPreferredCodexDiscoveredFile(e.db, candidates)) + chosen := pickPreferredCodexDiscoveredFile(e.db, candidates) + // Pin the provider source to the chosen path and route it through the + // provider so processProviderFile parses exactly this copy instead of + // re-canonicalizing the UUID to the preferred dated layout, which would + // undo the DB-aware selection above. 
+ chosen.ProviderProcess = true + chosen.ProviderSource = e.codexPinnedProviderSource(chosen.Path) + out = append(out, chosen) } return out } +// codexSourceFileForUUID resolves a Codex session UUID to its on-disk +// transcript path under a single sessions root, preferring the live dated +// layout over a flat archived entry. It scopes a Codex provider to that one +// root so the provider's cross-root live-over-archived canonicalization does +// not collapse a per-root duplicate; classifyCodexIndexPath then applies its +// own DB-aware preference across the per-root candidates. Returns "" when the +// provider, source lookup, or path resolution fails. +func (e *Engine) codexSourceFileForUUID(root, uuid string) string { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return "" + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: []string{root}, + Machine: e.machine, + }) + source, found, err := provider.FindSource( + context.Background(), + parser.FindSourceRequest{RawSessionID: uuid}, + ) + if err != nil || !found { + return "" + } + return providerDiscoveredPath(source) +} + +// codexPinnedProviderSource builds a Codex provider SourceRef pinned to the +// exact path, bypassing the provider's live-over-archived canonicalization. It +// is used when the engine's DB-aware or mtime-aware logic has already chosen +// which on-disk copy of a duplicated UUID to parse, so processProviderFile +// parses that copy instead of the provider's preferred dated layout. Returns +// nil when the Codex provider or the path's source shape is unavailable. 
+func (e *Engine) codexPinnedProviderSource(path string) *parser.SourceRef { + factory, ok := e.providerFactories[parser.AgentCodex] + if !ok || factory == nil { + return nil + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[parser.AgentCodex], + Machine: e.machine, + }) + pinner, ok := provider.(interface { + SourceRefForPath(string) (parser.SourceRef, bool) + }) + if !ok { + return nil + } + source, ok := pinner.SourceRefForPath(path) + if !ok { + return nil + } + return &source +} + // codexStoredNameDiffers reports whether the stored session_name for a Codex // session differs from the given index title. Unknown sessions return false: // a brand-new session is synced through its own transcript event, not the @@ -5460,6 +5543,54 @@ func pickPreferredCodexDiscoveredFile( return chosen } +// shouldSkipCodex is the legacy file_path + effective-mtime skip check used by +// the S3 Codex sync path (processCodex). Non-S3 Codex is provider-authoritative +// and uses shouldSkipCodexFingerprint; this remains for s3:// Codex sources, +// which the S3 sync path buffers to a temp file and feeds through processCodex. +func (e *Engine) shouldSkipCodex( + path string, info os.FileInfo, +) bool { + if e.forceParse { // parse-diff: always re-parse + return false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) + if !ok || storedSize != info.Size() { + return false + } + if project, ok := e.db.GetProjectByPath(lookupPath); ok && + parser.NeedsProjectReparse(project) { + return false + } + if e.db.GetDataVersionByPath(lookupPath) < + db.CurrentDataVersion() { + return false + } + // A Codex title lives in session_index.jsonl, not the transcript, so a + // title-only rename can change the title with no transcript signal. 
Detect + // it directly rather than inferring it from an mtime inequality: the index + // mtime is folded into the stored watermark, so a later rename whose index + // mtime lands at or below that watermark is invisible to a mtime compare, + // and the old storedMtime==effectiveMtime fast path skipped without ever + // consulting the title. codexIndexSessionNameChanged reads the live title + // (cached per index file) and the stored name; a cheaper stored-name lookup + // to keep this fully off the hot skip path is a deferred follow-up. + if e.codexIndexSessionNameChanged(path) { + return false // title changed -> re-parse to refresh metadata + } + // Title verified unchanged: skip when the transcript itself is unchanged. + // Compare the bare file mtime, not the index-folded effective mtime -- the + // stored watermark may already include a folded index mtime, and a later + // bump of the shared session_index.jsonl (e.g. another session's rename) + // lifts every session's effective mtime; with the title confirmed + // unchanged, that rise must not force a needless reparse. + fileMtime := info.ModTime().UnixNano() + return fileMtime <= storedMtime +} + func (e *Engine) processCodex( file parser.DiscoveredFile, info os.FileInfo, ) processResult { @@ -5533,44 +5664,6 @@ func (e *Engine) processCodex( } } -func (e *Engine) processCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Use effective mtime = max(events.jsonl, workspace.yaml) so - // that a new or updated workspace.yaml triggers a re-parse and - // the stored mtime stays consistent with what we compare against - // on subsequent syncs (preventing oscillation). 
- effectiveMtime := copilotEffectiveMtime(file.Path, info) - if e.shouldSkipCopilot(file.Path, info, effectiveMtime) { - return processResult{skip: true} - } - - sess, msgs, usageEvents, err := parser.ParseCopilotSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - if effectiveMtime > sess.File.Mtime { - sess.File.Mtime = effectiveMtime - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - } -} - // copilotEffectiveMtime returns max(events.jsonl mtime, // workspace.yaml mtime). For flat .jsonl sessions (no // workspace.yaml sibling) it returns the events.jsonl mtime. @@ -5723,170 +5816,6 @@ func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// shouldSkipCopilot is like shouldSkipByPath but uses the -// pre-computed effectiveMtime (max of events.jsonl and -// workspace.yaml) for the mtime comparison, keeping the stored -// value consistent with what we compare against on next sync. -func (e *Engine) shouldSkipCopilot( - path string, info os.FileInfo, effectiveMtime int64, -) bool { - if e.forceParse { // parse-diff: always re-parse - return false - } - lookupPath := path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(path) - } - storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok { - return false - } - if storedSize != info.Size() || storedMtime != effectiveMtime { - return false - } - if e.db.GetDataVersionByPath(lookupPath) < - db.CurrentDataVersion() { - return false - } - return true -} - -func (e *Engine) processGemini( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Fast path: skip by file_path + mtime before parsing. 
- if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseGeminiSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - -func (e *Engine) processVSCodeCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseVSCodeCopilotSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - { - Session: *sess, - Messages: msgs, - UsageEvents: sess.UsageEvents, - }, - }, - } -} - -func (e *Engine) processVisualStudioCopilot( - file parser.DiscoveredFile, _ os.FileInfo, -) processResult { - // Resolve the physical trace path first. Discovery emits one - // # work item per conversation; a watcher event - // or single-session resync may instead pass a real trace file, which can - // hold spans for several conversations. - tracePath := file.Path - var conversationIDs []string - if resolved, conversationID, ok := - parser.ParseVisualStudioCopilotVirtualPath(file.Path); ok { - tracePath = resolved - conversationIDs = []string{conversationID} - } - - // Skip on a fingerprint spanning every sibling trace file: a - // conversation's transcript is rebuilt from all of them, so a change to any - // sibling must defeat the skip even when the representative trace file is - // unchanged. 
The primary-file stat alone would let a single-session resync - // or watch fallback leave a session stale. - size, mtime, err := parser.VisualStudioCopilotTraceFingerprintStrict( - tracePath, - ) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - if e.shouldSkipByPath( - file.Path, fakeSnapshotInfo{fSize: size, fMtime: mtime}, - ) { - return processResult{skip: true} - } - - // A real trace file can hold spans for several conversations, so enumerate - // them and emit each independently. - if conversationIDs == nil { - ids, err := parser.VisualStudioCopilotFileConversationIDs(file.Path) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - conversationIDs = ids - } - - hash, hashErr := ComputeFileHash(tracePath) - - var results []parser.ParseResult - for _, conversationID := range conversationIDs { - sess, msgs, err := parser.ParseVisualStudioCopilotConversation( - tracePath, conversationID, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err, noCacheSkip: true} - } - if sess == nil { - continue - } - if hashErr == nil { - sess.File.Hash = hash - } - results = append(results, parser.ParseResult{ - Session: *sess, Messages: msgs, - }) - } - - // forceReplace mirrors the other multi-session-per-source agents - // (Zed, Kiro): each conversation's messages are fully re-derived from - // all of its spans on every parse, so existing rows must be replaced - // rather than appended. 
- return processResult{ - results: results, - forceReplace: true, - } -} - func (e *Engine) processZed( file parser.DiscoveredFile, info os.FileInfo, ) processResult { @@ -6188,35 +6117,6 @@ func vibeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -func (e *Engine) processPositron( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParsePositronSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - // aiderFileUnchanged reports whether a physical aider history file is // unchanged since the last sync. Aider sessions are stored under virtual // "#" paths, so the generic shouldSkipByPath (which looks the @@ -6384,6 +6284,8 @@ func (e *Engine) processAntigravityCLI( if sess == nil { return processResult{} } + sess.File.Size = effectiveInfo.Size() + sess.File.Mtime = effectiveInfo.ModTime().UnixNano() hash, err := ComputeFileHash(file.Path) if err == nil { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 3bf7bfb49..31c28450e 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2128,6 +2128,150 @@ func TestSyncPathsGeminiJSONL(t *testing.T) { assertSessionMessageCount(t, env.db, "gemini:"+sessionID, 2) } +func TestSyncPathsGeminiProjectMetadataEventRefreshesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-project-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, 
os.WriteFile( + projectsPath, + fmt.Appendf(nil, + `{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + path := env.writeGeminiSession( + t, + filepath.Join( + "tmp", "alias", "chats", + "session-001.json", + ), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg( + "m1", tsEarly, "Hello Gemini", + ), + testjsonl.GeminiAssistantMsg( + "m2", tsEarlyS5, "Hi there!", nil, + ), + }, + ), + ) + + env.engine.SyncPaths([]string{path}) + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(path) + require.NoError(t, err, "stat gemini session") + env.engine.InjectSkipCache(map[string]int64{ + path: info.ModTime().UnixNano(), + }) + + writeProject("two") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeProject("three") + env.engine.SyncPaths([]string{path, projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) + + writeProject("four") + env.engine.SyncPaths([]string{projectsPath, path}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "four", sess.Project) + }, + ) + + require.NoError(t, os.Remove(projectsPath), "remove projects") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "alias", sess.Project) + }, + ) +} + +// TestSyncAllGeminiProjectMetadataChangeReparsesProject guards that a scheduled +// SyncAll is not fooled by the pre-fingerprint fast skip when only Gemini's +// projects.json changed. 
The session transcript's own size and mtime are left +// untouched, so the removed AgentGemini fast skip (which compared just the +// session stat) would have kept the stale project; the composite fingerprint, +// which folds in projects.json, must drive a reparse on the periodic full sync. +func TestSyncAllGeminiProjectMetadataChangeReparsesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-syncall-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, os.WriteFile( + projectsPath, + fmt.Appendf(nil, + `{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + env.writeGeminiSession( + t, + filepath.Join("tmp", "alias", "chats", "session-001.json"), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("m1", tsEarly, "Hello Gemini"), + testjsonl.GeminiAssistantMsg("m2", tsEarlyS5, "Hi there!", nil), + }, + ), + ) + + env.engine.SyncAll(context.Background(), nil) + assertSessionState(t, env.db, "gemini:"+sessionID, func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }) + + // Change only the project metadata, then advance its mtime so the composite + // fingerprint moves forward. The session transcript is left untouched, so a + // fast skip keyed on the transcript stat alone would keep the stale "one". 
+ writeProject("two") + later := time.Now().Add(48 * time.Hour) + require.NoError(t, os.Chtimes(projectsPath, later, later), "bump projects mtime") + env.engine.SyncAll(context.Background(), nil) + + assertSessionState(t, env.db, "gemini:"+sessionID, func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }) +} + func TestSyncPathsCodexAcceptsFlatArchived(t *testing.T) { env := setupTestEnv(t) @@ -2663,6 +2807,104 @@ func TestSyncPathsCodexIndexEventRefreshesStoredDuplicate(t *testing.T) { assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) } +func TestSyncPathsCodexArchivedDuplicateEventPinsChangedFile(t *testing.T) { + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + archivedDir := filepath.Join(root, "archived_sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + require.NoError(t, os.MkdirAll(archivedDir, 0o755)) + env := setupTestEnv(t, WithCodexDirs([]string{codexDir, archivedDir})) + + uuid := "f7a8b9ca-7890-1234-ef01-456789012346" + staleLiveContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Stale live copy"). + String() + archivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + String() + updatedArchivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + AddCodexMessage(tsEarlyS5, "assistant", "Updated archived reply"). 
+ String() + + livePath := env.writeCodexSession( + t, + filepath.Join("2026", "05", "04"), + "rollout-2026-05-04T02-10-04-"+uuid+".jsonl", + staleLiveContent, + ) + archivedPath := env.writeSession( + t, archivedDir, + "rollout-2026-05-04T14-31-58-"+uuid+".jsonl", + archivedContent, + ) + initialTime := time.Now().Add(-2 * time.Hour) + require.NoError(t, os.Chtimes(livePath, initialTime, initialTime), "chtimes live") + require.NoError(t, os.Chtimes(archivedPath, initialTime, initialTime), "chtimes archived") + + env.engine.SyncAll(context.Background(), nil) + assert.Equal(t, livePath, env.db.GetSessionFilePath("codex:"+uuid)) + + newTime := time.Now().Add(-30 * time.Minute) + require.NoError(t, os.WriteFile(archivedPath, []byte(updatedArchivedContent), 0o644)) + require.NoError(t, os.Chtimes(archivedPath, newTime, newTime), "chtimes archived update") + + env.engine.SyncPaths([]string{archivedPath}) + + assert.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "archived transcript event must parse the changed file, not the stale live duplicate") + assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) +} + +func TestSyncSingleSessionCodexPreservesStoredArchivedDuplicate(t *testing.T) { + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + archivedDir := filepath.Join(root, "archived_sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + require.NoError(t, os.MkdirAll(archivedDir, 0o755)) + env := setupTestEnv(t, WithCodexDirs([]string{codexDir, archivedDir})) + + uuid := "f7a8b9ca-7890-1234-ef01-456789012347" + archivedContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Archived copy"). + AddCodexMessage(tsEarlyS5, "assistant", "Archived reply"). + String() + staleLiveContent := testjsonl.NewSessionBuilder(). + AddCodexMeta(tsEarly, uuid, "/home/user/code/api", "user"). + AddCodexMessage(tsEarlyS1, "user", "Stale live copy"). 
+ String() + + archivedPath := env.writeSession( + t, archivedDir, + "rollout-2026-05-04T14-31-58-"+uuid+".jsonl", + archivedContent, + ) + initialTime := time.Now().Add(-2 * time.Hour) + require.NoError(t, os.Chtimes(archivedPath, initialTime, initialTime), "chtimes archived") + + env.engine.SyncAll(context.Background(), nil) + require.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "DB must track the archived copy before a stale live duplicate appears") + + livePath := env.writeCodexSession( + t, + filepath.Join("2026", "05", "04"), + "rollout-2026-05-04T02-10-04-"+uuid+".jsonl", + staleLiveContent, + ) + require.NoError(t, os.Chtimes(livePath, initialTime, initialTime), "chtimes live") + + require.NoError(t, env.engine.SyncSingleSession("codex:"+uuid)) + + assert.Equal(t, archivedPath, env.db.GetSessionFilePath("codex:"+uuid), + "single-session resync must preserve the stored archived source") + assertSessionMessageCount(t, env.db, "codex:"+uuid, 2) +} + func TestSyncPathsGeminiRejectsWrongStructure(t *testing.T) { env := setupTestEnv(t) @@ -4555,6 +4797,8 @@ func TestSyncAllSinceOpenCodeStoragePicksUpUsagePartUpdate(t *testing.T) { require.NoError(t, os.Chtimes(usagePartPath, future, future), "chtimes usage part") require.NoError(t, os.Chtimes(sessionPath, sessionMtime, sessionMtime), "restore session mtime") + // Composite freshness includes the part file, so the part-only edit is + // fresh relative to the cutoff and re-syncs the updated reply. stats := env.engine.SyncAllSince(context.Background(), cutoff, nil) require.Equal(t, 1, stats.Synced, "SyncAllSince synced = %d, want 1", stats.Synced) @@ -5701,18 +5945,15 @@ func TestResyncAllReplacesMessageContent(t *testing.T) { }) require.NoError(t, err, "update message content") - // Normal SyncAll should skip (file unchanged on disk). 
- stats := env.engine.SyncAll(context.Background(), nil) - require.Equal(t, 1, stats.Skipped, "expected 1 skip, got %d", stats.Skipped) - msgs = fetchMessages(t, env.db, fullID) - require.True(t, strings.Contains(msgs[1].Content, "stale content"), "SyncAll should not have replaced content") - // Capture FTS state before resync so a regression that // breaks FTS isn't masked by HasFTS() returning false // post-resync. hadFTS := env.db.HasFTS() - // ResyncAll should re-parse and replace message content. + // ResyncAll should re-parse and replace message content. Gemini is + // provider-authoritative, so it has no DB-backed mtime skip; a plain + // SyncAll would also re-parse the unchanged file. ResyncAll additionally + // drops and rebuilds the FTS index, which is what this test guards. env.engine.ResyncAll(context.Background(), nil) msgs = fetchMessages(t, env.db, fullID) require.Equal(t, 2, len(msgs), "got %d messages after resync, want 2", len(msgs)) @@ -6929,6 +7170,402 @@ func TestSyncPathsVSCodeCopilotJSONLPriority(t *testing.T) { assert.Equal(t, 0, len(page.Sessions), "expected 0 sessions (.json skipped), got %d", len(page.Sessions)) } +func TestSyncPathsVSCodeCopilotWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + vscDir := filepath.Join(dir, "vscode") + hashDir := filepath.Join(vscDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVSCodeCopilot: {vscDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "bbbbbbbb-cccc-dddd-eeee-ffffffffffff" + session := 
fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonlPath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(jsonlPath) + require.NoError(t, err, "stat vscode copilot session") + engine.InjectSkipCache(map[string]int64{ + jsonlPath: info.ModTime().UnixNano(), + }) + + writeWorkspace("two") + engine.SyncPaths([]string{workspacePath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeWorkspace("three") + engine.SyncPaths([]string{jsonlPath, workspacePath}) + assertSessionState( + t, database, "vscode-copilot:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) +} + +func TestSyncPathsVSCodeCopilotPersistsUsageEvents(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + vscDir := filepath.Join(dir, "vscode") + chatDir := filepath.Join( + vscDir, "workspaceStorage", "abc123", + "chatSessions", + ) + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVSCodeCopilot: {vscDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-000000000000" + session := fmt.Sprintf( + `{"version":3,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + 
`"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000,`+ + `"modelId":"copilot/claude-opus-4.8",`+ + `"result":{"metadata":{`+ + `"promptTokens":12,`+ + `"outputTokens":3,`+ + `"resolvedModel":"claude-opus-4-8"}}}]}`, + uuid, + ) + jsonPath := filepath.Join(chatDir, uuid+".json") + dbtest.WriteTestFile(t, jsonPath, []byte(session)) + + engine.SyncPaths([]string{jsonPath}) + + ctx := context.Background() + sessionID := "vscode-copilot:" + uuid + events, err := database.GetUsageEvents(ctx, sessionID) + require.NoError(t, err) + require.Len(t, events, 1) + assert.Equal(t, "vscode-copilot", events[0].Source) + assert.Equal(t, "claude-opus-4-8", events[0].Model) + assert.Equal(t, 12, events[0].InputTokens) + assert.Equal(t, 3, events[0].OutputTokens) + + require.NoError(t, engine.SyncSingleSession(sessionID)) + events, err = database.GetUsageEvents(ctx, sessionID) + require.NoError(t, err) + require.Len(t, events, 1) + assert.Equal(t, "claude-opus-4-8", events[0].Model) +} + +func TestSyncPathsPositronJSONLPriority(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + chatDir := filepath.Join( + positronDir, "workspaceStorage", "abc123", + "chatSessions", + ) + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-aaaaaaaaaaaa" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + + jsonPath := filepath.Join(chatDir, uuid+".json") + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + dbtest.WriteTestFile(t, jsonPath, []byte(session)) + 
dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonPath}) + + page, err := database.ListSessions( + context.Background(), db.SessionFilter{Limit: 10}, + ) + require.NoError(t, err) + assert.Equal(t, 0, len(page.Sessions), "expected 0 sessions (.json skipped), got %d", len(page.Sessions)) +} + +func TestSyncAllPositronJSONLPriority(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + uuid := "cccccccc-dddd-eeee-ffff-bbbbbbbbbbbb" + jsonSession := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"json fallback"},`+ + `"response":[{"value":"json response"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlSession := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"jsonl preferred"},`+ + `"response":[{"value":"jsonl response"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + + jsonPath := filepath.Join(chatDir, uuid+".json") + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + dbtest.WriteTestFile(t, jsonPath, []byte(jsonSession)) + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+jsonlSession+`}`), + ) + + stats := engine.SyncAll(context.Background(), nil) + assert.Equal(t, 1, stats.Synced, "synced = %d, want 1", stats.Synced) + + sess, err := database.GetSession(context.Background(), 
"positron:"+uuid) + require.NoError(t, err) + require.NotNil(t, sess) + assertSessionMessageCount(t, database, "positron:"+uuid, 2) + msgs := fetchMessages(t, database, "positron:"+uuid) + require.NotEmpty(t, msgs) + assert.Equal(t, "jsonl preferred", msgs[0].Content) +} + +func TestSyncPathsPositronWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "dddddddd-eeee-ffff-aaaa-bbbbbbbbbbbb" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncPaths([]string{jsonlPath}) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(jsonlPath) + require.NoError(t, err, "stat positron session") + engine.InjectSkipCache(map[string]int64{ + jsonlPath: info.ModTime().UnixNano(), + }) + + writeWorkspace("two") + engine.SyncPaths([]string{workspacePath}) + assertSessionState( + t, 
database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeWorkspace("three") + engine.SyncPaths([]string{jsonlPath, workspacePath}) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) +} + +func TestSyncAllSincePositronWorkspaceMetadataRefreshesProject(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + dir := t.TempDir() + positronDir := filepath.Join(dir, "positron") + hashDir := filepath.Join(positronDir, "workspaceStorage", "abc123") + chatDir := filepath.Join(hashDir, "chatSessions") + workspacePath := filepath.Join(hashDir, "workspace.json") + + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentPositron: {positronDir}, + }, + Machine: "local", + }) + + writeWorkspace := func(name string) { + t.Helper() + dbtest.WriteTestFile(t, workspacePath, fmt.Appendf(nil, + `{"folder":"file:///Users/alice/code/%s"}`, + name, + )) + } + + uuid := "dddddddd-eeee-ffff-aaaa-cccccccccccc" + session := fmt.Sprintf( + `{"version":1,"sessionId":"%s",`+ + `"creationDate":1704103200000,`+ + `"lastMessageDate":1704103260000,`+ + `"requests":[{"requestId":"r1",`+ + `"message":{"text":"hello"},`+ + `"response":[{"value":"hi"}],`+ + `"timestamp":1704103200000}]}`, + uuid, + ) + jsonlPath := filepath.Join(chatDir, uuid+".jsonl") + + writeWorkspace("one") + dbtest.WriteTestFile( + t, jsonlPath, + []byte(`{"kind":0,"v":`+session+`}`), + ) + + engine.SyncAll(context.Background(), nil) + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + oldTime := time.Now().Add(-48 * time.Hour) + require.NoError(t, os.Chtimes(jsonlPath, oldTime, oldTime), "chtimes session") + require.NoError(t, os.Chtimes(workspacePath, oldTime, oldTime), "chtimes workspace") + 
cutoff := time.Now().Add(-1 * time.Hour) + + writeWorkspace("two") + stats := engine.SyncAllSince(context.Background(), cutoff, nil) + assert.Equal(t, 1, stats.Synced, "synced = %d, want 1", stats.Synced) + + assertSessionState( + t, database, "positron:"+uuid, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) +} + func TestPiSessionIntegration(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") @@ -7726,7 +8363,7 @@ func TestIncrementalSync_CodexStoresEffectiveMtime(t *testing.T) { "effective mtime exceeds the plain rollout mtime") } -func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { +func TestIncrementalSync_CodexAppendFullReparseStoresRawFileSize(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") } @@ -7768,13 +8405,17 @@ func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { live, err := os.ReadFile(path) require.NoError(t, err, "read live transcript") - require.Less(t, *sess.FileSize, int64(len(live)), - "partial trailing JSON should remain outside the consumed prefix") - prefix := live[:*sess.FileSize] - sum := sha256.Sum256(prefix) + // Codex does not advertise incremental append, so re-syncing the appended + // transcript is a full re-parse that stores the raw file size and hash + // (including the ignored partial trailing line). The parsed-snapshot vs + // partial-tail distinction is enforced at parse-diff time via + // CodexTranscriptConsumedSize, not in the stored fingerprint. 
+ require.Equal(t, int64(len(live)), *sess.FileSize, + "full Codex re-parse stores the raw file size") + sum := sha256.Sum256(live) wantHash := fmt.Sprintf("%x", sum[:]) assert.Equal(t, wantHash, *sess.FileHash, - "incremental Codex hash must match the consumed file_size prefix") + "stored Codex hash matches the whole-file fingerprint") } func TestIncrementalSync_CodexExecAppendRetainsEvents(t *testing.T) { diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index c50a01440..923ba7ef9 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1420,8 +1420,10 @@ func TestShouldSkipCodexReparsesStaleProject(t *testing.T) { }, } - assert.False(t, e.shouldSkipCodex(path, info), - "stale generated roborev CI projects must be reparsed") + assert.False(t, e.shouldSkipCodexFingerprint(path, parser.SourceFingerprint{ + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + }), "stale generated roborev CI projects must be reparsed") } func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { @@ -1459,6 +1461,13 @@ func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { db: database, idPrefix: "host~", skipCache: map[string]int64{path: info.ModTime().UnixNano()}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, pathRewriter: func(path string) string { return "host:" + path }, @@ -1475,6 +1484,156 @@ func TestProcessFileSkipCacheReparsesStaleCodexProject(t *testing.T) { assert.Equal(t, "agentsview", res.results[0].Session.Project) } +func TestProcessFileSkipCacheReparsesStaleCodexDataVersion(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + path := filepath.Join(root, "rollout-2026-06-21T18-59-38-abc.jsonl") + content := testjsonl.JoinJSONL( 
+ testjsonl.CodexSessionMetaJSON( + "abc", + "/home/user/code/agentsview", + "user", + "2024-01-01T10:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this", "2024-01-01T10:00:01Z"), + ) + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) + info, err := os.Stat(path) + require.NoError(t, err, "stat codex fixture") + + sess := db.Session{ + ID: "host~codex:abc", + Project: "agentsview", + Machine: "host", + Agent: "codex", + FilePath: strPtr("host:" + path), + FileSize: int64Ptr(info.Size()), + FileMtime: int64Ptr(info.ModTime().UnixNano()), + } + require.NoError(t, database.UpsertSession(sess)) + require.NoError(t, database.SetSessionDataVersion( + sess.ID, db.CurrentDataVersion()-1, + )) + + e := &Engine{ + db: database, + idPrefix: "host~", + skipCache: map[string]int64{path: info.ModTime().UnixNano()}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + pathRewriter: func(path string) string { + return "host:" + path + }, + } + + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentCodex, + Path: path, + }) + require.NoError(t, res.err) + require.False(t, res.skip, + "skip cache must not hide stale parser data versions") + require.Len(t, res.results, 1) +} + +func TestProcessFileCodexDBFreshSkipIsNotCached(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + path := filepath.Join(root, "rollout-2026-06-21T18-59-38-abc.jsonl") + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + "abc", + "/home/user/code/agentsview", + "user", + "2024-01-01T10:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this", "2024-01-01T10:00:01Z"), + ) + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) + info, err := os.Stat(path) + 
require.NoError(t, err, "stat codex fixture") + + sess := db.Session{ + ID: "host~codex:abc", + Project: "agentsview", + Machine: "host", + Agent: "codex", + FilePath: strPtr("host:" + path), + FileSize: int64Ptr(info.Size()), + FileMtime: int64Ptr(info.ModTime().UnixNano()), + } + require.NoError(t, database.UpsertSession(sess)) + require.NoError(t, database.SetSessionDataVersion( + sess.ID, db.CurrentDataVersion(), + )) + + e := &Engine{ + db: database, + idPrefix: "host~", + skipCache: map[string]int64{}, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + pathRewriter: func(path string) string { + return "host:" + path + }, + } + + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentCodex, + Path: path, + }) + require.NoError(t, res.err) + require.True(t, res.skip) + assert.True(t, res.noCacheSkip) + assert.Empty(t, e.SnapshotSkipCache()) +} + +func TestClassifyCodexIndexPathSkipsMissingTranscript(t *testing.T) { + database := openTestDB(t) + root := t.TempDir() + codexDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(codexDir, 0o755)) + indexPath := filepath.Join(root, parser.CodexSessionIndexFilename) + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e7" + missingPath := filepath.Join( + codexDir, + "2026", "06", "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + require.NoError(t, database.UpsertSession(db.Session{ + ID: "codex:" + uuid, + Project: "agentsview", + Machine: "local", + Agent: string(parser.AgentCodex), + SessionName: strPtr("Old title"), + FilePath: &missingPath, + })) + require.NoError(t, os.WriteFile(indexPath, []byte( + `{"id":"`+uuid+`","thread_name":"New title",`+ + `"updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ), 0o644)) + engine := 
NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {codexDir}, + }, + Machine: "local", + }) + + files := engine.classifyCodexIndexPath(indexPath) + + assert.Empty(t, files) +} + func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { database := openTestDB(t) root := t.TempDir() @@ -1525,21 +1684,26 @@ func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") - info, err = os.Stat(path) - require.NoError(t, err, "stat appended codex fixture") e := &Engine{ db: database, idPrefix: "host~", + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, pathRewriter: func(path string) string { return "host:" + path }, } - res := e.processCodex(parser.DiscoveredFile{ + res := e.processFile(context.Background(), parser.DiscoveredFile{ Agent: parser.AgentCodex, Path: path, - }, info) + }) require.NoError(t, res.err) require.Nil(t, res.incremental, "stale project metadata must force full parse even when file appended") @@ -1615,21 +1779,26 @@ func TestProcessCodexAppendedStaleProjectCarriesForceReplace(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") - info, err = os.Stat(path) - require.NoError(t, err, "stat appended codex fixture") e := &Engine{ db: database, idPrefix: "host~", + agentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, 
pathRewriter: func(path string) string { return "host:" + path }, } - res := e.processCodex(parser.DiscoveredFile{ + res := e.processFile(context.Background(), parser.DiscoveredFile{ Agent: parser.AgentCodex, Path: path, - }, info) + }) require.NoError(t, res.err) require.Nil(t, res.incremental, "stale project metadata must force full parse even when file appended") @@ -3356,7 +3525,7 @@ func TestEngine_ClassifyOnePathReasonixProjectBareMeta(t *testing.T) { dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) dbtest.WriteTestFile(t, metaPath, []byte(`{"model":"claude"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3379,7 +3548,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { metaPath := sessionPath + ".meta" dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected deleted Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3400,7 +3569,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { reasonixDir, "projects", "proj", "sessions", "session-123.jsonl", ) - _, ok := engine.classifyOnePath(sessionPath, nil) + _, ok := engine.classifyOnePath(sessionPath) assert.False(t, ok, "expected deleted Reasonix transcript to be ignored") } @@ -4187,3 +4356,55 @@ func TestShouldSkipCodexTitleRenameBelowStoredMtimeDoesNotSkip(t *testing.T) { assert.False(t, f.e.shouldSkipCodex(f.path, f.info), "title-only rename at or below stored watermark must not skip") } + +func TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine 
:= NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + require.NoError(t, os.Remove(sessionPath), "Remove(%q)", sessionPath) + + files := engine.classifyPaths([]string{sessionPath}) + assert.Empty(t, files) +} + +func TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + projectsPath := filepath.Join(geminiDir, "projects.json") + dbtest.WriteTestFile( + t, + projectsPath, + []byte(`{"projects":{"/Users/alice/code/sample":"alias"}}`), + ) + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + + files := engine.classifyPaths([]string{projectsPath}) + require.Len(t, files, 1) + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, parser.AgentGemini, files[0].Agent) + assert.True(t, files[0].ForceParse) +} diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index 39577484c..92f950882 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -3,7 +3,6 @@ package sync import ( "context" "fmt" - "log" "os" "path/filepath" "sort" @@ -80,7 +79,11 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi } continue } - files = append(files, e.parseDiffProviderSources(ctx, def.Type)...) + providerFiles, err := e.parseDiffProviderSources(ctx, def.Type) + if err != nil { + return nil, err + } + files = append(files, providerFiles...) 
} // DiscoverFunc does not emit the shared-SQLite source for Kiro // (data.sqlite3) or db-mode OpenCode (opencode.db) — normal sync @@ -215,14 +218,14 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi func (e *Engine) parseDiffProviderSources( ctx context.Context, agentType parser.AgentType, -) []parser.DiscoveredFile { +) ([]parser.DiscoveredFile, error) { factory, ok := e.providerFactories[agentType] if !ok || factory == nil { - return nil + return nil, nil } roots := e.agentDirs[agentType] if len(roots) == 0 { - return nil + return nil, nil } provider := factory.NewProvider(parser.ProviderConfig{ Roots: roots, @@ -230,8 +233,10 @@ func (e *Engine) parseDiffProviderSources( }) sources, err := provider.Discover(ctx) if err != nil { - log.Printf("parse-diff %s provider discovery: %v", agentType, err) - return nil + return nil, fmt.Errorf( + "parse-diff %s provider discovery: %w", + agentType, err, + ) } def := provider.Definition() var files []parser.DiscoveredFile @@ -253,7 +258,7 @@ func (e *Engine) parseDiffProviderSources( ProviderProcess: true, }) } - return files + return files, nil } func (e *Engine) parseDiffAgentDiscoverable(def parser.AgentDef) bool { @@ -440,7 +445,7 @@ func isOpenCodeFamilyProviderVirtualSource(path string) bool { // shared trace file, and the "#runIdx" suffix aider appends to its shared // history file. 
func stripVirtualSourceSuffix(path string) string { - if tracePath, _, ok := parser.ParseVisualStudioCopilotVirtualPath(path); ok { + if tracePath, _, ok := parser.SplitVisualStudioCopilotVirtualPath(path); ok { return tracePath } if historyPath, _, ok := parser.ParseAiderVirtualPath(path); ok { diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index a389eac76..0090f76b7 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -349,6 +349,42 @@ func TestProviderVirtualSourceBackedByEventPreservesHashInDBPath(t *testing.T) { assert.False(t, providerVirtualSourceBackedByEvent(sourcePath, filepath.Dir(dbPath))) } +func TestParseDiffProviderDiscoveryErrorFails(t *testing.T) { + root := t.TempDir() + discoverErr := errors.New("discover failed") + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCodex, + DisplayName: "Codex", + }, + }, + }, + discoverErr: discoverErr, + } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCodex: {root}, + }, + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + report, err := engine.ParseDiff(context.Background(), ParseDiffOptions{ + Agents: []parser.AgentType{parser.AgentCodex}, + }) + + require.Error(t, err) + assert.Nil(t, report) + assert.ErrorContains(t, err, "parse-diff codex provider discovery") + assert.ErrorIs(t, err, discoverErr) +} + func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-skip.jsonl") @@ -667,6 +703,56 @@ func 
TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *te assert.True(t, provider.parseRequest.ForceParse) } +func TestProcessFileProviderAuthoritativeSkipsFreshCopilotBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join( + root, "session-state", "copilot-fresh", "events.jsonl", + ) + workspacePath := filepath.Join(filepath.Dir(sourcePath), "workspace.yaml") + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, &workspacePath, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCopilot, + DisplayName: "Copilot CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCopilot, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCopilot: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCopilot: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentCopilot, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCopilot, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1257,3 +1343,52 @@ func writeFreshCoworkProviderSource( return sourcePath, sourceMtime } + +func writeFreshProviderDBSession( + t *testing.T, + sourcePath string, + mtimeSidecar *string, +) 
int64 { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + sourceTime := time.Unix(1_781_475_210, 0) + require.NoError(t, os.Chtimes(sourcePath, sourceTime, sourceTime)) + mtime := sourceTime.UnixNano() + if mtimeSidecar != nil { + sidecarTime := sourceTime.Add(time.Second) + require.NoError(t, os.WriteFile(*mtimeSidecar, []byte("name: Fresh\n"), 0o644)) + require.NoError(t, os.Chtimes(*mtimeSidecar, sidecarTime, sidecarTime)) + mtime = sidecarTime.UnixNano() + } + + return mtime +} + +func requireFreshProviderSession( + t *testing.T, + database *db.DB, + agent parser.AgentType, + sourcePath string, + sourceMtime int64, +) { + t.Helper() + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + fullSessionID := string(agent) + ":fresh" + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "provider-project", + Machine: "devbox", + Agent: string(agent), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) +} diff --git a/internal/sync/provider_shadow_codex_test.go b/internal/sync/provider_shadow_codex_test.go new file mode 100644 index 000000000..0ef157e65 --- /dev/null +++ b/internal/sync/provider_shadow_codex_test.go @@ -0,0 +1,83 @@ +package sync + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +// TestObserveProviderSourceParsesCodexSourceWithIndexTitle exercises the folded +// Codex provider end to end through ObserveProviderSource. 
The legacy +// ParseCodexSession entrypoint was deleted in the fold, so this replaces the +// shadow-baseline comparison with provider-API coverage that pins the parsed +// session shape: discovery finds the dated transcript, the sibling +// session_index.jsonl supplies the thread title as session_name, and the +// observed parse output and data-version planning match the source. +func TestObserveProviderSourceParsesCodexSourceWithIndexTitle(t *testing.T) { + base := t.TempDir() + root := filepath.Join(base, "sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c12abcd" + sourcePath := filepath.Join( + root, + "2026", + "06", + "11", + "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", + ) + writeProviderShadowSourceFile( + t, + sourcePath, + testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + uuid, + "/home/user/code/api", + "codex_cli_rs", + "2026-06-11T12:44:06Z", + ), + testjsonl.CodexMsgJSON("user", "provider question", "2026-06-11T12:44:07Z"), + ), + ) + writeProviderShadowSourceFile( + t, + filepath.Join(base, parser.CodexSessionIndexFilename), + `{"id":"`+uuid+`","thread_name":"Provider title","updated_at":"2026-06-11T17:34:20Z"}`+"\n", + ) + + provider, ok := parser.NewProvider(parser.AgentCodex, parser.ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ + Source: sources[0], + Machine: "devbox", + }) + require.NoError(t, err) + require.Len(t, observation.Results, 1) + + session := observation.Results[0].Session + assert.Equal(t, "codex:"+uuid, session.ID) + assert.Equal(t, parser.AgentCodex, session.Agent) + assert.Equal(t, "devbox", session.Machine) + assert.Equal(t, "/home/user/code/api", session.Cwd) + assert.Equal(t, "Provider title", session.SessionName) + assert.Equal(t, "provider question", 
session.FirstMessage) + assert.Equal(t, sourcePath, session.File.Path) + assert.Equal(t, observation.Fingerprint.Hash, session.File.Hash) + + require.Len(t, observation.Results[0].Messages, 1) + assert.Equal(t, parser.RoleUser, observation.Results[0].Messages[0].Role) + + assert.Equal(t, []string{session.ID}, observation.Planned.DataVersionSessionIDs()) + assert.Empty(t, observation.Planned.Diagnostics) +} diff --git a/internal/sync/provider_shadow_support_test.go b/internal/sync/provider_shadow_support_test.go new file mode 100644 index 000000000..03714305b --- /dev/null +++ b/internal/sync/provider_shadow_support_test.go @@ -0,0 +1,19 @@ +package sync + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +// writeProviderShadowSourceFile writes a provider source fixture, creating the +// parent directory. It is the shared helper for the per-provider shadow/parse +// tests (the Codex fold is the lowest caller; later provider folds reuse it). +func writeProviderShadowSourceFile(t *testing.T, path, content string) { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) +} diff --git a/internal/sync/s3_container_test.go b/internal/sync/s3_container_test.go new file mode 100644 index 000000000..ebfed5c70 --- /dev/null +++ b/internal/sync/s3_container_test.go @@ -0,0 +1,181 @@ +//go:build s3test + +// Package sync's S3 container integration test. It exercises the real S3 +// discovery+sync path -- listS3Objects -> s3Client -> a live S3-compatible +// object store -- rather than stubbing the seam, so a regression that silently +// drops s3:// discovery (as the provider migration once did) cannot pass. +// +// Gated behind the s3test build tag and Docker, mirroring the pgtest setup. 
Run +// with: +// +// CGO_ENABLED=1 go test -tags "fts5,s3test" ./internal/sync/ -run TestS3 -v +// +// The container image is rustfs, an actively maintained S3-compatible object +// store; any S3-compatible image works by swapping s3ContainerImage and its +// credential env vars. +package sync + +import ( + "bytes" + "context" + "fmt" + "testing" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" + + "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +const ( + s3ContainerImage = "rustfs/rustfs:latest" + s3TestAccessKey = "rustfsadmin" + s3TestSecretKey = "rustfsadmin" + s3TestBucket = "agentsview" + // s3TestMachine is the machine segment of the <bucket>/<machine>/raw/<agent> + // layout; discovery derives the session machine namespace from it. + s3TestMachine = "laptop" +) + +// startS3Container boots a throwaway S3-compatible object store and returns its +// host:port endpoint. The container is terminated on test cleanup. +func startS3Container(ctx context.Context, t *testing.T) string { + t.Helper() + req := testcontainers.ContainerRequest{ + Image: s3ContainerImage, + ExposedPorts: []string{"9000/tcp"}, + Env: map[string]string{ + "RUSTFS_ACCESS_KEY": s3TestAccessKey, + "RUSTFS_SECRET_KEY": s3TestSecretKey, + }, + // rustfs serves the S3 API on :9000 and answers /health with 200 once + // the object store is ready; the image's default entrypoint starts the + // server, so no Cmd override is needed. + WaitingFor: wait.ForHTTP("/health"). + WithPort("9000/tcp").
+ WithStartupTimeout(90 * time.Second), + } + container, err := testcontainers.GenericContainer( + ctx, testcontainers.GenericContainerRequest{ + ContainerRequest: req, + Started: true, + }, + ) + require.NoError(t, err, "start S3 container") + t.Cleanup(func() { + // Use a fresh context so cleanup runs even if the test ctx is done. + _ = container.Terminate(context.Background()) + }) + + host, err := container.Host(ctx) + require.NoError(t, err, "container host") + port, err := container.MappedPort(ctx, "9000") + require.NoError(t, err, "container mapped port") + return fmt.Sprintf("%s:%s", host, port.Port()) +} + +// putS3Object uploads body to s3://<bucket>/<key> using a direct client. +func putS3Object( + ctx context.Context, t *testing.T, cl *minio.Client, key, body string, +) { + t.Helper() + _, err := cl.PutObject( + ctx, s3TestBucket, key, + bytes.NewReader([]byte(body)), int64(len(body)), + minio.PutObjectOptions{ContentType: "application/jsonl"}, + ) + require.NoError(t, err, "put object %s", key) +} + +// TestS3DiscoverySyncAgainstContainer uploads a Claude session and a Codex +// rollout to a live S3-compatible store, points the production s3Client at it +// via the standard AWS_* env vars, and runs a full provider-authoritative +// SyncAll against s3:// roots. It asserts both remote sessions are discovered, +// fetched, parsed, and persisted machine-namespaced under the s3 root's machine +// segment -- the end-to-end path that has no other real-store coverage. +func TestS3DiscoverySyncAgainstContainer(t *testing.T) { + ctx := context.Background() + endpoint := startS3Container(ctx, t) + + // Point the production s3Client (env-driven) at the container. The endpoint + // is loopback (127.0.0.1), so http is allowed without the insecure override.
+ t.Setenv("AWS_S3_ENDPOINT", "http://"+endpoint) + t.Setenv("AWS_ACCESS_KEY_ID", s3TestAccessKey) + t.Setenv("AWS_SECRET_ACCESS_KEY", s3TestSecretKey) + t.Setenv("AWS_REGION", "us-east-1") + + uploadClient, err := minio.New(endpoint, &minio.Options{ + Creds: credentials.NewStaticV4(s3TestAccessKey, s3TestSecretKey, ""), + Secure: false, + }) + require.NoError(t, err, "build upload client") + require.NoError(t, uploadClient.MakeBucket( + ctx, s3TestBucket, minio.MakeBucketOptions{}, + ), "make bucket") + + const ( + claudeID = "11111111-1111-4111-8111-111111111111" + codexID = "22222222-2222-4222-8222-222222222222" + ) + claudeKey := fmt.Sprintf( + "%s/raw/claude/myproj/%s.jsonl", s3TestMachine, claudeID, + ) + claudeBody := testjsonl.NewSessionBuilder(). + AddClaudeUser("2024-01-01T00:00:00Z", "hello from claude s3"). + AddClaudeAssistant("2024-01-01T00:00:05Z", "hi there"). + String() + putS3Object(ctx, t, uploadClient, claudeKey, claudeBody) + + codexKey := fmt.Sprintf( + "%s/raw/codex/2026/06/24/rollout-2026-06-24T00-00-00-%s.jsonl", + s3TestMachine, codexID, + ) + codexBody := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON( + codexID, "/home/coder/project", "user", "2026-06-24T00:00:00Z", + ), + testjsonl.CodexMsgJSON("user", "review this from s3", "2026-06-24T00:00:01Z"), + ) + putS3Object(ctx, t, uploadClient, codexKey, codexBody) + + database := openTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {"s3://" + s3TestBucket + "/" + s3TestMachine + "/raw/claude"}, + parser.AgentCodex: {"s3://" + s3TestBucket + "/" + s3TestMachine + "/raw/codex"}, + }, + Machine: "central", + }) + + stats := engine.SyncAll(ctx, nil) + require.GreaterOrEqual(t, stats.Synced, 2, + "both s3 sessions discovered and synced") + + // The Claude session ID is the object's filename stem, namespaced by the s3 + // root's machine ("laptop"), not the host machine ("central"). 
+ claudeSess, err := database.GetSessionFull(ctx, s3TestMachine+"~"+claudeID) + require.NoError(t, err) + require.NotNil(t, claudeSess, "claude s3 session persisted") + assert.Equal(t, s3TestMachine, claudeSess.Machine) + assert.Equal(t, "s3://"+s3TestBucket+"/"+claudeKey, derefString(claudeSess.FilePath)) + + // Codex namespaces its ID differently (machine~codex:<id>); assert via the + // persisted set so the exact format is not hard-coded. + page, err := database.ListSessions(ctx, db.SessionFilter{Limit: 100}) + require.NoError(t, err) + agents := map[string]bool{} + for _, s := range page.Sessions { + assert.Equal(t, s3TestMachine, s.Machine, + "every synced s3 session is namespaced under the s3 root machine") + agents[s.Agent] = true + } + assert.True(t, agents["claude"], "claude s3 session discovered") + assert.True(t, agents["codex"], "codex s3 session discovered") +} diff --git a/internal/sync/visualstudio_copilot_integration_test.go b/internal/sync/visualstudio_copilot_integration_test.go index c2b3113ed..bb3fc545d 100644 --- a/internal/sync/visualstudio_copilot_integration_test.go +++ b/internal/sync/visualstudio_copilot_integration_test.go @@ -246,8 +246,8 @@ func TestFindSourceFileVisualStudioCopilotReturnsVirtualPath(t *testing.T) { } // TestSyncSingleSessionContextVisualStudioCopilotPreservesProject verifies that -// a single-session re-sync keeps the session's visualstudio project rather than -// overwriting it with an empty string. +// a single-session re-sync keeps the stored project rather than overwriting it +// with the provider's default project. func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") @@ -271,6 +271,8 @@ func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing.
require.NoError(t, err) require.NotNil(t, before) require.Equal(t, "visualstudio", before.Project) + before.Project = "stored-solution" + require.NoError(t, database.UpsertSession(*before)) require.NoError(t, engine.SyncSingleSessionContext( context.Background(), sessionID, @@ -279,8 +281,8 @@ func TestSyncSingleSessionContextVisualStudioCopilotPreservesProject(t *testing. after, err := database.GetSession(context.Background(), sessionID) require.NoError(t, err) require.NotNil(t, after) - assert.Equal(t, "visualstudio", after.Project, - "single-session re-sync must preserve the visualstudio project") + assert.Equal(t, "stored-solution", after.Project, + "single-session re-sync must preserve the stored project") } // TestSyncEngineVisualStudioCopilotUnreadableSiblingBlocksPartialSession