From 3c34f7a54b7661b1e0356879e7597f9934b3c9a3 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Mon, 22 Jun 2026 22:08:17 -0400 Subject: [PATCH 01/11] feat(parser): migrate openhands provider OpenHands stores each conversation as a directory with metadata and event files, so the provider needs a directory source facade rather than a JSONL file wrapper. This keeps the legacy discovery and dashed/undashed ID lookup behavior while making the composite snapshot fingerprint explicit at the provider boundary. The provider uses the existing OpenHands parser and snapshot helpers so freshness, shallow watch planning, changed-path classification, and normalized parse output stay aligned with the legacy sync path. test(parser): opt openhands into provider shadow OpenHands now has a concrete facade provider on this branch, so its migration mode should enter shadow comparison instead of remaining legacy-only and additive. Earlier provider opt-ins stay inherited and later provider branches own their modes. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare openhands shadow parity OpenHands is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseOpenHandsSession. The test uses the directory snapshot source shape so the provider fingerprint path and planned data-version behavior stay visible while the branch migrates away from legacy dispatch. 
Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesOpenHandsLegacyParser|TestOpenHandsProvider|TestParseOpenHands|TestDiscoverAndFindOpenHands|TestClassifyOnePath_OpenHands|TestProcessFileOpenHandsUsesSnapshotMtimeForRetryCache' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold openhands into provider Move OpenHands discovery, source lookup, and parse ownership onto the concrete provider and delete the package-level DiscoverOpenHandsSessions, FindOpenHandsSourceFile, and ParseOpenHandsSession free functions. Discovery now walks conversation roots directly in the provider source set, raw-session-ID lookup folds the literal/dash-stripped/normalized matching into sessionDirForID, and parsing runs on a provider receiver method. The provider-neutral snapshot, session-dir predicate, and event parse helpers stay as shared free functions. Make OpenHands provider-authoritative and remove its legacy sync dispatch: the classifyOnePath block, the processFile case arm, the OpenHands snapshot-mtime branch, and processOpenHands are gone. Sync now classifies and processes OpenHands through provider changed-path handling, which preserves the base_state.json/TASKS.json/events companion remap to the session directory and keeps the snapshot mtime driving the skip-retry cache via the provider fingerprint. Drop the OpenHands AgentDef DiscoverFunc/FindSourceFunc hooks, remove the shadow baseline test, exempt the provider file from the shim scan, and add a guard asserting the legacy entrypoints stay deleted. 
--- internal/parser/openhands.go | 83 +---- internal/parser/openhands_provider.go | 414 +++++++++++++++++++++ internal/parser/openhands_provider_test.go | 209 +++++++++++ internal/parser/openhands_test.go | 59 ++- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 1 + internal/parser/types.go | 18 +- internal/sync/classify_openhands_test.go | 19 +- internal/sync/engine.go | 73 ---- internal/sync/openhands_retry_test.go | 11 +- 11 files changed, 704 insertions(+), 187 deletions(-) create mode 100644 internal/parser/openhands_provider.go create mode 100644 internal/parser/openhands_provider_test.go diff --git a/internal/parser/openhands.go b/internal/parser/openhands.go index 0945e015b..c2644ad6c 100644 --- a/internal/parser/openhands.go +++ b/internal/parser/openhands.go @@ -7,7 +7,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" @@ -20,82 +19,6 @@ const ( openHandsObservationEvent = "ObservationEvent" ) -// DiscoverOpenHandsSessions finds OpenHands CLI conversation -// directories under ~/.openhands/conversations. -func DiscoverOpenHandsSessions( - conversationsDir string, -) []DiscoveredFile { - entries, err := os.ReadDir(conversationsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !entry.IsDir() || !IsValidSessionID(entry.Name()) { - continue - } - sessionDir := filepath.Join( - conversationsDir, entry.Name(), - ) - if !isOpenHandsSessionDir(sessionDir) { - continue - } - files = append(files, DiscoveredFile{ - Path: sessionDir, - Agent: AgentOpenHands, - }) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindOpenHandsSourceFile locates an OpenHands conversation -// directory by its raw session ID. 
-func FindOpenHandsSourceFile( - conversationsDir, rawID string, -) string { - if conversationsDir == "" || !IsValidSessionID(rawID) { - return "" - } - - candidates := []string{rawID} - stripped := strings.ReplaceAll(rawID, "-", "") - if stripped != rawID { - candidates = append(candidates, stripped) - } - - for _, cand := range candidates { - sessionDir := filepath.Join(conversationsDir, cand) - if isOpenHandsSessionDir(sessionDir) { - return sessionDir - } - } - - entries, err := os.ReadDir(conversationsDir) - if err != nil { - return "" - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - sessionDir := filepath.Join( - conversationsDir, entry.Name(), - ) - if !isOpenHandsSessionDir(sessionDir) { - continue - } - if normalizeOpenHandsSessionID(entry.Name()) == normalizeOpenHandsSessionID(rawID) { - return sessionDir - } - } - return "" -} - // OpenHandsSnapshot computes synthetic file metadata for an // OpenHands conversation directory by hashing the relevant // metadata of base_state.json, TASKS.json, and events/*.json. @@ -184,9 +107,9 @@ func OpenHandsSnapshot(path string) (FileInfo, error) { }, nil } -// ParseOpenHandsSession parses a single OpenHands CLI -// conversation directory into a session and messages. -func ParseOpenHandsSession( +// parseSession parses a single OpenHands CLI conversation +// directory into a session and messages. 
+func (p *openHandsProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { sessionDir, err := normalizeOpenHandsSessionPath(path) diff --git a/internal/parser/openhands_provider.go b/internal/parser/openhands_provider.go new file mode 100644 index 000000000..42cc2dcdb --- /dev/null +++ b/internal/parser/openhands_provider.go @@ -0,0 +1,414 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*openHandsProvider)(nil) + +type openHandsProviderFactory struct { + def AgentDef +} + +func newOpenHandsProviderFactory(def AgentDef) ProviderFactory { + return openHandsProviderFactory{def: cloneAgentDef(def)} +} + +func (f openHandsProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f openHandsProviderFactory) Capabilities() Capabilities { + return openHandsProviderCapabilities() +} + +func (f openHandsProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &openHandsProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: openHandsProviderCapabilities(), + Config: cfg, + }, + sources: newOpenHandsSourceSet(cfg.Roots), + } +} + +type openHandsProvider struct { + ProviderBase + sources openHandsSourceSet +} + +func (p *openHandsProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *openHandsProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *openHandsProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *openHandsProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *openHandsProvider) Fingerprint( + ctx 
context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *openHandsProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("openhands source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type openHandsSource struct { + Root string + Path string +} + +type openHandsSourceSet struct { + roots []string +} + +func newOpenHandsSourceSet(roots []string) openHandsSourceSet { + return openHandsSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s openHandsSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + entries, err := os.ReadDir(root) + if err != nil { + continue + } + for _, entry := range entries { + if !entry.IsDir() || !IsValidSessionID(entry.Name()) { + continue + } + sessionDir := filepath.Join(root, entry.Name()) + source, ok := s.sourceRef(root, sessionDir) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s openHandsSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, 
len(s.roots)) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: false, + DebounceKey: string(AgentOpenHands) + ":dir:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s openHandsSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !s.hasRoot(root) { + return nil, nil + } + source, ok := s.sourceForPathInRoot(root, req.Path) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil + } + for _, root := range s.roots { + source, ok := s.sourceForPathInRoot(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s openHandsSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + if source, ok := s.sourceForPath(path); ok { + return source, true, nil + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.sessionDirForID(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// sessionDirForID locates an OpenHands conversation directory under +// root by its raw session ID. It first tries the raw ID and its +// dash-stripped form as literal directory names, then falls back to +// matching any session directory whose normalized ID equals the +// normalized raw ID. 
+func (s openHandsSourceSet) sessionDirForID(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + candidates := []string{rawID} + stripped := strings.ReplaceAll(rawID, "-", "") + if stripped != rawID { + candidates = append(candidates, stripped) + } + for _, cand := range candidates { + sessionDir := filepath.Join(root, cand) + if isOpenHandsSessionDir(sessionDir) { + return sessionDir + } + } + + entries, err := os.ReadDir(root) + if err != nil { + return "" + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + sessionDir := filepath.Join(root, entry.Name()) + if !isOpenHandsSessionDir(sessionDir) { + continue + } + if normalizeOpenHandsSessionID(entry.Name()) == + normalizeOpenHandsSessionID(rawID) { + return sessionDir + } + } + return "" +} + +func (s openHandsSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("openhands source path unavailable") + } + snapshot, err := OpenHandsSnapshot(path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: snapshot.Size, + MTimeNS: snapshot.Mtime, + Hash: snapshot.Hash, + }, nil +} + +func (s openHandsSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case openHandsSource: + return src.Path, src.Path != "" + case *openHandsSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if ref, ok := s.sourceForPath(candidate); ok { + src := ref.Opaque.(openHandsSource) + return src.Path, true + } + } + return "", false +} + +func (s openHandsSourceSet) sourceForPath(path 
string) (SourceRef, bool) { + for _, root := range s.roots { + if source, ok := s.sourceForPathInRoot(root, path); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (s openHandsSourceSet) sourceForPathInRoot( + root string, + path string, +) (SourceRef, bool) { + sessionDir, ok := openHandsSessionDirForPath(root, path) + if !ok { + return SourceRef{}, false + } + return s.sourceRef(root, sessionDir) +} + +func (s openHandsSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !isOpenHandsSessionDir(path) { + return SourceRef{}, false + } + rel, err := filepath.Rel(root, path) + if err != nil || rel == "." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) || + strings.Contains(rel, string(filepath.Separator)) { + return SourceRef{}, false + } + if !IsValidSessionID(rel) { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentOpenHands, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: openHandsSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s openHandsSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + +func openHandsSessionDirForPath(root, path string) (string, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, err := filepath.Rel(root, path) + if err != nil || rel == "." || rel == ".." 
|| + strings.HasPrefix(rel, ".."+string(filepath.Separator)) { + return "", false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) == 0 || !IsValidSessionID(parts[0]) { + return "", false + } + switch len(parts) { + case 1: + case 2: + if parts[1] != "base_state.json" && parts[1] != "TASKS.json" { + return "", false + } + case 3: + if parts[1] != "events" || filepath.Ext(parts[2]) != ".json" { + return "", false + } + default: + return "", false + } + return filepath.Join(root, parts[0]), true +} + +func openHandsProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/openhands_provider_test.go b/internal/parser/openhands_provider_test.go new file mode 100644 index 000000000..5e07f6ba1 --- /dev/null +++ b/internal/parser/openhands_provider_test.go @@ -0,0 +1,209 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOpenHandsProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentOpenHands) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + 
require.NotNil(t, provider) +} + +func TestOpenHandsProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "086c7ecf-6cb7-46b6-9fbc-b900358d1247" + dirName := "086c7ecf6cb746b69fbcb900358d1247" + sessionDir := openHandsProviderWriteSession( + t, root, dirName, sessionID, "provider question", + ) + openHandsProviderWriteInvalidSession(t, root, "missing-events") + writeSourceFile(t, filepath.Join(root, "notes.txt"), "{}\n") + + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.NotEmpty(t, plan.Roots[0].DebounceKey) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentOpenHands, discovered[0].Provider) + assert.Equal(t, sessionDir, discovered[0].Key) + assert.Equal(t, sessionDir, discovered[0].DisplayPath) + assert.Equal(t, sessionDir, discovered[0].FingerprintKey) + assert.Empty(t, discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~openhands:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: dirName, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + snapshot, err := OpenHandsSnapshot(sessionDir) + require.NoError(t, err) + fingerprint, err := 
provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sessionDir, fingerprint.Key) + assert.Equal(t, snapshot.Size, fingerprint.Size) + assert.Equal(t, snapshot.Mtime, fingerprint.MTimeNS) + assert.Equal(t, snapshot.Hash, fingerprint.Hash) + + for _, changedPath := range []string{ + sessionDir, + filepath.Join(sessionDir, "base_state.json"), + filepath.Join(sessionDir, "TASKS.json"), + filepath.Join(sessionDir, "events", "event-00000-user.json"), + } { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: changedPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1, changedPath) + assert.Equal(t, sessionDir, changed[0].DisplayPath) + } + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(sessionDir, "events", "notes.txt"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: sessionDir, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestOpenHandsProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "086c7ecf-6cb7-46b6-9fbc-b900358d1247" + sessionDir := openHandsProviderWriteSession( + t, root, "086c7ecf6cb746b69fbcb900358d1247", sessionID, "parse question", + ) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], 
+ Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "openhands:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentOpenHands, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sessionDir, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 1) +} + +func openHandsProviderWriteSession( + t *testing.T, + root string, + dirName string, + sessionID string, + firstMessage string, +) string { + t.Helper() + sessionDir := filepath.Join(root, dirName) + eventsDir := filepath.Join(sessionDir, "events") + require.NoError(t, os.MkdirAll(eventsDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(sessionDir, "base_state.json"), + []byte(`{"id":"`+sessionID+`","agent":{"llm":{"model":"test-model"}}}`), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(sessionDir, "TASKS.json"), + []byte(`[]`), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(eventsDir, "event-00000-user.json"), + []byte(`{ + "id":"e0", + "timestamp":"2026-04-02T15:25:40.706887", + "source":"user", + "llm_message":{"role":"user","content":[{"type":"text","text":"`+firstMessage+`"}]}, + "kind":"MessageEvent" + }`), + 0o644, + )) + return sessionDir +} + +func openHandsProviderWriteInvalidSession( + t *testing.T, + root string, + dirName string, +) { + t.Helper() + require.NoError(t, os.MkdirAll(filepath.Join(root, dirName), 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(root, dirName, "base_state.json"), + []byte(`{}`), + 0o644, + )) +} diff --git a/internal/parser/openhands_test.go 
b/internal/parser/openhands_test.go index 4d8b4bc6f..3e6109d69 100644 --- a/internal/parser/openhands_test.go +++ b/internal/parser/openhands_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" "path/filepath" @@ -25,19 +26,30 @@ func TestDiscoverAndFindOpenHandsSessions(t *testing.T) { 0o644, )) - files := DiscoverOpenHandsSessions(root) - require.Len(t, files, 1) - assert.Equal(t, sessionDir, files[0].Path) - assert.Equal(t, AgentOpenHands, files[0].Agent) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) - assert.Equal( - t, sessionDir, - FindOpenHandsSourceFile(root, sessionID), - ) - assert.Equal( - t, sessionDir, - FindOpenHandsSourceFile(root, dirName), - ) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, sessionDir, sources[0].DisplayPath) + assert.Equal(t, AgentOpenHands, sources[0].Provider) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: dirName, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) } func TestParseOpenHandsSession(t *testing.T) { @@ -116,10 +128,27 @@ func TestParseOpenHandsSession(t *testing.T) { )) } - sess, msgs, err := ParseOpenHandsSession( - sessionDir, "local", - ) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "local", + }) + require.True(t, ok) + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sessionDir, + }) require.NoError(t, err) + require.True(t, found) + fingerprint, err := provider.Fingerprint(context.Background(), source) + require.NoError(t, 
err) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + + sess := &outcome.Results[0].Result.Session + msgs := outcome.Results[0].Result.Messages require.NotNil(t, sess) require.Len(t, msgs, 4) diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 4f205e0ee..8bf1dcd22 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -369,6 +369,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIcodemateProviderFactory(def) case AgentOpenCode: return newOpenCodeProviderFactory(def) + case AgentOpenHands: + return newOpenHandsProviderFactory(def) case AgentOpenClaw: return newOpenClawProviderFactory(def) case AgentOMP, AgentPi: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 093f9ac0c..40a1bd5d9 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -22,7 +22,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentCodex: ProviderMigrationLegacyOnly, AgentCopilot: ProviderMigrationLegacyOnly, AgentGemini: ProviderMigrationLegacyOnly, - AgentOpenHands: ProviderMigrationLegacyOnly, + AgentOpenHands: ProviderMigrationProviderAuthoritative, AgentCursor: ProviderMigrationLegacyOnly, AgentMiMoCode: ProviderMigrationProviderAuthoritative, AgentOpenCode: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 204c96081..de49bfa1f 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -55,6 +55,7 @@ var pendingShimProviderFiles = map[string]bool{ "kiro_ide_provider.go": true, "kiro_provider.go": true, "openhands_provider.go": true, + "opencode_provider.go": true, "positron_provider.go": true, "shelley_provider.go": true, 
"vibe_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index 17d1a96c5..6b1a54ba4 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -206,16 +206,14 @@ var Registry = []AgentDef{ WatchRootsFunc: ResolveKiloWatchRoots, }, { - Type: AgentOpenHands, - DisplayName: "OpenHands CLI", - EnvVar: "OPENHANDS_CONVERSATIONS_DIR", - ConfigKey: "openhands_dirs", - DefaultDirs: []string{".openhands/conversations"}, - IDPrefix: "openhands:", - FileBased: true, - ShallowWatch: true, - DiscoverFunc: DiscoverOpenHandsSessions, - FindSourceFunc: FindOpenHandsSourceFile, + Type: AgentOpenHands, + DisplayName: "OpenHands CLI", + EnvVar: "OPENHANDS_CONVERSATIONS_DIR", + ConfigKey: "openhands_dirs", + DefaultDirs: []string{".openhands/conversations"}, + IDPrefix: "openhands:", + FileBased: true, + ShallowWatch: true, }, { Type: AgentCursor, diff --git a/internal/sync/classify_openhands_test.go b/internal/sync/classify_openhands_test.go index a23e4e563..580319fd0 100644 --- a/internal/sync/classify_openhands_test.go +++ b/internal/sync/classify_openhands_test.go @@ -34,11 +34,15 @@ func TestClassifyOnePath_OpenHands(t *testing.T) { )) eng := &Engine{ + db: openTestDB(t), agentDirs: map[parser.AgentType][]string{ parser.AgentOpenHands: {root}, }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentOpenHands: parser.ProviderMigrationProviderAuthoritative, + }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -80,12 +84,15 @@ func TestClassifyOnePath_OpenHands(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { - assert.Equal(t, parser.AgentOpenHands, got.Agent) - assert.Equal(t, tt.retPath, got.Path) + files := eng.classifyPaths([]string{tt.path}) + if !tt.want { + 
assert.Empty(t, files) + return } + require.Len(t, files, 1) + got := files[0] + assert.Equal(t, parser.AgentOpenHands, got.Agent) + assert.Equal(t, tt.retPath, got.Path) }) } } diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 83e8e3253..a7643cebb 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1139,38 +1139,6 @@ func (e *Engine) classifyOnePath( } } - // OpenHands CLI: - // //base_state.json - // //TASKS.json - // //events/*.json - for _, openHandsDir := range e.agentDirs[parser.AgentOpenHands] { - if openHandsDir == "" { - continue - } - if rel, ok := isUnder(openHandsDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) < 2 || !parser.IsValidSessionID(parts[0]) { - continue - } - switch { - case len(parts) == 2 && - (parts[1] == "base_state.json" || - parts[1] == "TASKS.json"): - case len(parts) == 3 && - parts[1] == "events" && - strings.HasSuffix(parts[2], ".json"): - default: - continue - } - return parser.DiscoveredFile{ - Path: filepath.Join( - openHandsDir, parts[0], - ), - Agent: parser.AgentOpenHands, - }, true - } - } - // Cursor: // //agent-transcripts/.{txt,jsonl} // //agent-transcripts//.{txt,jsonl} @@ -4039,13 +4007,6 @@ func (e *Engine) processFile( // Capture mtime once from the initial stat so all // downstream cache operations use a consistent value. 
mtime := info.ModTime().UnixNano() - if file.Agent == parser.AgentOpenHands { - snapshot, err := parser.OpenHandsSnapshot(file.Path) - if err != nil { - return processResult{err: err} - } - mtime = snapshot.Mtime - } if file.Agent == parser.AgentCowork { mtime = parser.CoworkSessionMtime(file.Path, mtime) } @@ -4111,8 +4072,6 @@ func (e *Engine) processFile( res = e.processReasonix(file, info) case parser.AgentGemini: res = e.processGemini(file, info) - case parser.AgentOpenHands: - res = e.processOpenHands(file, info) case parser.AgentCursor: res = e.processCursor(file, info) case parser.AgentVSCodeCopilot: @@ -6332,38 +6291,6 @@ func (e *Engine) processAntigravityCLI( } } -func (e *Engine) processOpenHands( - file parser.DiscoveredFile, _ os.FileInfo, -) processResult { - snapshot, err := parser.OpenHandsSnapshot(file.Path) - if err != nil { - return processResult{err: err} - } - - fi := fakeSnapshotInfo{ - fSize: snapshot.Size, fMtime: snapshot.Mtime, - } - if e.shouldSkipByPath(file.Path, fi) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseOpenHandsSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processCursor( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/openhands_retry_test.go b/internal/sync/openhands_retry_test.go index 872d0c091..353611c67 100644 --- a/internal/sync/openhands_retry_test.go +++ b/internal/sync/openhands_retry_test.go @@ -39,8 +39,15 @@ func TestProcessFileOpenHandsUsesSnapshotMtimeForRetryCache(t *testing.T) { oldDirMtime := dirInfo.ModTime() engine := &Engine{ - db: dbtest.OpenTestDB(t), - machine: "local", + db: dbtest.OpenTestDB(t), + machine: "local", + agentDirs: map[parser.AgentType][]string{ + parser.AgentOpenHands: {root}, + }, + 
providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentOpenHands: parser.ProviderMigrationProviderAuthoritative, + }, skipCache: map[string]int64{sessionDir: oldDirMtime.UnixNano()}, } From f5965d580f4837111a78512cde5a90559e628413 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:33:01 -0400 Subject: [PATCH 02/11] feat(parser): migrate cursor provider Cursor transcript sources have two legacy layouts and select .jsonl over .txt when both exist for a session. Moving Cursor behind a concrete provider keeps that selection policy explicit at the provider boundary instead of relying on the legacy parser adapter.

The provider preserves recursive project discovery, raw/full ID lookup, stale .txt path promotion, changed-path classification, content-hash fingerprinting, and parser output normalization while using the same Cursor discovery and parsing helpers as the previous sync path. fix(parser): preserve cursor project-scoped source selection Cursor session IDs are only unique within an encoded project directory, but the provider was resolving stored and changed paths through a root-wide lookup. That could silently select the same transcript stem from a different project and drop valid sources during discovery. Resolve Cursor source promotion inside the project derived from the incoming path, add duplicate-stem coverage, and mark model output unsupported until the parser actually fills message models. This lets the Cursor branch enter shadow comparison as a real migration step. 
Validation: go test -tags "fts5" ./internal/parser -run 'Test(CursorProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare cursor shadow parity Cursor is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseCursorSession. The test uses duplicate transcript stems in different encoded project directories to lock in the current parser ID behavior while proving provider source observation stays project-scoped. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesCursorLegacyParser|TestCursorProvider|TestParseCursor|TestCursorSessionID' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... test(sync): assert cursor provider hash parity Roborev job 2709 caught that the Cursor shadow parity fixture normalized the legacy session hash before proving the provider fingerprint matched the legacy parser hash. That left the test unable to detect a provider fingerprint regression that propagated into parsed output. Assert hash parity before normalizing the legacy session for the full struct comparison, keeping the existing duplicate-stem fixture focused on provider/legacy equivalence. Validation: go test -tags "fts5" ./internal/sync -run TestObserveProviderSourceMatchesCursorLegacyParser -count=1; go fmt ./...; go vet ./...; git diff --check refactor(parser): fold cursor into provider Move Cursor source discovery, lookup, and parse ownership onto the concrete cursorProvider and remove the package-level DiscoverCursorSessions, FindCursorSourceFile, and ParseCursorSession free functions. 
Discovery and find-source bodies now live as provider-owned helpers (discoverTranscriptPaths, cursorAddSeen, cursorFindSourceFile) on the cursor source set, and parseSession is a receiver method. Make Cursor provider-authoritative and drop its legacy sync dispatch: the classifyOnePath transcript block, the processFile case arm, the processCursor method, and its now-orphaned validateCursorContainment and findContainingDir helpers. Source classification, containment, .txt/.jsonl precedence, and project-hint decoding are all reproduced through the provider's changed-path and discovery paths, so runtime behavior is preserved. ParseCursorTranscriptRelPath stays a shared provider-neutral path validator used by both the engine's project enrichment and the provider. Replace the shadow-baseline test with provider API coverage plus a guard asserting the legacy entrypoints stay gone, and remove cursor from the pending-shim list. fix(parser): cap cursor provider fingerprinting Cursor parsing already rejects transcripts over 10 MiB, but the migrated provider fingerprint path still hashed the full source before parse. 
That made oversized files pay an unbounded read cost in the provider freshness path even though parse would never accept them.

Keep normal-size content hashing intact and return only metadata for oversized Cursor transcripts so parse remains the sole place that reads up to the guarded cap.

Validation: go test -tags "fts5" ./internal/parser -run 'TestCursorProvider' -count=1; go vet ./...; git diff --check --- internal/parser/cursor.go | 8 +- internal/parser/cursor_provider.go | 609 +++++++++++++++++++++ internal/parser/cursor_provider_test.go | 282 ++++++++++ internal/parser/discovery.go | 213 ------- internal/parser/discovery_test.go | 37 +- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 6 +- internal/parser/types.go | 16 +- internal/sync/engine.go | 120 ---- 10 files changed, 927 insertions(+), 368 deletions(-) create mode 100644 internal/parser/cursor_provider.go create mode 100644 internal/parser/cursor_provider_test.go diff --git a/internal/parser/cursor.go b/internal/parser/cursor.go index f570b07f7..fa42d25d0 100644 --- a/internal/parser/cursor.go +++ b/internal/parser/cursor.go @@ -17,10 +17,10 @@ import ( // under 500 KB; 10 MB provides generous headroom. const maxCursorTranscriptSize = 10 << 20 -// ParseCursorSession parses a Cursor agent transcript file. -// Transcripts are plain text with "user:" and "assistant:" role -// markers, tool calls, and thinking blocks. -func ParseCursorSession( +// parseSession parses a Cursor agent transcript file. Transcripts are plain +// text with "user:" and "assistant:" role markers, tool calls, and thinking +// blocks. 
+func (p *cursorProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { // Open with O_NOFOLLOW (Unix) to reject symlinks at the diff --git a/internal/parser/cursor_provider.go b/internal/parser/cursor_provider.go new file mode 100644 index 000000000..67ecc210f --- /dev/null +++ b/internal/parser/cursor_provider.go @@ -0,0 +1,609 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*cursorProvider)(nil) + +type cursorProviderFactory struct { + def AgentDef +} + +func newCursorProviderFactory(def AgentDef) ProviderFactory { + return cursorProviderFactory{def: cloneAgentDef(def)} +} + +func (f cursorProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f cursorProviderFactory) Capabilities() Capabilities { + return cursorProviderCapabilities() +} + +func (f cursorProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &cursorProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: cursorProviderCapabilities(), + Config: cfg, + }, + sources: newCursorSourceSet(cfg.Roots), + } +} + +type cursorProvider struct { + ProviderBase + sources cursorSourceSet +} + +func (p *cursorProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *cursorProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *cursorProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *cursorProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *cursorProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { 
+ return p.sources.Fingerprint(ctx, source) +} + +func (p *cursorProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("cursor source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type cursorSource struct { + Root string + Path string +} + +type cursorSourceSet struct { + roots []string +} + +func newCursorSourceSet(roots []string) cursorSourceSet { + return cursorSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s cursorSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, path := range s.discoverTranscriptPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverTranscriptPaths walks a Cursor projects root and returns the primary +// transcript file paths. All paths resolve within the canonical root, +// preventing symlink escapes. Symlinked project directory entries are rejected. 
+// Cursor uses two layouts: flat (agent-transcripts/.{txt,jsonl}) and +// nested (agent-transcripts//.{txt,jsonl}); when both .jsonl and +// .txt exist for the same stem, .jsonl is preferred. +func (s cursorSourceSet) discoverTranscriptPaths(projectsDir string) []string { + if projectsDir == "" { + return nil + } + + // Canonicalize root once for containment checks. + resolvedRoot, err := filepath.EvalSymlinks(projectsDir) + if err != nil { + return nil + } + + entries, err := os.ReadDir(projectsDir) + if err != nil { + return nil + } + + var paths []string + for _, entry := range entries { + if !entry.IsDir() { + continue + } + // Reject symlinked project directory entries. + if entry.Type()&os.ModeSymlink != 0 { + continue + } + + transcriptsDir := filepath.Join( + projectsDir, entry.Name(), "agent-transcripts", + ) + + // Verify the transcripts directory resolves within + // the canonical root. + resolvedDir, err := filepath.EvalSymlinks(transcriptsDir) + if err != nil { + continue + } + if !isContainedIn(resolvedDir, resolvedRoot) { + continue + } + + transcripts, err := os.ReadDir(transcriptsDir) + if err != nil { + continue + } + + // Collect valid transcripts, deduping by basename + // stem. When both .jsonl and .txt exist for the + // same session, prefer .jsonl. + // + // Cursor uses two layouts: + // flat: agent-transcripts/.{txt,jsonl} + // nested: agent-transcripts//.{txt,jsonl} + seen := make(map[string]string) // stem -> path + for _, sf := range transcripts { + if !sf.IsDir() { + // Flat layout: file directly in + // agent-transcripts/. + name := sf.Name() + if !IsCursorTranscriptExt(name) { + continue + } + fullPath := filepath.Join(transcriptsDir, name) + if !IsRegularFile(fullPath) { + continue + } + cursorAddSeen(seen, name, fullPath) + continue + } + + // Nested layout: agent-transcripts// + // containing .{txt,jsonl}. 
+ subDir := filepath.Join(transcriptsDir, sf.Name()) + subEntries, err := os.ReadDir(subDir) + if err != nil { + continue + } + dirName := sf.Name() + for _, sub := range subEntries { + if sub.IsDir() { + continue + } + name := sub.Name() + if !IsCursorTranscriptExt(name) { + continue + } + // Only accept files whose stem matches + // the parent directory name, e.g. + // /.jsonl. + stem := strings.TrimSuffix(name, filepath.Ext(name)) + if stem != dirName { + continue + } + fullPath := filepath.Join(subDir, name) + if !IsRegularFile(fullPath) { + continue + } + cursorAddSeen(seen, name, fullPath) + } + } + for _, path := range seen { + paths = append(paths, path) + } + } + return paths +} + +// cursorAddSeen inserts a transcript path into the seen map, preferring .jsonl +// over .txt when both exist for the same stem. +func cursorAddSeen(seen map[string]string, name, fullPath string) { + stem := strings.TrimSuffix(name, filepath.Ext(name)) + if prev, ok := seen[stem]; ok { + if strings.HasSuffix(prev, ".txt") && + strings.HasSuffix(name, ".jsonl") { + seen[stem] = fullPath + } + return + } + seen[stem] = fullPath +} + +func (s cursorSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "*.txt"}, + DebounceKey: string(AgentCursor) + ":transcripts:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s cursorSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !s.hasRoot(root) { + return nil, nil + } + source, ok := s.sourceForPathInRoot(root, req.Path) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil + } + for _, root := range s.roots { + source, ok := 
s.sourceForPathInRoot(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s cursorSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + if source, ok := s.sourceForPath(path); ok { + return source, true, nil + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := cursorFindSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// cursorFindSourceFile finds a Cursor transcript file by session UUID across a +// projects root, preferring .jsonl over .txt. Returns "" if no matching file +// resolves within the canonical root. +func cursorFindSourceFile(projectsDir, sessionID string) string { + if projectsDir == "" || !IsValidSessionID(sessionID) { + return "" + } + + entries, err := os.ReadDir(projectsDir) + if err != nil { + return "" + } + + resolvedRoot, err := filepath.EvalSymlinks(projectsDir) + if err != nil { + return "" + } + + for _, ext := range []string{".jsonl", ".txt"} { + target := sessionID + ext + for _, entry := range entries { + if !entry.IsDir() { + continue + } + // Nested layout first (matches discovery + // precedence), then flat layout. 
+ candidates := []string{ + filepath.Join( + projectsDir, entry.Name(), + "agent-transcripts", sessionID, target, + ), + filepath.Join( + projectsDir, entry.Name(), + "agent-transcripts", target, + ), + } + for _, candidate := range candidates { + if !IsRegularFile(candidate) { + continue + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil { + continue + } + rel, err := filepath.Rel(resolvedRoot, resolved) + sep := string(filepath.Separator) + if err != nil || rel == ".." || + strings.HasPrefix(rel, ".."+sep) { + continue + } + return candidate + } + } + } + return "" +} + +func (s cursorSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("cursor source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash := "" + if info.Size() <= maxCursorTranscriptSize { + hash, err = hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Hash: hash, + }, nil +} + +func (s cursorSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case cursorSource: + return src.Path, src.Path != "" + case *cursorSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if ref, ok := s.sourceForPath(candidate); ok { + src := ref.Opaque.(cursorSource) + return src.Path, true + } + } + return "", false +} + +func 
(s cursorSourceSet) sourceForPath(path string) (SourceRef, bool) { + for _, root := range s.roots { + if source, ok := s.sourceForPathInRoot(root, path); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (s cursorSourceSet) sourceForPathInRoot( + root string, + path string, +) (SourceRef, bool) { + rawID, ok := cursorRawSessionIDFromPath(root, path) + if !ok { + return SourceRef{}, false + } + projectDir, ok := cursorProjectDirFromPath(root, path) + if !ok { + return SourceRef{}, false + } + selected := cursorFindSourceFileInProject(root, projectDir, rawID) + if selected == "" { + return SourceRef{}, false + } + return s.sourceRef(root, selected) +} + +func (s cursorSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !IsRegularFile(path) { + return SourceRef{}, false + } + rawID, ok := cursorRawSessionIDFromPath(root, path) + if !ok { + return SourceRef{}, false + } + projectDir, ok := cursorProjectDirFromPath(root, path) + if !ok { + return SourceRef{}, false + } + selected := cursorFindSourceFileInProject(root, projectDir, rawID) + if selected == "" || !samePath(selected, path) { + return SourceRef{}, false + } + project := DecodeCursorProjectDir(projectDir) + if project == "" { + project = "unknown" + } + return SourceRef{ + Provider: AgentCursor, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: cursorSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s cursorSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + +func cursorFindSourceFileInProject(root, projectDir, rawID string) string { + if root == "" || projectDir == "" || !IsValidSessionID(rawID) { + return "" + } + resolvedRoot, err := filepath.EvalSymlinks(root) + if err != nil { + return "" + } + transcriptsDir := filepath.Join(root, projectDir, 
"agent-transcripts") + for _, ext := range []string{".jsonl", ".txt"} { + target := rawID + ext + candidates := []string{ + filepath.Join(transcriptsDir, rawID, target), + filepath.Join(transcriptsDir, target), + } + for _, candidate := range candidates { + if !IsRegularFile(candidate) { + continue + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil || !isContainedIn(resolved, resolvedRoot) { + continue + } + return candidate + } + } + return "" +} + +func cursorRawSessionIDFromPath(root, path string) (string, bool) { + rel, ok := cursorRelPath(root, path) + if !ok { + return "", false + } + parts := strings.Split(rel, string(filepath.Separator)) + switch len(parts) { + case 3: + return strings.TrimSuffix(parts[2], filepath.Ext(parts[2])), true + case 4: + return parts[2], true + default: + return "", false + } +} + +func cursorProjectDirFromPath(root, path string) (string, bool) { + rel, ok := cursorRelPath(root, path) + if !ok { + return "", false + } + return ParseCursorTranscriptRelPath(rel) +} + +func cursorRelPath(root, path string) (string, bool) { + rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path)) + if err != nil { + return "", false + } + if _, ok := ParseCursorTranscriptRelPath(rel); !ok { + return "", false + } + return rel, true +} + +func cursorProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git 
a/internal/parser/cursor_provider_test.go b/internal/parser/cursor_provider_test.go new file mode 100644 index 000000000..c023bc2eb --- /dev/null +++ b/internal/parser/cursor_provider_test.go @@ -0,0 +1,282 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCursorProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCursor) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCursorProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + flatTxt := cursorProviderWriteTranscript(t, transcriptsDir, "flat.txt", "old") + flatJSONL := cursorProviderWriteJSONLTranscript(t, transcriptsDir, "flat.jsonl", "new") + nestedTxt := cursorProviderWriteTranscript(t, transcriptsDir, filepath.Join("nested", "nested.txt"), "old") + nestedJSONL := cursorProviderWriteJSONLTranscript( + t, transcriptsDir, filepath.Join("nested", "nested.jsonl"), "new", + ) + cursorProviderWriteJSONLTranscript( + t, transcriptsDir, filepath.Join("nested", "subagents", "child.jsonl"), "child", + ) + cursorProviderWriteJSONLTranscript(t, transcriptsDir, filepath.Join("mismatch", "other.jsonl"), "other") + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "*.txt"}, plan.Roots[0].IncludeGlobs) + + discovered, err := 
provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{flatJSONL, nestedJSONL}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered { + assert.Equal(t, AgentCursor, source.Provider) + assert.Equal(t, DecodeCursorProjectDir(projectDir), source.ProjectHint) + } + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~cursor:flat", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, flatJSONL, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: flatTxt, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, flatJSONL, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "nested", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, nestedJSONL, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, nestedJSONL, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "flat txt promotes to jsonl", path: flatTxt, want: flatJSONL}, + {name: "flat jsonl", path: flatJSONL, want: flatJSONL}, + {name: "nested txt promotes to jsonl", path: nestedTxt, want: nestedJSONL}, + {name: "nested jsonl", path: nestedJSONL, want: nestedJSONL}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, changed[0].DisplayPath) + }) + } + + ignored, err := 
provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(transcriptsDir, "nested", "subagents", "child.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: flatJSONL, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestCursorProviderResolvesDuplicateStemsWithinProject(t *testing.T) { + root := t.TempDir() + firstProject := "Users-fiona-Documents-first" + secondProject := "Users-fiona-Documents-second" + firstDir := filepath.Join(root, firstProject, "agent-transcripts") + secondDir := filepath.Join(root, secondProject, "agent-transcripts") + firstJSONL := cursorProviderWriteJSONLTranscript(t, firstDir, "shared.jsonl", "first") + secondTxt := cursorProviderWriteTranscript(t, secondDir, "shared.txt", "second old") + secondJSONL := cursorProviderWriteJSONLTranscript(t, secondDir, "shared.jsonl", "second new") + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.ElementsMatch(t, []string{firstJSONL, secondJSONL}, sourceDisplayPaths(discovered)) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: secondTxt, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, secondJSONL, found.DisplayPath) + assert.Equal(t, DecodeCursorProjectDir(secondProject), found.ProjectHint) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: secondTxt, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, secondJSONL, 
changed[0].DisplayPath) + assert.Equal(t, DecodeCursorProjectDir(secondProject), changed[0].ProjectHint) +} + +func TestCursorProviderParse(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + sourcePath := cursorProviderWriteJSONLTranscript( + t, transcriptsDir, "parse.jsonl", "parse question", + ) + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "cursor:parse", result.Result.Session.ID) + assert.Equal(t, AgentCursor, result.Result.Session.Agent) + assert.Equal(t, DecodeCursorProjectDir(projectDir), result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCursorProviderFingerprintSkipsOversizedTranscriptHash(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + sourcePath := filepath.Join(transcriptsDir, "oversized.jsonl") + require.NoError(t, os.MkdirAll(transcriptsDir, 0o755)) + file, err := 
os.Create(sourcePath) + require.NoError(t, err) + require.NoError(t, file.Truncate(maxCursorTranscriptSize+1)) + require.NoError(t, file.Close()) + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Equal(t, int64(maxCursorTranscriptSize+1), fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.Empty(t, fingerprint.Hash) + + _, err = provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "file too large") +} + +func cursorProviderWriteTranscript( + t *testing.T, + dir string, + name string, + firstMessage string, +) string { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile( + path, + []byte("user:\n"+firstMessage+"\nassistant:\nDone.\n"), + 0o644, + )) + return path +} + +func cursorProviderWriteJSONLTranscript( + t *testing.T, + dir string, + name string, + firstMessage string, +) string { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile( + path, + []byte(`{"role":"user","message":{"content":"`+firstMessage+`"}}`+"\n"+ + `{"role":"assistant","message":{"content":"Done."}}`+"\n"), + 0o644, + )) + return path +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index fb8219760..d45ddbc6c 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -987,219 +987,6 @@ func confirmGeminiSessionID( return GeminiSessionID(data) == sessionID } -// DiscoverCursorSessions finds 
all agent transcript files under -// the Cursor projects dir (//agent-transcripts/.txt). -// All discovered paths are validated to resolve within the -// canonical projectsDir, preventing symlink escapes. -// cursorAddSeen inserts a transcript path into the seen map, -// preferring .jsonl over .txt when both exist for the same stem. -func cursorAddSeen( - seen map[string]string, name, fullPath string, -) { - stem := strings.TrimSuffix(name, filepath.Ext(name)) - if prev, ok := seen[stem]; ok { - if strings.HasSuffix(prev, ".txt") && - strings.HasSuffix(name, ".jsonl") { - seen[stem] = fullPath - } - return - } - seen[stem] = fullPath -} - -func DiscoverCursorSessions( - projectsDir string, -) []DiscoveredFile { - if projectsDir == "" { - return nil - } - - // Canonicalize root once for containment checks. - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return nil - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Reject symlinked project directory entries. - if entry.Type()&os.ModeSymlink != 0 { - continue - } - - transcriptsDir := filepath.Join( - projectsDir, entry.Name(), "agent-transcripts", - ) - - // Verify the transcripts directory resolves within - // the canonical root. - resolvedDir, err := filepath.EvalSymlinks( - transcriptsDir, - ) - if err != nil { - continue - } - if !isContainedIn(resolvedDir, resolvedRoot) { - continue - } - - transcripts, err := os.ReadDir(transcriptsDir) - if err != nil { - continue - } - - project := DecodeCursorProjectDir(entry.Name()) - if project == "" { - project = "unknown" - } - - // Collect valid transcripts, deduping by basename - // stem. When both .jsonl and .txt exist for the - // same session, prefer .jsonl. 
- // - // Cursor uses two layouts: - // flat: agent-transcripts/.{txt,jsonl} - // nested: agent-transcripts//.{txt,jsonl} - seen := make(map[string]string) // stem -> path - for _, sf := range transcripts { - if !sf.IsDir() { - // Flat layout: file directly in - // agent-transcripts/. - name := sf.Name() - if !IsCursorTranscriptExt(name) { - continue - } - fullPath := filepath.Join( - transcriptsDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - continue - } - - // Nested layout: agent-transcripts// - // containing .{txt,jsonl}. - subDir := filepath.Join( - transcriptsDir, sf.Name(), - ) - subEntries, err := os.ReadDir(subDir) - if err != nil { - continue - } - dirName := sf.Name() - for _, sub := range subEntries { - if sub.IsDir() { - continue - } - name := sub.Name() - if !IsCursorTranscriptExt(name) { - continue - } - // Only accept files whose stem matches - // the parent directory name, e.g. - // /.jsonl. - stem := strings.TrimSuffix( - name, filepath.Ext(name), - ) - if stem != dirName { - continue - } - fullPath := filepath.Join( - subDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - } - } - for _, path := range seen { - files = append(files, DiscoveredFile{ - Path: path, - Project: project, - Agent: AgentCursor, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCursorSourceFile finds a Cursor transcript file by -// session UUID. Prefers .jsonl over .txt. 
-func FindCursorSourceFile( - projectsDir, sessionID string, -) string { - if projectsDir == "" || !IsValidSessionID(sessionID) { - return "" - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return "" - } - - for _, ext := range []string{".jsonl", ".txt"} { - target := sessionID + ext - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Nested layout first (matches discovery - // precedence), then flat layout. - candidates := []string{ - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", sessionID, target, - ), - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", target, - ), - } - for _, candidate := range candidates { - if !IsRegularFile(candidate) { - continue - } - resolved, err := filepath.EvalSymlinks( - candidate, - ) - if err != nil { - continue - } - rel, err := filepath.Rel( - resolvedRoot, resolved, - ) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - continue - } - return candidate - } - } - } - return "" -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. 
type geminiProjectsFile struct { diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 72f4c98ba..61019154a 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -1152,11 +1152,9 @@ func TestDiscoverCursorSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1225,11 +1223,9 @@ func TestDiscoverCursorSessions_NestedLayout(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1243,10 +1239,11 @@ func TestDiscoverCursorSessions_DedupPrefersJsonl(t *testing.T) { filepath.Join(transcripts, "sess.txt"): "user:\nhi", filepath.Join(transcripts, "sess.jsonl"): `{"role":"user"}`, }) - files := DiscoverCursorSessions(dir) - require.Len(t, files, 1, "files count") - assert.True(t, strings.HasSuffix(files[0].Path, ".jsonl"), - "expected .jsonl path, got %q", files[0].Path) + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, 1, "paths count") + assert.True(t, strings.HasSuffix(paths[0], ".jsonl"), + "expected .jsonl path, got %q", paths[0]) } func TestParseCursorTranscriptRelPath(t *testing.T) { @@ -1321,7 +1318,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ 
filepath.Join(cursorTranscripts, "sess1.txt"): "data", }) - got := FindCursorSourceFile(dir, "sess1") + got := cursorFindSourceFile(dir, "sess1") assert.NotEmpty(t, got, "expected to find .txt file") }) @@ -1330,7 +1327,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess2.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess2") + got := cursorFindSourceFile(dir, "sess2") assert.NotEmpty(t, got, "expected to find .jsonl file") }) @@ -1343,7 +1340,7 @@ func TestFindCursorSourceFile(t *testing.T) { jsonlPath := filepath.Join( dir, cursorTranscripts, "sess3.jsonl", ) - got := FindCursorSourceFile(dir, "sess3") + got := cursorFindSourceFile(dir, "sess3") assert.Equal(t, jsonlPath, got, "(.jsonl preferred)") }) @@ -1352,7 +1349,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess4", "sess4.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess4") + got := cursorFindSourceFile(dir, "sess4") require.NotEmpty(t, got, "expected to find nested .jsonl file") assert.True(t, strings.HasSuffix(got, filepath.Join("sess4", "sess4.jsonl")), "unexpected path %q", got) @@ -1364,14 +1361,14 @@ func TestFindCursorSourceFile(t *testing.T) { filepath.Join(cursorTranscripts, "sess5", "sess5.txt"): "old", filepath.Join(cursorTranscripts, "sess5", "sess5.jsonl"): "new", }) - got := FindCursorSourceFile(dir, "sess5") + got := cursorFindSourceFile(dir, "sess5") assert.True(t, strings.HasSuffix(got, "sess5.jsonl"), "expected .jsonl path, got %q", got) }) t.Run("NotFound", func(t *testing.T) { dir := t.TempDir() - got := FindCursorSourceFile(dir, "nonexistent") + got := cursorFindSourceFile(dir, "nonexistent") assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 8bf1dcd22..200b720ef 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ 
-353,6 +353,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentCortex: return newCortexProviderFactory(def) + case AgentCursor: + return newCursorProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) case AgentIflow: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 40a1bd5d9..0bbc5c1ce 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -23,7 +23,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentCopilot: ProviderMigrationLegacyOnly, AgentGemini: ProviderMigrationLegacyOnly, AgentOpenHands: ProviderMigrationProviderAuthoritative, - AgentCursor: ProviderMigrationLegacyOnly, + AgentCursor: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, AgentOpenCode: ProviderMigrationProviderAuthoritative, AgentKilo: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index de49bfa1f..d95a84577 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -29,6 +29,11 @@ var legacyEntrypointVerb = regexp.MustCompile(`^(Discover|Find|Parse|Process|Cla var providerNeutralEntrypoints = map[string]bool{ "ParseVirtualSourcePath": true, "ParseVirtualSourcePathForBase": true, + // ParseCursorTranscriptRelPath is a pure rel-path shape validator with no + // filesystem or provider state. It is shared by the engine's path + // classification/enrichment and the Cursor provider's source set, so it + // stays a free helper rather than moving onto the provider. 
+ "ParseCursorTranscriptRelPath": true, } // pendingShimProviderFiles are provider files whose behavior has not yet been @@ -48,7 +53,6 @@ var pendingShimProviderFiles = map[string]bool{ "codex_provider.go": true, "copilot_provider.go": true, "cowork_provider.go": true, - "cursor_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, "hermes_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index 6b1a54ba4..42fdc0538 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -216,15 +216,13 @@ var Registry = []AgentDef{ ShallowWatch: true, }, { - Type: AgentCursor, - DisplayName: "Cursor", - EnvVar: "CURSOR_PROJECTS_DIR", - ConfigKey: "cursor_project_dirs", - DefaultDirs: []string{".cursor/projects"}, - IDPrefix: "cursor:", - FileBased: true, - DiscoverFunc: DiscoverCursorSessions, - FindSourceFunc: FindCursorSourceFile, + Type: AgentCursor, + DisplayName: "Cursor", + EnvVar: "CURSOR_PROJECTS_DIR", + ConfigKey: "cursor_project_dirs", + DefaultDirs: []string{".cursor/projects"}, + IDPrefix: "cursor:", + FileBased: true, }, { Type: AgentAmp, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index a7643cebb..00409aa87 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -907,20 +907,6 @@ func isUnder(dir, path string) (string, bool) { return rel, true } -// findContainingDir returns the first dir from dirs that is a -// parent of path, or "" if none match. -func findContainingDir(dirs []string, path string) string { - for _, d := range dirs { - if d == "" { - continue - } - if _, ok := isUnder(d, path); ok { - return d - } - } - return "" -} - // classifyContainerPath runs the container- and SQLite-style classifiers that // resolve a path whether or not it currently exists on disk (Kiro, Zed, // Shelley, and Vibe). 
Split out of classifyOnePath to keep that function @@ -1139,30 +1125,6 @@ func (e *Engine) classifyOnePath( } } - // Cursor: - // //agent-transcripts/.{txt,jsonl} - // //agent-transcripts//.{txt,jsonl} - for _, cursorDir := range e.agentDirs[parser.AgentCursor] { - if cursorDir == "" { - continue - } - if rel, ok := isUnder(cursorDir, path); ok { - projectDir, ok := parser.ParseCursorTranscriptRelPath(rel) - if !ok { - continue - } - project := parser.DecodeCursorProjectDir(projectDir) - if project == "" { - project = "unknown" - } - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentCursor, - }, true - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -4072,8 +4034,6 @@ func (e *Engine) processFile( res = e.processReasonix(file, info) case parser.AgentGemini: res = e.processGemini(file, info) - case parser.AgentCursor: - res = e.processCursor(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -6291,61 +6251,6 @@ func (e *Engine) processAntigravityCLI( } } -func (e *Engine) processCursor( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Skip .txt if a sibling .jsonl exists — .jsonl is the - // richer format and takes precedence. - if stem, ok := strings.CutSuffix(file.Path, ".txt"); ok { - if parser.IsRegularFile(stem + ".jsonl") { - return processResult{skip: true} - } - } - - sessionID := parser.CursorSessionID(file.Path) - - if e.shouldSkipFile(sessionID, info) { - return processResult{skip: true} - } - - // Re-validate containment immediately before parsing to - // close the TOCTOU window between discovery and read. - // The parser opens with O_NOFOLLOW (rejecting symlinked - // final components), and this check catches parent - // directory swaps. 
- if root := findContainingDir( - e.agentDirs[parser.AgentCursor], file.Path, - ); root != "" { - if err := validateCursorContainment( - root, file.Path, - ); err != nil { - return processResult{ - err: fmt.Errorf( - "containment check: %w", err, - ), - } - } - } - - sess, msgs, err := parser.ParseCursorSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - // Hash is computed inside ParseCursorSession from the - // already-read data to avoid re-opening the file by path. - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -6359,31 +6264,6 @@ func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// validateCursorContainment re-resolves both root and path -// to verify the file still resides within the cursor projects -// directory. Returns an error if containment fails. -func validateCursorContainment( - cursorDir, path string, -) error { - resolvedRoot, err := filepath.EvalSymlinks(cursorDir) - if err != nil { - return fmt.Errorf("resolve root: %w", err) - } - resolvedPath, err := filepath.EvalSymlinks(path) - if err != nil { - return fmt.Errorf("resolve path: %w", err) - } - rel, err := filepath.Rel(resolvedRoot, resolvedPath) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - return fmt.Errorf( - "%s escapes %s", path, cursorDir, - ) - } - return nil -} - // computeFinalStreak counts trailing consecutive failures // from the end of the tool call list. 
func computeFinalStreak(calls []signals.ToolCallRow) int { From db37e5fba16988141a9e1610b3dd0e14e1387e0b Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Mon, 22 Jun 2026 22:10:38 -0400 Subject: [PATCH 03/11] feat(parser): migrate vibe provider Vibe stores transcript content in messages.jsonl while canonical session identity, title, timestamps, model, and usage can live in a sibling meta.json. Moving it behind a concrete provider keeps that companion relationship explicit at the provider boundary. The provider preserves recursive session discovery, symlinked session directories, raw and full ID lookup through meta.json, meta-sidecar changed-path classification, effective size and mtime freshness, transcript hashing, fallback-ID exclusion, and parser output normalization through the existing Vibe parser wrapper. fix(parser): classify removed vibe transcripts Vibe source events need to keep working after the primary messages.jsonl has already disappeared. Routing deletion and rename-style events through the existing file check meant the watcher could ignore the exact event that should refresh or remove the stored session. Synthesize source refs only for missing-path removal semantics, keep ordinary lookups existence-checked, and pin the intentionally shallow session directory layout in provider tests. This lets the Vibe provider enter shadow comparison as a real migration step. Validation: go test -tags "fts5" ./internal/parser -run 'Test(VibeProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare vibe shadow parity Vibe is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseVibeSessionWrapper. The test includes meta.json canonical ID promotion, provider-adjusted fingerprint metadata, usage events, and excluded fallback IDs so reviewers can see the migration preserves the composite source behavior.
Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesVibeLegacyParser|TestVibeProvider|TestParseVibe|TestClassifyOnePath_Vibe|TestSyncVibe|TestSourceMtimeVibe|TestProcessVibe' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... test(sync): cover vibe provider usage parity Roborev job 2711 caught that the Vibe shadow parity fixture compared empty usage slices, so it could not detect regressions in aggregate usage emission. Seed the fixture with real Vibe metadata fields for active model and nonzero stats, then assert both legacy and provider paths emit usage before comparing them. Validation: go test -tags "fts5" ./internal/sync -run TestObserveProviderSourceMatchesVibeLegacyParser -count=1; go fmt ./...; go vet ./...; git diff --check refactor(parser): fold vibe into provider Move Vibe source discovery, lookup, and parse ownership onto the concrete vibeProvider and delete the package-level DiscoverVibeSessions, FindVibeSourceFile, and ParseVibeSessionWrapper free functions. Discovery and find-source bodies now live as provider-owned helpers (discoverSessionPaths, findSourceFile) on the vibe source set, the isVibeMessagesFile guard moves to the provider file, and the messages.jsonl parser becomes the provider parseVibeResult/parseSession methods. Make Vibe provider-authoritative and drop its legacy sync dispatch: the classifyContainerPath classifyVibePath call and method, the processFile case arm, the processVibe method, and its now-orphaned isSessionBlocked and isSessionTrashed helpers. vibeEffectiveInfo stays as a shared composite-mtime helper used by the skip-cache and fingerprint paths. 
Because a provider has no database handle, the engine reproduces Vibe's DB-aware, file-path-scoped bookkeeping in applyProviderFilePathPolicies for single-session-per-file providers: stale stored IDs at the same source path are excluded, and a freshly parsed row is suppressed when the user already removed (trashed or deleted) the session occupying that path, so a canonical ID flipping between the meta.json session_id and the directory-name fallback no longer resurrects a hidden session. This is a no-op for stable-ID providers and skipped for multi-session sources. Drop the Vibe AgentDef DiscoverFunc/FindSourceFunc hooks, remove it from the pending shim scan list, replace the shadow-baseline test with provider API coverage plus a guard that the legacy entrypoints stay gone, and route the package and engine tests through the provider methods. The obsolete classifyOnePath Vibe test is removed; the provider's SourcesForChangedPath coverage replaces it. --- internal/parser/discovery.go | 78 ------ internal/parser/discovery_test.go | 4 +- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 1 - internal/parser/types.go | 16 +- internal/parser/vibe.go | 18 +- internal/parser/vibe_provider.go | 304 +++++++++++++++++++++ internal/parser/vibe_provider_test.go | 297 ++++++++++++++++++++ internal/parser/vibe_test.go | 98 +++++-- internal/sync/classify_vibe_test.go | 92 ------- internal/sync/engine.go | 265 ++++++++---------- internal/sync/engine_test.go | 50 ++-- 13 files changed, 837 insertions(+), 390 deletions(-) create mode 100644 internal/parser/vibe_provider.go create mode 100644 internal/parser/vibe_provider_test.go delete mode 100644 internal/sync/classify_vibe_test.go diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index d45ddbc6c..df4c88b48 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -1571,81 +1571,3 @@ func 
extractIflowBaseSessionID(sessionID string) string { // If we didn't find 5 hyphens, this is not a fork ID return sessionID } - -// DiscoverVibeSessions finds all Vibe session files under the given root directory. -// Vibe stores sessions in: ~/.vibe/logs/session/session_YYYYMMDD_HHMMSS_uuid/ -// Each session directory contains messages.jsonl -func DiscoverVibeSessions(root string) []DiscoveredFile { - var results []DiscoveredFile - - entries, err := os.ReadDir(root) - if err != nil { - return results - } - - for _, entry := range entries { - if !isDirOrSymlink(entry, root) { - continue - } - - // Vibe session directories match pattern: session_YYYYMMDD_HHMMSS_uuid - // The uuid part can contain hyphens - if !strings.HasPrefix(entry.Name(), "session_") || !strings.Contains(entry.Name(), "_") { - continue - } - - sessionDir := filepath.Join(root, entry.Name()) - messagesPath := filepath.Join(sessionDir, "messages.jsonl") - - if info, err := os.Stat(messagesPath); err == nil && !info.IsDir() { - results = append(results, DiscoveredFile{ - Path: messagesPath, - Agent: AgentVibe, - Project: entry.Name(), - }) - } - } - - return results -} - -// FindVibeSourceFile locates a specific Vibe session file by ID. The ID is the -// session_id recorded in meta.json (a uuid), which usually differs from the -// session directory name. Sessions without meta.json fall back to the directory -// name, so a direct path is tried first before scanning meta.json files. -func FindVibeSourceFile(root, sessionID string) string { - // Fast path: sessionID is the directory name (no-meta fallback). - if messagesPath := filepath.Join(root, sessionID, "messages.jsonl"); isVibeMessagesFile(messagesPath) { - return messagesPath - } - - // Otherwise sessionID is a meta.json session_id; scan session - // directories and match on their recorded session_id. 
- entries, err := os.ReadDir(root) - if err != nil { - return "" - } - for _, entry := range entries { - if !isDirOrSymlink(entry, root) || !strings.HasPrefix(entry.Name(), "session_") { - continue - } - messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") - if !isVibeMessagesFile(messagesPath) { - continue - } - metaPath := filepath.Join(root, entry.Name(), "meta.json") - if meta, err := parseVibeMetadata(metaPath); err == nil && meta.SessionID == sessionID { - return messagesPath - } - } - return "" -} - -// isVibeMessagesFile reports whether path is an existing regular file. -func isVibeMessagesFile(path string) bool { - info, err := os.Stat(path) - if err != nil || info == nil { - return false - } - return !info.IsDir() -} diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 61019154a..9acad18e2 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -1424,7 +1424,7 @@ func TestIsPiSessionFile(t *testing.T) { func TestDiscoverVibeSessionsIntegration(t *testing.T) { // Test discovery with testdata - files := DiscoverVibeSessions("testdata/vibe") + files := discoverVibeTestSessions(t, "testdata/vibe") // Should find all session directories with messages.jsonl require.NotEmpty(t, files) @@ -1442,7 +1442,7 @@ func TestDiscoverVibeSessionsIntegration(t *testing.T) { func TestFindVibeSourceFileIntegration(t *testing.T) { // Test with actual testdata sessionID := "session_basic" - result := FindVibeSourceFile("testdata/vibe", sessionID) + result := findVibeTestSourceFile(t, "testdata/vibe", sessionID) expected := filepath.Join("testdata", "vibe", sessionID, "messages.jsonl") assert.Equal(t, expected, result) diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 200b720ef..e01b866c8 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -381,6 +381,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return 
newQwenPawProviderFactory(def) case AgentQClaw: return newQClawProviderFactory(def) + case AgentVibe: + return newVibeProviderFactory(def) case AgentWorkBuddy: return newWorkBuddyProviderFactory(def) case AgentQwen: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 0bbc5c1ce..370aa91f1 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -53,7 +53,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentPositron: ProviderMigrationLegacyOnly, AgentAntigravity: ProviderMigrationLegacyOnly, AgentAntigravityCLI: ProviderMigrationLegacyOnly, - AgentVibe: ProviderMigrationLegacyOnly, + AgentVibe: ProviderMigrationProviderAuthoritative, AgentZed: ProviderMigrationLegacyOnly, AgentQwenPaw: ProviderMigrationProviderAuthoritative, AgentGptme: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index d95a84577..1e774457f 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -62,7 +62,6 @@ var pendingShimProviderFiles = map[string]bool{ "opencode_provider.go": true, "positron_provider.go": true, "shelley_provider.go": true, - "vibe_provider.go": true, "visualstudio_copilot_provider.go": true, "vscode_copilot_provider.go": true, "zed_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index 42fdc0538..3ec9ebd00 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -574,15 +574,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindShelleySourceFile, }, { - Type: AgentVibe, - DisplayName: "Mistral Vibe", - EnvVar: "VIBE_SESSIONS_DIR", - ConfigKey: "vibe_session_dirs", - DefaultDirs: []string{".vibe/logs/session"}, - IDPrefix: "vibe:", - FileBased: true, - DiscoverFunc: DiscoverVibeSessions, - FindSourceFunc: FindVibeSourceFile, + Type: AgentVibe, + DisplayName: "Mistral Vibe", + 
EnvVar: "VIBE_SESSIONS_DIR", + ConfigKey: "vibe_session_dirs", + DefaultDirs: []string{".vibe/logs/session"}, + IDPrefix: "vibe:", + FileBased: true, }, { // Aider has no central session store. It writes one Markdown diff --git a/internal/parser/vibe.go b/internal/parser/vibe.go index 49aad1018..4f16f325a 100644 --- a/internal/parser/vibe.go +++ b/internal/parser/vibe.go @@ -67,8 +67,10 @@ type VibeStats struct { LastTurnTotalTokens int `json:"last_turn_total_tokens"` } -// ParseVibeSession parses a Mistral Vibe messages.jsonl file -func ParseVibeSession(path string, fileInfo FileInfo) (ParseResult, error) { +// parseVibeResultFile parses a Mistral Vibe messages.jsonl file into a ParseResult. +// It owns the on-disk shape (messages.jsonl plus the sibling meta.json) for the +// Vibe provider; the package-level entrypoint was folded onto the provider. +func parseVibeResultFile(path string, fileInfo FileInfo) (ParseResult, error) { result := ParseResult{ Session: ParsedSession{ Agent: AgentVibe, @@ -386,11 +388,11 @@ func vibeToolArguments(args json.RawMessage) string { return string(args) } -// ParseVibeSessionWrapper wraps ParseVibeSession and returns the session, -// messages, and usage events in the shape the sync engine consumes: -// (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error). It stats the -// file to build FileInfo and optionally overrides the project and machine. -func ParseVibeSessionWrapper(path, project, machine string) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { +// parseVibeSession parses a Vibe session at path and returns the session, messages, +// and usage events in the shape the provider consumes: (*ParsedSession, +// []ParsedMessage, []ParsedUsageEvent, error). It stats the file to build +// FileInfo and optionally overrides the project and machine.
+func parseVibeSession(path, project, machine string) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) if err != nil { return nil, nil, nil, fmt.Errorf("stat %s: %w", path, err) @@ -402,7 +404,7 @@ func ParseVibeSessionWrapper(path, project, machine string) (*ParsedSession, []P Mtime: info.ModTime().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeResultFile(path, fileInfo) if err != nil { return nil, nil, nil, err } diff --git a/internal/parser/vibe_provider.go b/internal/parser/vibe_provider.go new file mode 100644 index 000000000..29ad56f2c --- /dev/null +++ b/internal/parser/vibe_provider.go @@ -0,0 +1,304 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// Vibe stores each session in <root>/session_<YYYYMMDD>_<HHMMSS>_<uuid>/, with a +// messages.jsonl transcript and a sibling meta.json. It is a single-file +// provider: one transcript parses into one session, with a composite fingerprint +// folding in meta.json and a fallback-ID exclusion when meta.json later supplies +// a different session_id. All behavior is wired into the shared single-file base +// via options. +func newVibeProviderFactory(def AgentDef) ProviderFactory { + return NewSingleFileProviderFactory( + def, + vibeProviderCapabilities(), + func(cfg ProviderConfig) singleFileSourceSet { + return NewSingleFileSourceSet( + AgentVibe, + cfg.Roots, + WithFileDiscovery(vibeDiscoverFiles), + WithFileWatchRoots(vibeWatchRoots), + WithFileChangedPathClassifier(vibeClassifyPath), + WithFileLookup(vibeFindFile), + WithFileFingerprint(vibeFingerprintSource), + WithFileParse(vibeParseFile), + ) + }, + ) +} + +func vibeDiscoverFiles(root string) []singleFileMatch { + var out []singleFileMatch + for _, path := range discoverVibeSessionPaths(root) { + if match, ok := vibeStrictMatch(root, path); ok { + out = append(out, match) + } + } + return out +} + +// discoverVibeSessionPaths finds all Vibe messages.jsonl paths under root.
+// Symlinked session directories are followed (matching the watcher), but only +// session_-prefixed directories that hold a regular messages.jsonl qualify. +func discoverVibeSessionPaths(root string) []string { + entries, err := os.ReadDir(root) + if err != nil { + return nil + } + var paths []string + for _, entry := range entries { + if !isDirOrSymlink(entry, root) { + continue + } + if !isVibeSessionDirName(entry.Name()) { + continue + } + messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") + if isVibeMessagesFile(messagesPath) { + paths = append(paths, messagesPath) + } + } + return paths +} + +func vibeWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"messages.jsonl", "meta.json"}, + DebounceKey: string(AgentVibe) + ":sessions:" + root, + }) + } + return out +} + +// vibeClassifyPath maps a messages.jsonl or meta.json event path to its session +// transcript. Under allowMissing a transcript that does not (yet) exist still +// classifies via the session directory name, so a metadata-only event or a +// deletion still resolves. 
+func vibeClassifyPath( + root, path string, allowMissing bool, +) (singleFileMatch, bool) { + rel, ok := vibeRelPath(root, path) + if !ok { + return singleFileMatch{}, false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) != 2 || !isVibeSessionDirName(parts[0]) { + return singleFileMatch{}, false + } + messagesPath := filepath.Join(filepath.Clean(root), parts[0], "messages.jsonl") + switch parts[1] { + case "messages.jsonl": + if allowMissing { + return vibeMatchFromSessionDir(parts[0], messagesPath) + } + return vibeStrictMatch(root, messagesPath) + case "meta.json": + if allowMissing && !isVibeMessagesFile(messagesPath) { + return vibeMatchFromSessionDir(parts[0], messagesPath) + } + return vibeStrictMatch(root, messagesPath) + default: + return singleFileMatch{}, false + } +} + +// vibeStrictMatch requires the messages.jsonl to exist as a regular file under a +// session directory before classifying it. +func vibeStrictMatch(root, path string) (singleFileMatch, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !isVibeMessagesFile(path) { + return singleFileMatch{}, false + } + rel, ok := vibeRelPath(root, path) + if !ok { + return singleFileMatch{}, false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) != 2 || !isVibeSessionDirName(parts[0]) || + parts[1] != "messages.jsonl" { + return singleFileMatch{}, false + } + return vibeMatchFromSessionDir(parts[0], path) +} + +func vibeMatchFromSessionDir(sessionDir, path string) (singleFileMatch, bool) { + if !isVibeSessionDirName(sessionDir) { + return singleFileMatch{}, false + } + return singleFileMatch{Path: path, ProjectHint: sessionDir}, true +} + +func vibeFindFile(root, rawID string) (singleFileMatch, bool) { + path := findVibeSourceFile(root, rawID) + if path == "" { + return singleFileMatch{}, false + } + return vibeStrictMatch(root, path) +} + +// findVibeSourceFile locates a Vibe session by ID under root. 
The ID is the +// session_id from meta.json (a uuid), which usually differs from the session +// directory name, so a direct directory-name path is tried before scanning +// meta.json files. +func findVibeSourceFile(root, sessionID string) string { + if messagesPath := filepath.Join( + root, sessionID, "messages.jsonl", + ); isVibeMessagesFile(messagesPath) { + return messagesPath + } + entries, err := os.ReadDir(root) + if err != nil { + return "" + } + for _, entry := range entries { + if !isDirOrSymlink(entry, root) || + !strings.HasPrefix(entry.Name(), "session_") { + continue + } + messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") + if !isVibeMessagesFile(messagesPath) { + continue + } + metaPath := filepath.Join(root, entry.Name(), "meta.json") + if meta, err := parseVibeMetadata(metaPath); err == nil && + meta.SessionID == sessionID { + return messagesPath + } + } + return "" +} + +// isVibeMessagesFile reports whether path is an existing regular file. +func isVibeMessagesFile(path string) bool { + info, err := os.Stat(path) + if err != nil || info == nil { + return false + } + return !info.IsDir() +} + +func vibeFingerprintSource(src singleFileSource) (SourceFingerprint, error) { + info, err := os.Stat(src.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Path, + ) + } + size := info.Size() + mtime := info.ModTime().UnixNano() + metaPath := vibeMetaPath(src.Path) + if metaInfo, err := os.Stat(metaPath); err == nil { + size += metaInfo.Size() + if metaMTime := metaInfo.ModTime().UnixNano(); metaMTime > mtime { + mtime = metaMTime + } + } + hash, err := hashJSONLSourceFile(src.Path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: size, + MTimeNS: mtime, + Hash: hash, + }, nil +} + +func vibeParseFile( + src singleFileSource, req ParseRequest, +) 
// vibeRelPath returns path relative to root when path lies strictly inside
// root. The root itself, anything that escapes it, and any relative path with
// an empty, "." or ".." segment are rejected.
func vibeRelPath(root, path string) (string, bool) {
	rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path))
	switch {
	case err != nil, rel == "", rel == ".", rel == "..":
		return "", false
	case strings.HasPrefix(rel, ".."+string(filepath.Separator)):
		return "", false
	}
	for _, segment := range strings.Split(rel, string(filepath.Separator)) {
		switch segment {
		case "", ".", "..":
			return "", false
		}
	}
	return rel, true
}

// isVibeSessionDirName reports whether name carries the session_ prefix used by
// Vibe session directories.
func isVibeSessionDirName(name string) bool {
	const prefix = "session_"
	// The prefix already contains an underscore, so no separate underscore
	// test is needed.
	return strings.HasPrefix(name, prefix)
}

// vibeMetaPath returns the meta.json path that sits beside messagesPath.
func vibeMetaPath(messagesPath string) string {
	sessionDir := filepath.Dir(messagesPath)
	return filepath.Join(sessionDir, "meta.json")
}
ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestVibeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260613_123456_abc123def" + messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("provider question")) + writeSourceFile(t, metaPath, vibeProviderMetaFixture("uuid-1234", "Provider title")) + writeSourceFile(t, filepath.Join(root, "scratch", "messages.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "session_missing_messages", "meta.json"), "{}\n") + nestedPath := filepath.Join(root, "nested", "session_20260613_123456_nested", "messages.jsonl") + writeSourceFile(t, nestedPath, vibeProviderMessagesFixture("nested")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"messages.jsonl", "meta.json"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + source := discovered[0] + assert.Equal(t, AgentVibe, source.Provider) + assert.Equal(t, messagesPath, source.DisplayPath) + assert.Equal(t, messagesPath, source.FingerprintKey) + assert.Equal(t, sessionDir, source.ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~vibe:uuid-1234", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: 
sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: messagesPath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + messageInfo, err := os.Stat(messagesPath) + require.NoError(t, err) + metaInfo, err := os.Stat(metaPath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, messagesPath, fingerprint.Key) + assert.Equal(t, messageInfo.Size()+metaInfo.Size(), fingerprint.Size) + assert.Equal( + t, + max(messageInfo.ModTime().UnixNano(), metaInfo.ModTime().UnixNano()), + fingerprint.MTimeNS, + ) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "messages", path: messagesPath, want: messagesPath}, + {name: "meta sidecar", path: metaPath, want: messagesPath}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, changed[0].DisplayPath) + }) + } + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, messagesPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "scratch", "messages.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + nested, err := provider.SourcesForChangedPath( + 
context.Background(), + ChangedPathRequest{Path: nestedPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, nested) + + require.NoError(t, os.Remove(messagesPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: messagesPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, messagesPath, changed[0].DisplayPath) + assert.Equal(t, sessionDir, changed[0].ProjectHint) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: messagesPath, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestVibeProviderDiscoversSymlinkedSessionDirectory(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + sessionDir := "session_20260613_123456_symlinked" + targetDir := filepath.Join(targetRoot, sessionDir) + sourceDir := filepath.Join(root, sessionDir) + sourcePath := filepath.Join(sourceDir, "messages.jsonl") + writeSourceFile( + t, + filepath.Join(targetDir, "messages.jsonl"), + vibeProviderMessagesFixture("from symlink"), + ) + if err := os.Symlink(targetDir, sourceDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestVibeProviderParse(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260613_123456_abc123def" 
+ messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("parse question")) + writeSourceFile(t, metaPath, vibeProviderMetaFixture("uuid-1234", "Provider title")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "vibe:uuid-1234", result.Result.Session.ID) + assert.Equal(t, AgentVibe, result.Result.Session.Agent) + assert.Equal(t, "vibe", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, messagesPath, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "Provider title", result.Result.Session.SessionName) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Contains(t, outcome.ExcludedSessionIDs, "vibe:"+sessionDir) + assert.Len(t, result.Result.Messages, 2) +} + +// TestVibeProviderParseEmitsUsageEvents locks in the usage-event and +// excluded-ID behavior the deleted shadow-baseline test asserted: when +// meta.json carries a model and token stats, Parse must surface a single 
+// session-level usage event and exclude the directory-name fallback ID. +func TestVibeProviderParseEmitsUsageEvents(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260616_083518_abc123" + sessionID := "uuid-1234" + messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("provider question")) + writeSourceFile(t, metaPath, vibeProviderMetaWithStatsFixture(sessionID, "Provider title")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, "vibe:"+sessionID, result.Result.Session.ID) + assert.Equal(t, []string{"vibe:" + sessionDir}, outcome.ExcludedSessionIDs) + + require.Len(t, result.Result.UsageEvents, 1) + usageEvent := result.Result.UsageEvents[0] + assert.Equal(t, "vibe:"+sessionID, usageEvent.SessionID) + assert.Equal(t, "mistral-medium-3.5", usageEvent.Model) + assert.Equal(t, 100, usageEvent.InputTokens) + assert.Equal(t, 40, usageEvent.OutputTokens) +} + +func vibeProviderMessagesFixture(firstMessage string) string { + return `{"role":"user","content":"` + firstMessage + `"}` + "\n" + + `{"role":"assistant","content":"Done."}` + "\n" +} + +func vibeProviderMetaFixture(sessionID, title string) string { + return `{"session_id":"` + sessionID + `","title":"` + title + `"}` +} + +func vibeProviderMetaWithStatsFixture(sessionID, title string) string { + return `{"session_id":"` + sessionID + 
`","title":"` + title + `",` + + `"config":{"active_model":"mistral-medium-3.5"},` + + `"stats":{"session_prompt_tokens":100,"session_completion_tokens":40}}` +} diff --git a/internal/parser/vibe_test.go b/internal/parser/vibe_test.go index 7d7af4630..89b3b6085 100644 --- a/internal/parser/vibe_test.go +++ b/internal/parser/vibe_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" "path/filepath" @@ -11,6 +12,55 @@ import ( "github.com/stretchr/testify/require" ) +// newVibeTestProvider builds a Vibe provider for the given roots so package +// tests can exercise discovery through the Provider interface. +func newVibeTestProvider(t *testing.T, roots ...string) Provider { + t.Helper() + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + return provider +} + +// parseVibeTestSession parses a Vibe messages.jsonl file at path into a +// ParseResult through the folded free function, replacing the removed +// package-level ParseVibeSession entrypoint. +func parseVibeTestSession(t *testing.T, path string, fileInfo FileInfo) (ParseResult, error) { + t.Helper() + return parseVibeResultFile(path, fileInfo) +} + +// discoverVibeTestSessions discovers Vibe sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path + project) the +// tests assert against. 
+func discoverVibeTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newVibeTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: AgentVibe, + }) + } + return files +} + +// findVibeTestSourceFile resolves a Vibe session ID to a messages.jsonl path, +// replacing the removed FindVibeSourceFile. +func findVibeTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return findVibeSourceFile(root, sessionID) +} + func TestDiscoverVibeSessions(t *testing.T) { tmpDir := t.TempDir() @@ -29,7 +79,7 @@ func TestDiscoverVibeSessions(t *testing.T) { require.NoError(t, os.MkdirAll(otherDir, 0755)) // Run discovery - discovered := DiscoverVibeSessions(tmpDir) + discovered := discoverVibeTestSessions(t, tmpDir) // Verify results require.Len(t, discovered, 1) @@ -53,7 +103,7 @@ func TestDiscoverVibeSessionsMultiple(t *testing.T) { require.NoError(t, os.MkdirAll(invalidDir, 0755)) // Run discovery - discovered := DiscoverVibeSessions(tmpDir) + discovered := discoverVibeTestSessions(t, tmpDir) // Verify results - should find only 3 valid sessions require.Len(t, discovered, 3) @@ -69,7 +119,7 @@ func TestDiscoverVibeSessionsEmptyDir(t *testing.T) { tmpDir := t.TempDir() // Run discovery on empty directory - files := DiscoverVibeSessions(tmpDir) + files := discoverVibeTestSessions(t, tmpDir) // Should return empty slice assert.Len(t, files, 0) @@ -77,7 +127,7 @@ func TestDiscoverVibeSessionsEmptyDir(t *testing.T) { func TestDiscoverVibeSessionsNonExistentDir(t *testing.T) { // Run discovery on non-existent directory - files := DiscoverVibeSessions("/nonexistent/path") + files := discoverVibeTestSessions(t, "/nonexistent/path") // Should return 
empty slice without error assert.Len(t, files, 0) @@ -92,7 +142,7 @@ func TestFindVibeSourceFile(t *testing.T) { // When the ID matches the directory name (no meta.json), the file is // resolved directly. - result := FindVibeSourceFile(root, sessionID) + result := findVibeTestSourceFile(t, root, sessionID) expected := filepath.Join(root, sessionID, "messages.jsonl") assert.Equal(t, expected, result) } @@ -104,7 +154,7 @@ func TestFindVibeSourceFileWithSpecialChars(t *testing.T) { filepath.Join(sessionID, "messages.jsonl"): "test", }) - result := FindVibeSourceFile(root, sessionID) + result := findVibeTestSourceFile(t, root, sessionID) expected := filepath.Join(root, sessionID, "messages.jsonl") assert.Equal(t, expected, result) } @@ -119,12 +169,12 @@ func TestFindVibeSourceFileByMetaSessionID(t *testing.T) { // The canonical ID is the meta.json session_id, which differs from the // directory name; the lookup must scan meta.json to resolve it. - result := FindVibeSourceFile(root, "uuid-1234") + result := findVibeTestSourceFile(t, root, "uuid-1234") expected := filepath.Join(root, dirName, "messages.jsonl") assert.Equal(t, expected, result) // An unknown ID resolves to nothing. 
- assert.Empty(t, FindVibeSourceFile(root, "does-not-exist")) + assert.Empty(t, findVibeTestSourceFile(t, root, "does-not-exist")) } func TestParseVibeSession(t *testing.T) { @@ -134,7 +184,7 @@ func TestParseVibeSession(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Verify session metadata @@ -192,7 +242,7 @@ func TestParseVibeSessionWithTools(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Verify messages @@ -254,7 +304,7 @@ func TestParseVibeSessionEmpty(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Empty file should have no messages @@ -281,7 +331,7 @@ func TestParseVibeSessionMalformedLines(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed 2 valid messages and counted 1 malformed line @@ -307,7 +357,7 @@ func TestParseVibeSessionWithoutMeta(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages but no metadata from meta.json. 
The ID @@ -358,7 +408,7 @@ func TestParseVibeSessionEmptyStats(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages and metadata but no usage events due to empty stats @@ -406,7 +456,7 @@ func TestParseVibeSessionModelFromMessages(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages and metadata @@ -460,7 +510,7 @@ func TestParseVibeSessionModelFromConfig(t *testing.T) { path := filepath.Join(tmpDir, "session_test", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) require.Len(t, result.UsageEvents, 1) @@ -488,7 +538,7 @@ func TestParseVibeSessionInjectedUserExcluded(t *testing.T) { path := filepath.Join(tmpDir, "session_test", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) require.Len(t, result.Messages, 3) @@ -507,7 +557,7 @@ func TestParseVibeSessionToolResultNotCountedAsUser(t *testing.T) { path := "testdata/vibe/session_with_tools/messages.jsonl" fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) assert.Equal(t, 1, result.Session.UserMessageCount) @@ -533,7 +583,7 @@ func TestParseVibeSessionMalformedMetaRecoversID(t *testing.T) { path := filepath.Join(tmpDir, "session_dir", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := 
ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) assert.Equal(t, "vibe:uuid-canonical-1", result.Session.ID) @@ -560,7 +610,7 @@ func TestParseVibeSessionCorruptMetaReturnsError(t *testing.T) { path := filepath.Join(tmpDir, "session_dir", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - _, err := ParseVibeSession(path, fileInfo) + _, err := parseVibeTestSession(t, path, fileInfo) require.Error(t, err) assert.Contains(t, err.Error(), "meta.json") } @@ -575,8 +625,10 @@ func TestVibeAgentByType(t *testing.T) { assert.Equal(t, "vibe_session_dirs", def.ConfigKey) assert.Equal(t, "vibe:", def.IDPrefix) assert.True(t, def.FileBased) - assert.NotNil(t, def.DiscoverFunc) - assert.NotNil(t, def.FindSourceFunc) + // Vibe is provider-authoritative: discovery and source lookup live on the + // vibeProvider, not on legacy AgentDef hooks. + assert.Nil(t, def.DiscoverFunc) + assert.Nil(t, def.FindSourceFunc) } func TestVibeAgentByPrefix(t *testing.T) { @@ -642,7 +694,7 @@ func TestParseRealVibeSession(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(messagesPath, fileInfo) + result, err := parseVibeTestSession(t, messagesPath, fileInfo) require.NoError(t, err) // Verify basic session metadata diff --git a/internal/sync/classify_vibe_test.go b/internal/sync/classify_vibe_test.go deleted file mode 100644 index 5b5cb7602..000000000 --- a/internal/sync/classify_vibe_test.go +++ /dev/null @@ -1,92 +0,0 @@ -package sync - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.kenn.io/agentsview/internal/parser" -) - -func TestClassifyOnePath_Vibe(t *testing.T) { - dir := t.TempDir() - sessionDir := "session_20260616_083518_0107f266" - - // Vibe layout: /session__/messages.jsonl. 
- msgPath := filepath.Join(dir, sessionDir, "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(msgPath), 0o755)) - require.NoError(t, os.WriteFile(msgPath, []byte("{}\n"), 0o644)) - - // A real meta.json sits beside messages.jsonl. Changes to it should - // route back to the sibling messages.jsonl, since title/model/usage - // stats are sourced from meta.json. - metaPath := filepath.Join(dir, sessionDir, "meta.json") - require.NoError(t, os.WriteFile(metaPath, []byte("{}\n"), 0o644)) - - deletedMetaDir := "session_20260616_083519_deleted" - deletedMetaMsgPath := filepath.Join(dir, deletedMetaDir, "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(deletedMetaMsgPath), 0o755)) - require.NoError(t, os.WriteFile(deletedMetaMsgPath, []byte("{}\n"), 0o644)) - deletedMetaPath := filepath.Join(dir, deletedMetaDir, "meta.json") - - // A non-session directory must not classify. - otherPath := filepath.Join(dir, "scratch", "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(otherPath), 0o755)) - require.NoError(t, os.WriteFile(otherPath, []byte("{}\n"), 0o644)) - - eng := &Engine{ - agentDirs: map[parser.AgentType][]string{ - parser.AgentVibe: {dir}, - }, - } - geminiMap := make(map[string]map[string]string) - - tests := []struct { - name string - path string - want bool - wantPath string - wantProject string - }{ - { - name: "messages.jsonl under session dir classifies", - path: msgPath, - want: true, - wantPath: msgPath, - wantProject: sessionDir, - }, - { - name: "messages.jsonl outside session dir ignored", - path: otherPath, - want: false, - }, - { - name: "meta.json routes to sibling messages.jsonl", - path: metaPath, - want: true, - wantPath: msgPath, - wantProject: sessionDir, - }, - { - name: "deleted meta.json routes to sibling messages.jsonl", - path: deletedMetaPath, - want: true, - wantPath: deletedMetaMsgPath, - wantProject: deletedMetaDir, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, 
ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { - assert.Equal(t, parser.AgentVibe, got.Agent) - assert.Equal(t, tt.wantPath, got.Path) - assert.Equal(t, tt.wantProject, got.Project) - } - }) - } -} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 00409aa87..5cdb11644 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -923,9 +923,6 @@ func (e *Engine) classifyContainerPath( if df, ok := e.classifyShelleySQLitePath(path); ok { return df, true } - if df, ok := e.classifyVibePath(path); ok { - return df, true - } return parser.DiscoveredFile{}, false } @@ -1341,55 +1338,6 @@ func (e *Engine) classifyAiderPath( return parser.DiscoveredFile{}, false } -// classifyVibePath handles Vibe's session directory layout: -// -// /session__/messages.jsonl -// /session__/meta.json -// -// meta.json changes route back to messages.jsonl because title, model, -// timestamps, and usage stats are sourced from the sidecar metadata file. 
-func (e *Engine) classifyVibePath( - path string, -) (parser.DiscoveredFile, bool) { - sep := string(filepath.Separator) - for _, vibeDir := range e.agentDirs[parser.AgentVibe] { - if vibeDir == "" { - continue - } - rel, ok := isUnder(vibeDir, path) - if !ok { - continue - } - parts := strings.Split(rel, sep) - if len(parts) != 2 || !strings.HasPrefix(parts[0], "session_") { - continue - } - switch parts[1] { - case "messages.jsonl": - if _, err := os.Stat(path); err != nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentVibe, - }, true - case "meta.json": - messagesPath := filepath.Join( - vibeDir, parts[0], "messages.jsonl", - ) - if _, err := os.Stat(messagesPath); err == nil { - return parser.DiscoveredFile{ - Path: messagesPath, - Project: parts[0], - Agent: parser.AgentVibe, - }, true - } - } - } - return parser.DiscoveredFile{}, false -} - // classifyAntigravitySidecarPath maps Antigravity sidecar events -- // IDE annotations/.pbtxt plus IDE and CLI brain//* artifacts // -- to every session source file that renders them. A CLI storage @@ -4044,8 +3992,6 @@ func (e *Engine) processFile( res = e.processKiroIDE(file, info) case parser.AgentHermes: res = e.processHermes(file, info) - case parser.AgentVibe: - res = e.processVibe(file, info) case parser.AgentPositron: res = e.processPositron(file, info) case parser.AgentZed: @@ -4227,9 +4173,126 @@ func (e *Engine) processProviderFile( }) } } + e.applyProviderFilePathPolicies(provider, file.Agent, &res) return res, true } +// applyProviderFilePathPolicies reproduces the DB-aware, file-path-scoped +// session bookkeeping that a provider cannot do on its own (it has no database +// handle). It runs only for single-session-per-file providers whose canonical +// ID can change while the source path is unchanged (e.g. Vibe, whose ID flips +// between the meta.json session_id and the directory-name fallback as meta.json +// appears or is removed). 
Multi-session sources are skipped, where several +// distinct sessions legitimately share one path; for stable-ID providers it is +// a no-op because the stored ID always matches the freshly parsed one. +// +// Two policies are applied per result, keyed by the (path-rewritten) file_path: +// +// 1. Resurrection guard: if the user removed the session occupying this path — +// a trashed row at the same path, or an alternate identity for the path +// (the provider's excluded fallback ID, or a stale stored ID) that is now +// trashed or permanently excluded — the freshly parsed row must not be +// written under its new ID. The result is dropped and its ID is excluded. +// 2. Stale-row cleanup: any other live stored ID at the same path that the +// current parse no longer emits is added to the exclusion list so the +// superseded row is deleted. +func (e *Engine) applyProviderFilePathPolicies( + provider parser.Provider, + agent parser.AgentType, + res *processResult, +) { + if provider.Capabilities().Source.MultiSessionSource == parser.CapabilitySupported { + return + } + if len(res.results) == 0 { + return + } + + excluded := make(map[string]struct{}, len(res.excludedSessionIDs)) + for _, id := range e.applyIDPrefixToSessionIDs(res.excludedSessionIDs) { + excluded[id] = struct{}{} + } + addExclusion := func(id string) { + if id == "" { + return + } + if _, ok := excluded[id]; ok { + return + } + excluded[id] = struct{}{} + res.excludedSessionIDs = append(res.excludedSessionIDs, id) + } + + kept := res.results[:0] + for _, result := range res.results { + path := result.Session.File.Path + if path == "" { + kept = append(kept, result) + continue + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + currentID := result.Session.ID + currentPrefixedID := e.idPrefix + result.Session.ID + + existingIDs, err := e.db.ListSessionIDsByFilePath(lookupPath, string(agent)) + if err != nil { + log.Printf("list session IDs by file path: %v", 
err) + kept = append(kept, result) + continue + } + + // Resurrection guard. The path's identity is removed when a trashed row + // shares it, or when any alternate identity for the path (the + // provider's excluded fallback IDs or a stale stored ID) is trashed or + // permanently excluded. In that case the new row must not be written. + suppress := e.db.HasTrashedSessionByFilePath(lookupPath, string(agent)) + if !suppress { + for id := range excluded { + if id == currentID || id == currentPrefixedID { + continue + } + if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) { + suppress = true + break + } + } + } + if !suppress { + for _, id := range existingIDs { + if id == currentID || id == currentPrefixedID { + continue + } + if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) { + suppress = true + break + } + } + } + if suppress { + // Keep a trashed current ID trashed rather than converting it to a + // parser deletion; the upsert's trash guard already hides it. + if (currentPrefixedID == "" || !e.db.IsSessionTrashed(currentPrefixedID)) && + !e.db.IsSessionTrashed(currentID) { + addExclusion(currentID) + } + continue + } + + // Stale-row cleanup for live siblings the current parse supersedes. + for _, id := range existingIDs { + if id == currentID || id == currentPrefixedID { + continue + } + addExclusion(id) + } + kept = append(kept, result) + } + res.results = kept +} + func providerOutcomeAllowsCleanSkipCache(outcome parser.ParseOutcome) bool { if !outcome.ResultSetComplete { return false @@ -5926,100 +5989,6 @@ func (e *Engine) processHermes( } } -func (e *Engine) processVibe( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Title/model/usage stats come from the sibling meta.json, so the - // skip check and stored file info must account for it too, or a - // meta.json-only update never refreshes those fields. 
- effectiveInfo := vibeEffectiveInfo(file.Path, info) - if e.shouldSkipByPath(file.Path, effectiveInfo) { - return processResult{skip: true} - } - - // Pass an empty project so the parser-derived project (from the - // session's working directory) is kept. file.Project holds the - // cryptic session directory name, which must not become the project. - sess, msgs, usageEvents, err := parser.ParseVibeSessionWrapper( - file.Path, "", e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - sess.File.Size = effectiveInfo.Size() - sess.File.Mtime = effectiveInfo.ModTime().UnixNano() - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - var excludedIDs []string - lookupPath := file.Path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(file.Path) - } - existingIDs, err := e.db.ListSessionIDsByFilePath( - lookupPath, string(parser.AgentVibe), - ) - if err != nil { - return processResult{err: err} - } - currentID := sess.ID - currentPrefixedID := e.idPrefix + sess.ID - fallbackID := "vibe:" + filepath.Base(filepath.Dir(file.Path)) - for _, id := range existingIDs { - if id != currentID && id != currentPrefixedID { - excludedIDs = append(excludedIDs, id) - } - } - - currentFallbackTrashed := sess.ID == fallbackID && e.isSessionTrashed(fallbackID) - if e.isSessionBlocked(fallbackID) || - (sess.ID == fallbackID && - e.db.HasTrashedSessionByFilePath(lookupPath, string(parser.AgentVibe))) { - if !currentFallbackTrashed && !slices.Contains(excludedIDs, sess.ID) { - excludedIDs = append(excludedIDs, sess.ID) - } - return processResult{excludedSessionIDs: excludedIDs} - } - - // Sessions parsed before meta.json existed (or was parseable) are stored - // under the directory-name fallback ID. Keep excluding that legacy row even - // if it predates file_path metadata and did not appear in the path lookup. 
- if sess.ID != fallbackID && !slices.Contains(excludedIDs, fallbackID) { - excludedIDs = append(excludedIDs, fallbackID) - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - excludedSessionIDs: excludedIDs, - } -} - -func (e *Engine) isSessionBlocked(id string) bool { - if e.idPrefix != "" && !strings.HasPrefix(id, e.idPrefix) { - prefixed := e.idPrefix + id - return e.db.IsSessionExcluded(prefixed) || e.db.IsSessionTrashed(prefixed) - } - if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) { - return true - } - return false -} - -func (e *Engine) isSessionTrashed(id string) bool { - if e.idPrefix != "" && !strings.HasPrefix(id, e.idPrefix) { - return e.db.IsSessionTrashed(e.idPrefix + id) - } - return e.db.IsSessionTrashed(id) -} - // vibeEffectiveInfo returns size/mtime for a Vibe session that account // for the sibling meta.json file: size is the sum of both files, and // mtime is the larger of the two. 
Returns info unchanged when meta.json diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index 7b3538587..f6824249b 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1225,10 +1225,15 @@ func TestProcessAntigravityWALOnlyUpdateNotSkipped(t *testing.T) { func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { database := openTestDB(t) - e := &Engine{db: database} ctx := context.Background() root := t.TempDir() + e := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVibe: {root}, + }, + }) + sessionDir := filepath.Join(root, "session_20260616_083518_0107f266") require.NoError(t, os.MkdirAll(sessionDir, 0o755)) @@ -1246,32 +1251,19 @@ func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { 0o644, )) - file := parser.DiscoveredFile{ - Agent: parser.AgentVibe, - Path: msgPath, - } - - res := e.processFile(ctx, file) - require.NoError(t, res.err) - require.False(t, res.skip) - require.Len(t, res.results, 1) - require.Equal(t, "Original title", res.results[0].Session.SessionName) + canonicalID := "vibe:abc" - pw := pendingWrite{ - sess: res.results[0].Session, - msgs: res.results[0].Messages, - } - written, _, failed := e.writeBatch( - []pendingWrite{pw}, syncWriteDefault, false, - ) - require.Equal(t, 0, failed) - require.Equal(t, 1, written) - - res = e.processFile(ctx, file) - require.True(t, res.skip, "unchanged session should skip") + e.SyncPaths([]string{msgPath}) + sess, err := database.GetSession(ctx, canonicalID) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Original title", *sess.DisplayName) // meta.json-only update: messages.jsonl is untouched, but the title - // (sourced from meta.json) changes. + // (sourced from meta.json) changes. 
The Vibe provider's composite + // fingerprint folds the sibling meta.json mtime in, so the change busts + // the skip cache and triggers a reparse rather than a skip. info, err := os.Stat(msgPath) require.NoError(t, err) metaTime := info.ModTime().Add(5 * time.Second) @@ -1282,10 +1274,12 @@ func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { )) require.NoError(t, os.Chtimes(metaPath, metaTime, metaTime)) - res = e.processFile(ctx, file) - require.False(t, res.skip, "meta.json-only update must trigger a reparse") - require.Len(t, res.results, 1) - assert.Equal(t, "Renamed title", res.results[0].Session.SessionName) + e.SyncPaths([]string{msgPath}) + sess, err = database.GetSession(ctx, canonicalID) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Renamed title", *sess.DisplayName) } func TestProcessAntigravityBrainOnlyUpdateNotSkipped(t *testing.T) { From f86ae4e376ed540f38df3bf4d4c64c0b306a7641 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Mon, 22 Jun 2026 22:16:25 -0400 Subject: [PATCH 04/11] feat(parser): migrate hermes provider Hermes can represent a configured root as either individual transcript files or as a state.db archive that fans out into multiple sessions. Moving it behind a concrete provider makes that source choice explicit instead of leaving archive behavior inside the legacy adapter path. The provider preserves transcript discovery and lookup while treating state.db as a multi-session, force-replace source. Its fingerprint covers the archive database plus sibling transcripts so transcript-quality changes can refresh the archive source that ParseHermesArchive reads. fix(parser): preserve hermes archive event coverage Hermes archive discovery can normalize a configured sessions directory or direct state.db path into a sibling archive source, but the watch plan and changed-path classifier still assumed the configured root was the only event root. 
That left state.db updates and removed primary files invisible to provider-path sync. Normalize archive watch roots, map delete and rename-style events syntactically when primary files are gone, and cover archive-parent, sessions-directory, and direct-state roots. This lets Hermes enter shadow comparison as an actual migration branch. Validation: go test -tags "fts5" ./internal/parser -run 'Test(HermesProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): watch hermes archive roots syntactically Hermes archive configs can point at the archive parent, its sessions directory, or the state.db file before the sibling archive components have been created. Watch planning needs to treat those shapes as archive roots from their paths, not from startup-time existence checks, otherwise late-created metadata or transcripts are invisible until a full sync. The transcript watch root is now retained for archive-shaped roots even when sessions/ is not present yet, while ordinary transcript-only roots keep their recursive file watch. Validation: go test -tags "fts5" ./internal/parser -run 'TestHermesProvider' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check fix(parser): feed hermes archive roots to runtime watcher Hermes provider watch planning now knows how to follow archive-shaped roots, but the actual serve-time watcher still reads registry watch resolvers. Without a matching Hermes resolver there, the default .hermes/sessions config can miss sibling state.db creation or updates in live sync. Expose Hermes shallow archive-parent watch roots through the registry while keeping transcript roots recursive, and add shadow parity coverage so this branch remains a migration rather than an additive provider implementation. 
Validation: go test -tags "fts5" ./cmd/agentsview ./internal/parser ./internal/sync -run 'TestCollectWatchRootsHermesSessionsWatchesStateDBParent|TestHermesProvider|TestParseHermes|TestProviderMigrationModes|TestObserveProviderSourceMatchesHermesLegacyParser' -count=1; go test -tags "fts5" ./cmd/agentsview ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./cmd/agentsview/... ./internal/parser/... ./internal/sync/...; git diff --check fix(sync): classify hermes archive watcher events Roborev jobs 2715 and 2716 caught that Hermes archive watch roots were subscribed but the legacy SyncPaths classifier still ignored sibling state.db events. That meant live sync could wait for a periodic full sync even though the watcher saw the change. Map configured Hermes archive roots, state.db events, and direct archive transcript events back to the state.db source that processHermes already parses, while preserving transcript-only root classification for standalone Hermes session files. Validation: go test -tags "fts5" ./internal/sync -run TestSyncPathsHermesStateDBEventRefreshesArchive -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -run 'Test(HermesProvider|ObserveProviderSourceMatchesHermesLegacyParser|SyncPathsHermesStateDBEventRefreshesArchive)' -count=1; go fmt ./...; go vet ./...; git diff --check fix(sync): include hermes transcripts in archive skips Roborev job 2803 caught that Hermes transcript watcher events could still be suppressed by state.db-only skip metadata after being routed to the archive source. In mixed state-db/transcript archives, state.db can be unchanged while a sibling transcript is new or updated. Use archive-effective size and mtime for state.db skip checks by folding direct transcript files from the sibling sessions directory into the snapshot, and add a regression where a transcript event refreshes an already-indexed archive. 
Validation: go test -tags "fts5" ./internal/sync -run 'TestSyncPathsHermes(ArchiveTranscriptEventRefreshesArchive|StateDBEventRefreshesArchive)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -run 'Test(HermesProvider|ObserveProviderSourceMatchesHermesLegacyParser|SyncPathsHermes)' -count=1; go fmt ./...; go vet ./...; git diff --check fix(sync): use aggregate hermes archive fingerprints Hermes archive freshness needs the state.db sync path to compare the same aggregate fingerprint it persists. Discovering through the public Hermes session lister reselected state.db and missed sibling transcripts, so state.db events could avoid real skip-cache parity. Enumerate direct transcript files for the archive snapshot and stamp archive parse results with the aggregate state.db fingerprint before writing. This keeps unchanged archive syncs comparable while still refreshing when sibling transcripts change. Validation: go test -tags "fts5" ./internal/parser ./internal/sync; go vet ./...; make nilaway fix(sync): apply hermes archive fingerprints consistently Hermes archive refresh paths need to compare and persist the same aggregate fingerprint for state.db plus sibling transcripts. Otherwise cached parse skips and single-session refreshes can fall back to raw state.db metadata and miss transcript-only archive changes. Use the aggregate archive file info before generic skip-cache checks and share the archive parse-and-stamp helper between full archive processing and single-session refreshes. The regression coverage now persists the metadata, checks unchanged archive skips, and covers transcript discovery/removal behavior. 
Validation: go test -tags "fts5" ./internal/sync -run 'TestHermesArchive|TestProcessFileHermes|TestProcessHermesArchive|TestSyncSingleHermesArchive' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync; go vet ./...; make nilaway refactor(parser): fold hermes into provider Move Hermes source discovery, lookup, and parse ownership onto the concrete hermesProvider and delete the package-level DiscoverHermesSessions, FindHermesSourceFile, ParseHermesArchive, and ParseHermesSession free functions. Discovery and find-source bodies now live as provider-owned helpers (discoverHermesSessions, findHermesSourceFile); parse, archive parse, the state-db reader, and the transcript-archive fallback become hermesProvider methods (parseSession, parseArchive, parseStateDB, parseTranscriptArchive). Reproduce Hermes archive behavior on the provider. The provider's archive Parse now stamps every state.db session with the state.db path plus the aggregate (state.db + direct transcripts) size and mtime, replacing the engine's stampHermesArchiveResults/hermesArchiveEffectiveInfo so a transcript-only change still refreshes the archive's stored freshness. The new provider helpers hermesArchiveEffectiveFileInfo and hermesArchiveTranscriptFiles mirror the legacy engine aggregation (every .jsonl and session_*.json directly under the sessions directory, no dedup). The existing composite archive Fingerprint and archive watch/classify source-set methods already carried the rest. Make Hermes provider-authoritative and drop its legacy sync dispatch: remove classifyHermesPath (and its hermesSyncArchivePaths, hermesSyncDirExists, hermesSyncTranscriptPath helpers), the processFile hermesArchiveEffectiveInfo stat hook and case arm, processHermes, parseHermesArchive, stampHermesArchiveResults, hermesArchiveEffectiveInfo, hermesArchiveTranscriptFiles, hermesArchiveSourcePaths, and the syncSingleHermesArchive special-case plus its method. 
Single-session resync of an archive now falls through to the generic provider path, which reparses the whole archive (ForceReplace) the same way a full sync does. Drop the Hermes AgentDef DiscoverFunc/FindSourceFunc hooks (the provider-owned WatchRootsFunc/ShallowWatchRootsFunc stay), remove hermes_provider.go from the pending shim scan list, replace the shadow-baseline test with provider-API coverage plus a guard that the legacy entrypoints stay gone, and route the package and engine archive tests through provider methods and the provider-authoritative processFile/SyncPaths paths. Add internal/sync/provider_shadow_support_test.go defining the shared writeProviderShadowSourceFile test helper that the remaining vibe shadow test still references, which was orphaned by a predecessor commit. test(sync): drop unused shadow source-file helper The hermes fold left writeProviderShadowSourceFile in a dedicated test support file, but every shadow test writes its fixtures inline, so the helper has no callers and trips the unused linter. Remove the dead scaffolding. 
--- cmd/agentsview/main_test.go | 23 + internal/parser/hermes.go | 47 +- internal/parser/hermes_provider.go | 690 +++++++++++++++++++++ internal/parser/hermes_provider_test.go | 472 ++++++++++++++ internal/parser/hermes_test.go | 106 +++- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 1 - internal/parser/types.go | 18 +- internal/sync/engine.go | 104 ---- internal/sync/hermes_archive_test.go | 318 ++++++++++ internal/sync/hermes_integration_test.go | 152 +++++ 12 files changed, 1775 insertions(+), 160 deletions(-) create mode 100644 internal/parser/hermes_provider.go create mode 100644 internal/parser/hermes_provider_test.go create mode 100644 internal/sync/hermes_archive_test.go create mode 100644 internal/sync/hermes_integration_test.go diff --git a/cmd/agentsview/main_test.go b/cmd/agentsview/main_test.go index b55268b94..fac5f2f69 100644 --- a/cmd/agentsview/main_test.go +++ b/cmd/agentsview/main_test.go @@ -570,6 +570,29 @@ func TestStartRemoteHostSync_NilEmitterSafe(t *testing.T) { <-exited } +func TestCollectWatchRootsHermesSessionsWatchesStateDBParent(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.Mkdir(sessionsDir, 0o755), "mkdir sessions") + + cfg := config.Config{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {sessionsDir}, + }, + } + + roots, unwatchedDirs := collectWatchRoots(cfg) + + require.Empty(t, unwatchedDirs, "unwatched dirs before watcher setup") + require.Len(t, roots, 2) + assert.Equal(t, root, roots[0].root) + assert.True(t, roots[0].shallow) + assert.Equal(t, []string{sessionsDir}, roots[0].dirs) + assert.Equal(t, sessionsDir, roots[1].root) + assert.False(t, roots[1].shallow) + assert.Equal(t, []string{sessionsDir}, roots[1].dirs) +} + func TestResyncCoversSignals(t *testing.T) { tests := []struct { name string diff --git a/internal/parser/hermes.go b/internal/parser/hermes.go 
index 7c2d42989..819a25dd5 100644 --- a/internal/parser/hermes.go +++ b/internal/parser/hermes.go @@ -54,17 +54,19 @@ type hermesStateMessage struct { codexMessageItems string } -// ParseHermesArchive parses a Hermes root directory. If a state.db is -// present, it uses that database for session metadata and usage while -// selecting the richest available message stream. Without state.db it -// falls back to the transcript-file parser. -func ParseHermesArchive(root, project, machine string) ([]ParseResult, error) { +// parseArchive parses a Hermes root directory. If a state.db is present, it +// uses that database for session metadata and usage while selecting the richest +// available message stream. Without state.db it falls back to the +// transcript-file parser. It owns the archive on-disk shape (state.db plus the +// sessions transcript directory) for the Hermes provider; the package-level +// entrypoint was folded onto the provider. +func (p *hermesProvider) parseArchive(root, project, machine string) ([]ParseResult, error) { stateDB, sessionsDir, ok := hermesStatePaths(root) if !ok { - return parseHermesTranscriptArchive(root, project, machine) + return p.parseTranscriptArchive(root, project, machine) } - results, err := parseHermesStateDB( + results, err := p.parseStateDB( stateDB, sessionsDir, project, machine, ) if err == nil { @@ -74,12 +76,12 @@ func ParseHermesArchive(root, project, machine string) ([]ParseResult, error) { "hermes: state db parse failed for %s: %v; falling back to transcripts", stateDB, err, ) - return parseHermesTranscriptArchive( + return p.parseTranscriptArchive( sessionsDir, project, machine, ) } -func parseHermesTranscriptArchive( +func (p *hermesProvider) parseTranscriptArchive( root, project, machine string, ) ([]ParseResult, error) { var results []ParseResult @@ -88,7 +90,7 @@ func parseHermesTranscriptArchive( if project != "" { fileProject = project } - sess, msgs, err := ParseHermesSession( + sess, msgs, err := 
p.parseSession( file.Path, fileProject, machine, ) if err != nil { @@ -103,7 +105,9 @@ func parseHermesTranscriptArchive( return results, nil } -// ParseHermesSession parses a Hermes Agent JSONL session file. +// parseSession parses a Hermes Agent session file. It owns the on-disk shape +// (flat JSONL transcripts plus session_*.json snapshots) for the Hermes +// provider; the package-level entrypoint was folded onto the provider. // // Hermes stores sessions as flat JSONL files in ~/.hermes/sessions/ // with filenames like 20260403_153620_5a3e2ff1.jsonl. @@ -114,7 +118,7 @@ func parseHermesTranscriptArchive( // - Assistant messages: {"role":"assistant", "content":"...", "reasoning":"...", // "finish_reason":"tool_calls|stop", "tool_calls":[...], "timestamp":"..."} // - Tool results: {"role":"tool", "content":"...", "tool_call_id":"...", "timestamp":"..."} -func ParseHermesSession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { +func (p *hermesProvider) parseSession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { if strings.HasSuffix(path, ".json") { return parseHermesJSONSession(path, project, machine) } @@ -546,7 +550,7 @@ func hermesStatePaths(root string) (stateDB, sessionsDir string, ok bool) { return "", "", false } -func parseHermesStateDB( +func (p *hermesProvider) parseStateDB( stateDB, sessionsDir, project, machine string, ) ([]ParseResult, error) { conn, err := sql.Open("sqlite3", "file:"+stateDB+"?mode=ro") @@ -580,7 +584,7 @@ func parseHermesStateDB( if _, ok := seen[rawID]; ok { continue } - sess, msgs, err := ParseHermesSession( + sess, msgs, err := p.parseSession( file.Path, file.Project, machine, ) if err != nil { @@ -1043,10 +1047,11 @@ func HermesSessionID(name string) string { return name } -// DiscoverHermesSessions finds Hermes session sources. When a sibling -// state.db exists, it prefers that archive root; otherwise it returns -// transcript files from the sessions directory. 
-func DiscoverHermesSessions(sessionsDir string) []DiscoveredFile { +// discoverHermesSessions finds Hermes session sources under root. When a +// sibling state.db exists, it prefers that archive root; otherwise it returns +// transcript files from the sessions directory. It is the provider-owned +// discovery body folded off the package-level entrypoint. +func discoverHermesSessions(sessionsDir string) []DiscoveredFile { if sessionsDir == "" { return nil } @@ -1112,8 +1117,10 @@ func discoverHermesTranscriptFiles(sessionsDir string) []DiscoveredFile { return files } -// FindHermesSourceFile finds a Hermes session file by session ID. -func FindHermesSourceFile(sessionsDir, sessionID string) string { +// findHermesSourceFile finds a Hermes transcript file by session ID under +// sessionsDir. It is the provider-owned find-source body folded off the +// package-level entrypoint. +func findHermesSourceFile(sessionsDir, sessionID string) string { if !IsValidSessionID(sessionID) { return "" } diff --git a/internal/parser/hermes_provider.go b/internal/parser/hermes_provider.go new file mode 100644 index 000000000..5c3f7e45a --- /dev/null +++ b/internal/parser/hermes_provider.go @@ -0,0 +1,690 @@ +package parser + +import ( + "context" + "crypto/sha256" + "database/sql" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "sort" + "strings" +) + +var _ Provider = (*hermesProvider)(nil) + +type hermesProviderFactory struct { + def AgentDef +} + +func newHermesProviderFactory(def AgentDef) ProviderFactory { + return hermesProviderFactory{def: cloneAgentDef(def)} +} + +func (f hermesProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f hermesProviderFactory) Capabilities() Capabilities { + return hermesProviderCapabilities() +} + +func (f hermesProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &hermesProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: hermesProviderCapabilities(), + 
Config: cfg, + }, + sources: newHermesSourceSet(cfg.Roots), + } +} + +type hermesProvider struct { + ProviderBase + sources hermesSourceSet +} + +func (p *hermesProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *hermesProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *hermesProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *hermesProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *hermesProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *hermesProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("hermes source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + if filepath.Base(path) == "state.db" { + results, err := p.parseArchive(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + // Mirror the legacy engine's stampHermesArchiveResults: every archive + // session's stored file identity is the state.db path with the + // aggregate (state.db plus transcripts) size and mtime, so a + // transcript-only change still refreshes the archive's freshness. 
+ size, mtime := hermesArchiveEffectiveFileInfo(path) + out := make([]ParseResultOutcome, 0, len(results)) + for i := range results { + results[i].Session.File.Path = path + results[i].Session.File.Size = size + results[i].Session.File.Mtime = mtime + out = append(out, ParseResultOutcome{ + Result: results[i], + DataVersion: DataVersionCurrent, + }) + } + return ParseOutcome{ + Results: out, + ResultSetComplete: true, + ForceReplace: true, + }, nil + } + + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type hermesSource struct { + Root string + Path string +} + +type hermesSourceSet struct { + roots []string +} + +func newHermesSourceSet(roots []string) hermesSourceSet { + return hermesSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s hermesSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range discoverHermesSessions(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s hermesSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, hermesWatchRoots(root)...) 
+ } + return WatchPlan{Roots: roots}, nil +} + +func (s hermesSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + allowMissing := jsonlMissingPathFallbackAllowed(req) + if req.WatchRoot != "" { + watchRoot := filepath.Clean(req.WatchRoot) + for _, root := range s.roots { + if !hermesWatchRootMatches(root, watchRoot) { + continue + } + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s hermesSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceForPath(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + if stateDB, _, ok := hermesStatePaths(root); ok && + IsValidSessionID(req.RawSessionID) { + found, err := hermesStateDBHasSession(stateDB, req.RawSessionID) + if err != nil { + return SourceRef{}, false, err + } + if !found { + continue + } + if source, ok := s.sourceRef(root, stateDB); ok { + return source, true, nil + } + } + transcriptRoot := hermesTranscriptRoot(root) + path := findHermesSourceFile(transcriptRoot, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func hermesStateDBHasSession(stateDB string, rawID string) (bool, error) { + conn, err := 
sql.Open("sqlite3", "file:"+stateDB+"?mode=ro") + if err != nil { + return false, fmt.Errorf("open hermes state db: %w", err) + } + defer conn.Close() + + var found int + err = conn.QueryRow( + "SELECT 1 FROM sessions WHERE id = ? LIMIT 1", + rawID, + ).Scan(&found) + if err == nil { + return true, nil + } + if err == sql.ErrNoRows { + return false, nil + } + return false, fmt.Errorf("query hermes session %s: %w", rawID, err) +} + +func (s hermesSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("hermes source path unavailable") + } + if filepath.Base(path) == "state.db" { + return hermesArchiveFingerprint(source, path) + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Hash: hash, + }, nil +} + +func (s hermesSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case hermesSource: + return src.Path, src.Path != "" + case *hermesSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + for _, root := range s.roots { + if ref, ok := s.sourceForPath(root, candidate); ok { + src := ref.Opaque.(hermesSource) + return src.Path, true + } + } + } + return "", false +} + +func (s hermesSourceSet) sourceForPath(root, path string) (SourceRef, bool) { + return 
s.sourceForChangedPath(root, path, false) +} + +func (s hermesSourceSet) sourceForChangedPath( + root, + path string, + allowMissing bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if stateDB, sessionsDir, ok := hermesStatePaths(root); ok { + if samePath(path, stateDB) || hermesPathInTranscriptDir(sessionsDir, path) { + return hermesArchiveSourceRef(root, stateDB) + } + return SourceRef{}, false + } + if allowMissing { + if stateDB, sessionsDir, ok := hermesArchivePathsForEvent(root, path); ok && + (samePath(path, stateDB) || hermesPathInTranscriptDir(sessionsDir, path)) { + return hermesArchiveSourceRef(root, stateDB) + } + transcriptRoot := hermesTranscriptRoot(root) + if hermesPathInTranscriptDir(transcriptRoot, path) { + return hermesTranscriptSourceRef(root, path) + } + } + return s.sourceRef(root, path) +} + +func (s hermesSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if stateDB, _, ok := hermesStatePaths(root); ok && samePath(path, stateDB) { + return hermesArchiveSourceRef(root, stateDB) + } + transcriptRoot := hermesTranscriptRoot(root) + if !hermesPathInTranscriptDir(transcriptRoot, path) || !IsRegularFile(path) { + return SourceRef{}, false + } + return hermesTranscriptSourceRef(root, path) +} + +func hermesArchiveSourceRef(root, stateDB string) (SourceRef, bool) { + root = filepath.Clean(root) + stateDB = filepath.Clean(stateDB) + return SourceRef{ + Provider: AgentHermes, + Key: stateDB, + DisplayPath: stateDB, + FingerprintKey: stateDB, + Opaque: hermesSource{ + Root: root, + Path: stateDB, + }, + }, true +} + +func hermesTranscriptSourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + return SourceRef{ + Provider: AgentHermes, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: hermesSource{ + Root: root, + Path: path, + }, + }, true +} + +func 
hermesWatchRoots(root string) []WatchRoot { + root = filepath.Clean(root) + if stateDB, sessionsDir, ok := hermesArchiveRootPaths(root); ok { + watchRoots := []WatchRoot{{ + Path: filepath.Dir(stateDB), + Recursive: false, + IncludeGlobs: []string{"state.db"}, + DebounceKey: string(AgentHermes) + ":archive:" + root, + }} + watchRoots = append(watchRoots, WatchRoot{ + Path: sessionsDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "session_*.json"}, + DebounceKey: string(AgentHermes) + ":sessions:" + root, + }) + return watchRoots + } + return []WatchRoot{{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"state.db", "*.jsonl", "session_*.json"}, + DebounceKey: string(AgentHermes) + ":sessions:" + root, + }} +} + +func ResolveHermesWatchRoots(root string) []string { + root = filepath.Clean(root) + if _, sessionsDir, ok := hermesArchiveRootPaths(root); ok { + return []string{sessionsDir} + } + return []string{root} +} + +func ResolveHermesShallowWatchRoots(root string) []string { + root = filepath.Clean(root) + if stateDB, _, ok := hermesArchiveRootPaths(root); ok { + return []string{filepath.Dir(stateDB)} + } + return nil +} + +func hermesWatchRootMatches(root, watchRoot string) bool { + root = filepath.Clean(root) + watchRoot = filepath.Clean(watchRoot) + if samePath(root, watchRoot) { + return true + } + if stateDB, sessionsDir, ok := hermesArchiveRootPaths(root); ok { + return samePath(watchRoot, filepath.Dir(stateDB)) || + samePath(watchRoot, sessionsDir) + } + switch filepath.Base(root) { + case "state.db": + return samePath(watchRoot, filepath.Dir(root)) || + samePath(watchRoot, filepath.Join(filepath.Dir(root), "sessions")) + case "sessions": + return samePath(watchRoot, filepath.Dir(root)) + default: + return samePath(watchRoot, filepath.Join(root, "sessions")) + } +} + +func hermesArchivePathsForEvent(root, path string) (stateDB, sessionsDir string, ok bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + switch { + case 
filepath.Base(root) == "state.db": + stateDB = root + sessionsDir = filepath.Join(filepath.Dir(root), "sessions") + case filepath.Base(root) == "sessions": + stateDB = filepath.Join(filepath.Dir(root), "state.db") + sessionsDir = root + case samePath(path, filepath.Join(root, "state.db")) || + IsRegularFile(filepath.Join(root, "state.db")): + stateDB = filepath.Join(root, "state.db") + sessionsDir = filepath.Join(root, "sessions") + default: + return "", "", false + } + return stateDB, sessionsDir, true +} + +func hermesArchiveRootPaths(root string) (stateDB, sessionsDir string, ok bool) { + root = filepath.Clean(root) + if stateDB, sessionsDir, ok := hermesStatePaths(root); ok { + return stateDB, sessionsDir, true + } + switch filepath.Base(root) { + case "state.db": + return root, filepath.Join(filepath.Dir(root), "sessions"), true + case "sessions": + return filepath.Join(filepath.Dir(root), "state.db"), root, true + default: + stateDB = filepath.Join(root, "state.db") + sessionsDir = filepath.Join(root, "sessions") + if IsRegularFile(stateDB) { + return stateDB, sessionsDir, true + } + if info, err := os.Stat(sessionsDir); err == nil && info.IsDir() { + return stateDB, sessionsDir, true + } + return "", "", false + } +} + +func hermesTranscriptRoot(root string) string { + root = filepath.Clean(root) + if _, sessionsDir, ok := hermesStatePaths(root); ok { + return sessionsDir + } + childSessions := filepath.Join(root, "sessions") + if info, err := os.Stat(childSessions); err == nil && info.IsDir() { + return childSessions + } + return root +} + +func hermesPathInTranscriptDir(dir, path string) bool { + dir = filepath.Clean(dir) + path = filepath.Clean(path) + if !samePath(filepath.Dir(path), dir) { + return false + } + name := filepath.Base(path) + if strings.HasSuffix(name, ".jsonl") { + return true + } + return strings.HasSuffix(name, ".json") && strings.HasPrefix(name, "session_") +} + +func hermesArchiveFingerprint(source SourceRef, stateDB string) 
(SourceFingerprint, error) { + stateInfo, err := os.Stat(stateDB) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", stateDB, err) + } + if stateInfo.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", stateDB) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString( + source.FingerprintKey, + source.Key, + stateDB, + ), + Size: stateInfo.Size(), + MTimeNS: stateInfo.ModTime().UnixNano(), + } + h := sha256.New() + if err := addHermesFingerprintPart(h, "state", stateDB, stateInfo); err != nil { + return SourceFingerprint{}, err + } + _, sessionsDir, _ := hermesStatePaths(stateDB) + for _, file := range discoverHermesTranscriptFiles(sessionsDir) { + info, err := os.Stat(file.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", file.Path, err) + } + fingerprint.Size += info.Size() + if mtime := info.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addHermesFingerprintPart(h, "transcript", file.Path, info); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +// hermesArchiveEffectiveFileInfo returns the aggregate size and mtime of a +// Hermes archive: the state.db plus every transcript file in its sessions +// directory. It reproduces the legacy engine's hermesArchiveEffectiveInfo so a +// transcript-only change shifts the stored archive freshness even though the +// state.db itself is unchanged. The transcript set matches the legacy +// hermesArchiveTranscriptFiles: every .jsonl and session_*.json file directly +// under the sessions directory, without the .jsonl/.json dedup used elsewhere. 
+func hermesArchiveEffectiveFileInfo(stateDB string) (int64, int64) { + info, err := os.Stat(stateDB) + if err != nil { + return 0, 0 + } + size := info.Size() + mtime := info.ModTime().UnixNano() + _, sessionsDir, ok := hermesStatePaths(stateDB) + if !ok { + return size, mtime + } + for _, path := range hermesArchiveTranscriptFiles(sessionsDir) { + fileInfo, err := os.Stat(path) + if err != nil || fileInfo == nil || fileInfo.IsDir() { + continue + } + size += fileInfo.Size() + if fileMtime := fileInfo.ModTime().UnixNano(); fileMtime > mtime { + mtime = fileMtime + } + } + return size, mtime +} + +// hermesArchiveTranscriptFiles lists every .jsonl and session_*.json file +// directly under sessionsDir, sorted by path. It mirrors the legacy engine +// helper of the same name so the provider's effective-info aggregation matches +// historical behavior exactly. +func hermesArchiveTranscriptFiles(sessionsDir string) []string { + if sessionsDir == "" { + return nil + } + entries, err := os.ReadDir(sessionsDir) + if err != nil { + return nil + } + paths := make([]string, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + if strings.HasSuffix(name, ".jsonl") || + strings.HasPrefix(name, "session_") && strings.HasSuffix(name, ".json") { + paths = append(paths, filepath.Join(sessionsDir, name)) + } + } + sort.Strings(paths) + return paths +} + +func addHermesFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func hermesProviderCapabilities() Capabilities { + return Capabilities{ + Source: 
SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Relationships: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/hermes_provider_test.go b/internal/parser/hermes_provider_test.go new file mode 100644 index 000000000..1beec4f7d --- /dev/null +++ b/internal/parser/hermes_provider_test.go @@ -0,0 +1,472 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHermesProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentHermes) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestHermesProviderTranscriptSourceMethods(t *testing.T) { + root := t.TempDir() + jsonlPath := filepath.Join(root, "child.jsonl") + jsonPath := filepath.Join(root, "session_jsononly.json") + writeSourceFile(t, jsonlPath, hermesProviderJSONLFixture("jsonl question")) + writeSourceFile(t, jsonPath, hermesProviderJSONFixture("json question")) + writeSourceFile(t, filepath.Join(root, "scratch.json"), "{}\n") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + 
require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db", "*.jsonl", "session_*.json"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{jsonlPath, jsonPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~hermes:child", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonlPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "jsononly", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonPath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, jsonPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(jsonlPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "scratch.json"), + EventKind: 
"write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) +} + +func TestHermesProviderStateDBSourceMethods(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + transcriptPath := filepath.Join(sessionsDir, "session_child.json") + writeSourceFile(t, transcriptPath, hermesProviderJSONFixture("transcript question")) + stateDB := filepath.Join(root, "state.db") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, stateDB, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~hermes:child", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, stateDB, found.DisplayPath) + + stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + transcriptInfo, err := os.Stat(transcriptPath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, stateDB, fingerprint.Key) + assert.Equal(t, stateInfo.Size()+transcriptInfo.Size(), fingerprint.Size) + assert.Equal( + t, + max(stateInfo.ModTime().UnixNano(), transcriptInfo.ModTime().UnixNano()), + fingerprint.MTimeNS, + ) + assert.NotEmpty(t, 
fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + }{ + {name: "state db", path: stateDB}, + {name: "archive transcript", path: transcriptPath}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + } + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + + require.NoError(t, os.Remove(transcriptPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: transcriptPath, EventKind: "remove", WatchRoot: sessionsDir}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + + require.NoError(t, os.Remove(stateDB)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) +} + +func TestHermesProviderArchiveWatchRoots(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + stateDB := filepath.Join(root, "state.db") + + for _, tc := range []struct { + name string + configRoot string + }{ + {name: "archive parent", configRoot: root}, + {name: "sessions directory", configRoot: sessionsDir}, + {name: "state db file", configRoot: stateDB}, + } { + t.Run(tc.name, func(t *testing.T) { + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{tc.configRoot}, + 
Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + } +} + +func TestHermesProviderArchiveWatchRootsBeforeArchiveComplete(t *testing.T) { + t.Run("state db exists before sessions directory", func(t *testing.T) { + root := t.TempDir() + createHermesStateDB(t, root) + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + + t.Run("direct state db root before file exists", func(t *testing.T) { + root 
:= t.TempDir() + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{stateDB}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + createHermesStateDB(t, root) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + + t.Run("sessions directory root before state db exists", func(t *testing.T) { + root := t.TempDir() + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{sessionsDir}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + createHermesStateDB(t, root) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", 
WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) +} + +func TestHermesProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "child.jsonl") + writeSourceFile(t, sourcePath, hermesProviderJSONLFixture("parse question")) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "hermes:child", result.Result.Session.ID) + assert.Equal(t, AgentHermes, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, "abc123", result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestHermesProviderParseStateDB(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + transcriptPath := filepath.Join(sessionsDir, "session_child.json") + writeSourceFile( + t, + transcriptPath, + hermesProviderJSONFixture("archive transcript"), + ) + stateDB := filepath.Join(root, "state.db") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := 
provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: stateDB, Hash: "archive-hash"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "hermes:child", result.Result.Session.ID) + assert.Equal(t, "hermes:parent", result.Result.Session.ParentSessionID) + assert.Equal(t, RelContinuation, result.Result.Session.RelationshipType) + assert.Equal(t, "Child Session", result.Result.Session.SessionName) + assert.Equal(t, "hermes-state-db", result.Result.Session.SourceVersion) + assert.Equal(t, "devbox", result.Result.Session.Machine) + require.Len(t, result.Result.UsageEvents, 1) + assert.Len(t, result.Result.Messages, 2) + + // The provider reproduces the legacy engine's stampHermesArchiveResults: + // every archive session's stored file identity is the state.db path with + // the aggregate (state.db plus transcripts) size and mtime, so a + // transcript-only change still refreshes the archive's freshness. 
+ stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + transcriptInfo, err := os.Stat(transcriptPath) + require.NoError(t, err) + assert.Equal(t, stateDB, result.Result.Session.File.Path) + assert.Equal( + t, + stateInfo.Size()+transcriptInfo.Size(), + result.Result.Session.File.Size, + ) + assert.Equal( + t, + max(stateInfo.ModTime().UnixNano(), transcriptInfo.ModTime().UnixNano()), + result.Result.Session.File.Mtime, + ) +} + +func TestHermesProviderFindSourceDoesNotReturnStateDBForMissingRawID(t *testing.T) { + root := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(root, "sessions"), 0o755)) + createHermesStateDB(t, root) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "missing-valid-id", + }) + + require.NoError(t, err) + assert.False(t, ok) + assert.Empty(t, source) +} + +func hermesProviderJSONLFixture(firstMessage string) string { + return `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}` + "\n" + + `{"role":"user","content":"` + firstMessage + `","timestamp":"2026-05-14T10:01:00.000000"}` + "\n" + + `{"role":"assistant","content":"Done.","timestamp":"2026-05-14T10:02:00.000000"}` + "\n" +} + +func hermesProviderJSONFixture(firstMessage string) string { + return `{ + "platform":"cli", + "session_start":"2026-05-14T10:00:00Z", + "last_updated":"2026-05-14T10:02:00Z", + "messages":[ + {"role":"user","content":"` + firstMessage + `","timestamp":"2026-05-14T10:01:00Z"}, + {"role":"assistant","content":"Done.","timestamp":"2026-05-14T10:02:00Z"} + ] + }` +} diff --git a/internal/parser/hermes_test.go b/internal/parser/hermes_test.go index feae3e089..c1bd94ea4 100644 --- a/internal/parser/hermes_test.go +++ b/internal/parser/hermes_test.go @@ -13,6 +13,57 @@ import ( "github.com/stretchr/testify/require" ) +// 
newHermesTestProvider builds a concrete hermesProvider for the given roots so +// package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods. +func newHermesTestProvider(t *testing.T, roots ...string) *hermesProvider { + t.Helper() + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + hp, ok := provider.(*hermesProvider) + require.True(t, ok) + return hp +} + +// parseHermesTestSession parses a Hermes transcript at path through the +// provider-owned parse method, replacing the removed package-level +// ParseHermesSession entrypoint. +func parseHermesTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newHermesTestProvider(t).parseSession(path, project, machine) +} + +// parseHermesTestArchive parses a Hermes archive root through the provider-owned +// archive method, replacing the removed package-level ParseHermesArchive +// entrypoint. +func parseHermesTestArchive( + t *testing.T, root, project, machine string, +) ([]ParseResult, error) { + t.Helper() + return newHermesTestProvider(t).parseArchive(root, project, machine) +} + +// discoverHermesTestSessions discovers Hermes sources under root through the +// provider source set, replacing the removed package-level +// DiscoverHermesSessions entrypoint. +func discoverHermesTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + return discoverHermesSessions(root) +} + +// findHermesTestSourceFile resolves a Hermes session ID to a transcript path +// through the provider source set, replacing the removed package-level +// FindHermesSourceFile entrypoint. 
+func findHermesTestSourceFile(t *testing.T, sessionsDir, sessionID string) string { + t.Helper() + return findHermesSourceFile(sessionsDir, sessionID) +} + func runHermesJSONLTest( t *testing.T, filename, content string, ) (*ParsedSession, []ParsedMessage) { @@ -21,8 +72,8 @@ func runHermesJSONLTest( filename = "20260403_153620_5a3e2ff1.jsonl" } path := createTestFile(t, filename, content) - sess, msgs, err := ParseHermesSession( - path, "", "local", + sess, msgs, err := parseHermesTestSession( + t, path, "", "local", ) require.NoError(t, err) return sess, msgs @@ -36,8 +87,8 @@ func runHermesJSONTest( filename = "session_20260403_153620_5a3e2ff1.json" } path := createTestFile(t, filename, content) - sess, msgs, err := ParseHermesSession( - path, "", "local", + sess, msgs, err := parseHermesTestSession( + t, path, "", "local", ) require.NoError(t, err) return sess, msgs @@ -135,7 +186,7 @@ func TestParseHermesArchive_StateDBMetadataUsageAndTranscriptChoice( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -183,7 +234,7 @@ func TestParseHermesArchive_FallsBackToTranscriptsWhenStateDBUnreadable( 0o644, )) - results, err := ParseHermesArchive(root, "override-project", "local") + results, err := parseHermesTestArchive(t, root, "override-project", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -211,7 +262,7 @@ func TestParseHermesArchive_UsesStateMessagesWhenJSONLIsLowerQuality( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -240,7 +291,7 @@ func TestParseHermesArchiveIncludesTranscriptsMissingFromStateDB( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 2) @@ 
-408,7 +459,7 @@ func TestDiscoverHermesSessionsFindsTranscriptOnlyRoot( path := filepath.Join(sessionsDir, "session_child.json") require.NoError(t, os.WriteFile(path, []byte(`{"messages":[]}`), 0o644)) - files := DiscoverHermesSessions(root) + files := discoverHermesTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, path, files[0].Path) } @@ -530,8 +581,8 @@ func TestParseHermesSession_JSONL_ExplicitProject(t *testing.T) { path := createTestFile( t, "20260403_153620_abc.jsonl", content, ) - sess, _, err := ParseHermesSession( - path, "my-project", "local", + sess, _, err := parseHermesTestSession( + t, path, "my-project", "local", ) require.NoError(t, err) require.NotNil(t, sess) @@ -638,8 +689,8 @@ func TestParseHermesSession_JSONL_FirstMessageTruncation(t *testing.T) { func TestParseHermesSession_JSONL_Errors(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParseHermesSession( - "/nonexistent/file.jsonl", "", "local", + _, _, err := parseHermesTestSession( + t, "/nonexistent/file.jsonl", "", "local", ) assert.Error(t, err) }) @@ -767,8 +818,8 @@ func TestParseHermesSession_JSON_MessageTimestampsExtendBounds( func TestParseHermesSession_JSON_Errors(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParseHermesSession( - "/nonexistent/file.json", "", "local", + _, _, err := parseHermesTestSession( + t, "/nonexistent/file.json", "", "local", ) assert.Error(t, err) }) @@ -777,7 +828,7 @@ func TestParseHermesSession_JSON_Errors(t *testing.T) { path := createTestFile( t, "session_bad.json", `"just a string"`, ) - _, _, err := ParseHermesSession(path, "", "local") + _, _, err := parseHermesTestSession(t, path, "", "local") assert.Error(t, err) assert.Contains(t, err.Error(), "invalid JSON") }) @@ -1025,7 +1076,7 @@ func TestDiscoverHermesSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverHermesSessions(dir) + files := 
discoverHermesTestSessions(t, dir) assertDiscoveredFiles( t, files, tt.wantFiles, AgentHermes, ) @@ -1033,13 +1084,13 @@ func TestDiscoverHermesSessions(t *testing.T) { } t.Run("empty string dir", func(t *testing.T) { - files := DiscoverHermesSessions("") + files := discoverHermesTestSessions(t, "") assert.Nil(t, files) }) t.Run("nonexistent dir", func(t *testing.T) { - files := DiscoverHermesSessions( - filepath.Join(t.TempDir(), "nope"), + files := discoverHermesTestSessions( + t, filepath.Join(t.TempDir(), "nope"), ) assert.Nil(t, files) }) @@ -1088,7 +1139,7 @@ func TestFindHermesSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindHermesSourceFile(dir, tt.sessionID) + got := findHermesTestSourceFile(t, dir, tt.sessionID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -1105,7 +1156,7 @@ func TestFindHermesSourceFile(t *testing.T) { "20260403_aaa.jsonl": "{}", }) for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindHermesSourceFile(dir, id) + got := findHermesTestSourceFile(t, dir, id) if got != "" { t.Errorf( "FindHermesSourceFile(%q) = %q, want empty", @@ -1172,8 +1223,13 @@ func TestHermesRegistryEntry(t *testing.T) { assert.Equal(t, "hermes:", found.IDPrefix) assert.True(t, found.FileBased) assert.Contains(t, found.DefaultDirs, ".hermes/sessions") - assert.NotNil(t, found.DiscoverFunc) - assert.NotNil(t, found.FindSourceFunc) + // Hermes is provider-authoritative: discovery and source lookup live on the + // hermesProvider, not on legacy AgentDef hooks. The watch-root resolvers + // stay because they are provider-owned and consumed by watcher setup. 
+ assert.Nil(t, found.DiscoverFunc) + assert.Nil(t, found.FindSourceFunc) + assert.NotNil(t, found.WatchRootsFunc) + assert.NotNil(t, found.ShallowWatchRootsFunc) } // --- File info --- @@ -1190,7 +1246,7 @@ func TestParseHermesSession_FileInfo(t *testing.T) { info, err := os.Stat(path) require.NoError(t, err) - sess, _, err := ParseHermesSession(path, "", "local") + sess, _, err := parseHermesTestSession(t, path, "", "local") require.NoError(t, err) require.NotNil(t, sess) diff --git a/internal/parser/provider.go b/internal/parser/provider.go index e01b866c8..d1eeed0f6 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -357,6 +357,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCursorProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) + case AgentHermes: + return newHermesProviderFactory(def) case AgentIflow: return newIflowProviderFactory(def) case AgentGptme: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 370aa91f1..0a563f57d 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -45,7 +45,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentKiro: ProviderMigrationLegacyOnly, AgentKiroIDE: ProviderMigrationLegacyOnly, AgentCortex: ProviderMigrationProviderAuthoritative, - AgentHermes: ProviderMigrationLegacyOnly, + AgentHermes: ProviderMigrationProviderAuthoritative, AgentWorkBuddy: ProviderMigrationProviderAuthoritative, AgentForge: ProviderMigrationLegacyOnly, AgentPiebald: ProviderMigrationLegacyOnly, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 1e774457f..e2698ba99 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -55,7 +55,6 @@ var pendingShimProviderFiles = map[string]bool{ "cowork_provider.go": true, "db_backed_provider.go": true, 
"gemini_provider.go": true, - "hermes_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, "openhands_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index 3ec9ebd00..d7af74fcb 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -430,15 +430,15 @@ var Registry = []AgentDef{ FileBased: true, }, { - Type: AgentHermes, - DisplayName: "Hermes Agent", - EnvVar: "HERMES_SESSIONS_DIR", - ConfigKey: "hermes_sessions_dirs", - DefaultDirs: []string{".hermes/sessions"}, - IDPrefix: "hermes:", - FileBased: true, - DiscoverFunc: DiscoverHermesSessions, - FindSourceFunc: FindHermesSourceFile, + Type: AgentHermes, + DisplayName: "Hermes Agent", + EnvVar: "HERMES_SESSIONS_DIR", + ConfigKey: "hermes_sessions_dirs", + DefaultDirs: []string{".hermes/sessions"}, + IDPrefix: "hermes:", + FileBased: true, + WatchRootsFunc: ResolveHermesWatchRoots, + ShallowWatchRootsFunc: ResolveHermesShallowWatchRoots, }, { Type: AgentWorkBuddy, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 5cdb11644..2825cb59e 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -3990,8 +3990,6 @@ func (e *Engine) processFile( res = e.processKiro(file, info) case parser.AgentKiroIDE: res = e.processKiroIDE(file, info) - case parser.AgentHermes: - res = e.processHermes(file, info) case parser.AgentPositron: res = e.processPositron(file, info) case parser.AgentZed: @@ -5950,45 +5948,6 @@ func (e *Engine) processKiroIDE( } } -func (e *Engine) processHermes( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - if filepath.Base(file.Path) == "state.db" { - results, err := parser.ParseHermesArchive( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - return processResult{results: results, forceReplace: true} - } - - sess, msgs, err := parser.ParseHermesSession( 
- file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - // vibeEffectiveInfo returns size/mtime for a Vibe session that account // for the sibling meta.json file: size is the sum of both files, and // mtime is the larger of the two. Returns info unchanged when meta.json @@ -8472,23 +8431,6 @@ func (e *Engine) SyncSingleSessionContext( } return err } - if def.Type == parser.AgentHermes { - hermesProject := "" - if sess, _ := e.db.GetSession(ctx, sessionID); sess != nil && - sess.Project != "" && !parser.NeedsProjectReparse(sess.Project) { - hermesProject = sess.Project - } - ok, err := e.syncSingleHermesArchive( - sessionID, path, hermesProject, - ) - if err != nil { - return err - } - if ok { - return nil - } - } - agent := def.Type // Clear skip cache so explicit re-sync always processes @@ -8677,52 +8619,6 @@ func (e *Engine) SyncSingleSessionContext( return nil } -func (e *Engine) syncSingleHermesArchive( - sessionID, path, project string, -) (bool, error) { - stateDB := "" - if filepath.Base(path) == "state.db" { - stateDB = path - } else if filepath.Base(filepath.Dir(path)) == "sessions" { - candidate := filepath.Join( - filepath.Dir(filepath.Dir(path)), "state.db", - ) - if parser.IsRegularFile(candidate) { - stateDB = candidate - } - } - if stateDB == "" { - return false, nil - } - - results, err := parser.ParseHermesArchive( - stateDB, project, e.machine, - ) - if err != nil { - return true, err - } - for _, pr := range results { - if pr.Session.ID != sessionID { - continue - } - if err := e.writeSessionFull(pendingWrite{ - sess: pr.Session, - msgs: pr.Messages, - usageEvents: pr.UsageEvents, - }); err != nil && !isIntentionalSessionSkip(err) && - !errors.Is(err, 
errSessionPreserved) { - return true, fmt.Errorf( - "write session %s: %w", pr.Session.ID, err, - ) - } - return true, nil - } - return true, fmt.Errorf( - "session %s not found in Hermes archive %s", - sessionID, stateDB, - ) -} - func (e *Engine) applyWorktreeMappingToSingleSession( sessionID string, ) error { diff --git a/internal/sync/hermes_archive_test.go b/internal/sync/hermes_archive_test.go new file mode 100644 index 000000000..58026330c --- /dev/null +++ b/internal/sync/hermes_archive_test.go @@ -0,0 +1,318 @@ +package sync + +import ( + "context" + "database/sql" + "os" + "path/filepath" + "testing" + "time" + + _ "github.com/mattn/go-sqlite3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" +) + +// hermesArchiveAggregateFileInfo mirrors the legacy engine helper +// hermesArchiveEffectiveInfo for test assertions: the aggregate size and mtime +// of the state.db plus every transcript directly under its sessions directory. +// The Hermes provider now owns this aggregation; this helper only computes the +// expected values the engine must persist. 
+func hermesArchiveAggregateFileInfo(t *testing.T, stateDB string) (int64, int64) { + t.Helper() + info, err := os.Stat(stateDB) + require.NoError(t, err) + size := info.Size() + mtime := info.ModTime().UnixNano() + sessionsDir := filepath.Join(filepath.Dir(stateDB), "sessions") + entries, err := os.ReadDir(sessionsDir) + if err != nil { + return size, mtime + } + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + isJSONL := filepath.Ext(name) == ".jsonl" + isSessionJSON := filepath.Ext(name) == ".json" && + len(name) >= len("session_") && name[:len("session_")] == "session_" + if !isJSONL && !isSessionJSON { + continue + } + fileInfo, err := os.Stat(filepath.Join(sessionsDir, name)) + if err != nil || fileInfo.IsDir() { + continue + } + size += fileInfo.Size() + if fileMtime := fileInfo.ModTime().UnixNano(); fileMtime > mtime { + mtime = fileMtime + } + } + return size, mtime +} + +// TestHermesProviderFingerprintAggregatesDirectTranscripts confirms the +// provider-owned archive fingerprint folds the size and mtime of transcripts +// living directly under the sessions directory into the state.db's freshness +// identity, replacing the engine's removed hermesArchiveEffectiveInfo. 
+func TestHermesProviderFingerprintAggregatesDirectTranscripts(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n{}\n"), 0o644)) + + transcriptTime := time.Now().Add(2 * time.Second).Truncate(time.Second) + require.NoError(t, os.Chtimes(transcriptPath, transcriptTime, transcriptTime)) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + + provider, ok := parser.NewProvider(parser.AgentHermes, parser.ProviderConfig{ + Roots: []string{filepath.Join(root, "sessions")}, + Machine: "local", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + assert.Equal(t, wantSize, fingerprint.Size) + assert.Equal(t, wantMtime, fingerprint.MTimeNS) +} + +// TestHermesProviderFingerprintChangesWhenTranscriptRemoved confirms the +// archive fingerprint shrinks back to the state.db's own size when a direct +// transcript is removed, replacing the engine's removed effective-info logic. 
+func TestHermesProviderFingerprintChangesWhenTranscriptRemoved(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n{}\n"), 0o644)) + + provider, ok := parser.NewProvider(parser.AgentHermes, parser.ProviderConfig{ + Roots: []string{filepath.Join(root, "sessions")}, + Machine: "local", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + before, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + require.NoError(t, os.Remove(transcriptPath)) + after, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + assert.NotEqual(t, before.Size, after.Size) + assert.Equal(t, stateInfo.Size(), after.Size) +} + +// TestProcessFileHermesArchiveSkipCacheUsesAggregateMtime confirms the +// provider-authoritative processFile path keys the skip cache on the aggregate +// archive mtime (state.db plus direct transcripts), so a cached entry stamped +// with that mtime short-circuits a reparse. 
+func TestProcessFileHermesArchiveSkipCacheUsesAggregateMtime(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n"), 0o644)) + transcriptTime := time.Now().Add(2 * time.Second).Truncate(time.Second) + require.NoError(t, os.Chtimes(transcriptPath, transcriptTime, transcriptTime)) + + _, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + engine.InjectSkipCache(map[string]int64{ + stateDB: wantMtime, + }) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: stateDB, + Agent: parser.AgentHermes, + }) + + require.NoError(t, res.err) + assert.True(t, res.skip) + assert.True(t, res.cacheSkip) + assert.Equal(t, wantMtime, res.mtime) +} + +// TestProcessFileHermesArchivePersistsAggregateFingerprint confirms the +// provider-authoritative processFile path stamps every archive session with the +// state.db path and the aggregate size and mtime, and that a second pass skips +// once the file info is persisted. This replaces the removed +// processHermes-based assertions. 
+func TestProcessFileHermesArchivePersistsAggregateFingerprint(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n", + ), + 0o644, + )) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + database := dbtest.OpenTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: stateDB, + Agent: parser.AgentHermes, + }) + + require.NoError(t, res.err) + require.NotEmpty(t, res.results) + for _, result := range res.results { + assert.Equal(t, stateDB, result.Session.File.Path) + assert.Equal(t, wantSize, result.Session.File.Size) + assert.Equal(t, wantMtime, result.Session.File.Mtime) + } + + pending := make([]pendingWrite, 0, len(res.results)) + for _, result := range res.results { + pending = append(pending, pendingWrite{ + sess: result.Session, + msgs: result.Messages, + usageEvents: result.UsageEvents, + }) + } + written, _, failed := engine.writeBatch(pending, syncWriteDefault, true) + require.Equal(t, 0, failed) + require.NotZero(t, written) + + storedSize, storedMtime, ok := database.GetFileInfoByPath(stateDB) + require.True(t, ok) + assert.Equal(t, wantSize, storedSize) + assert.Equal(t, wantMtime, storedMtime) +} + +// TestSyncPathsHermesArchiveTranscriptPersistsAggregateFingerprint confirms that +// syncing a transcript path inside an archive routes through the provider, which +// reparses the whole archive and persists the 
aggregate file info under the +// state.db path. This replaces the removed syncSingleHermesArchive coverage. +func TestSyncPathsHermesArchiveTranscriptPersistsAggregateFingerprint(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n", + ), + 0o644, + )) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + database := dbtest.OpenTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + engine.SyncPaths([]string{transcriptPath}) + + storedSize, storedMtime, found := database.GetFileInfoByPath(stateDB) + require.True(t, found) + assert.Equal(t, wantSize, storedSize) + assert.Equal(t, wantMtime, storedMtime) +} + +func writeHermesArchiveStateDB(t *testing.T, root string) string { + t.Helper() + stateDB := filepath.Join(root, "state.db") + conn, err := sql.Open("sqlite3", stateDB) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + _, err = conn.Exec(` + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + cache_read_tokens INTEGER DEFAULT 0, + cache_write_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + billing_provider TEXT, + billing_base_url TEXT, + 
billing_mode TEXT, + estimated_cost_usd REAL, + actual_cost_usd REAL, + cost_status TEXT, + cost_source TEXT, + pricing_version TEXT, + title TEXT, + api_call_count INTEGER DEFAULT 0 + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_content TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT, + codex_message_items TEXT + ); + INSERT INTO sessions ( + id, source, model, started_at, ended_at, message_count + ) VALUES ( + 'child', 'discord', 'gpt-5.4', 1778767200.0, 1778767800.0, 1 + ); + INSERT INTO messages ( + session_id, role, content, timestamp + ) VALUES ( + 'child', 'user', 'state db message', 1778767210.0 + ); + `) + require.NoError(t, err) + return stateDB +} diff --git a/internal/sync/hermes_integration_test.go b/internal/sync/hermes_integration_test.go new file mode 100644 index 000000000..439d6e0b7 --- /dev/null +++ b/internal/sync/hermes_integration_test.go @@ -0,0 +1,152 @@ +package sync_test + +import ( + "database/sql" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/sync" +) + +func TestSyncPathsHermesStateDBEventRefreshesArchive(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + root := t.TempDir() + stateDB := writeHermesSyncStateDB(t, root) + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + engine.SyncPaths([]string{stateDB}) + + assertSessionState(t, database, 
"hermes:child", func(sess *db.Session) { + assert.Equal(t, string(parser.AgentHermes), sess.Agent) + assert.Equal(t, "hermes-discord", sess.Project) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Child Session", *sess.DisplayName) + }) +} + +func TestSyncPathsHermesArchiveTranscriptEventRefreshesArchive(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + root := t.TempDir() + stateDB := writeHermesSyncStateDB(t, root) + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + engine.SyncPaths([]string{stateDB}) + assertSessionState(t, database, "hermes:child", nil) + + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n"+ + `{"role":"assistant","content":"Done.","timestamp":"2026-05-14T10:02:00.000000"}`+"\n", + ), + 0o644, + )) + + engine.SyncPaths([]string{transcriptPath}) + + assertSessionState(t, database, "hermes:extra", func(sess *db.Session) { + require.NotNil(t, sess.FirstMessage) + assert.Equal(t, "new transcript", *sess.FirstMessage) + }) +} + +func writeHermesSyncStateDB(t *testing.T, root string) string { + t.Helper() + stateDB := filepath.Join(root, "state.db") + conn, err := sql.Open("sqlite3", stateDB) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + _, err = conn.Exec(` + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason 
TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + cache_read_tokens INTEGER DEFAULT 0, + cache_write_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + billing_provider TEXT, + billing_base_url TEXT, + billing_mode TEXT, + estimated_cost_usd REAL, + actual_cost_usd REAL, + cost_status TEXT, + cost_source TEXT, + pricing_version TEXT, + title TEXT, + api_call_count INTEGER DEFAULT 0 + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_content TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT, + codex_message_items TEXT + ); + INSERT INTO sessions ( + id, source, model, parent_session_id, started_at, ended_at, + message_count, input_tokens, output_tokens, cache_read_tokens, + cache_write_tokens, reasoning_tokens, estimated_cost_usd, + cost_status, cost_source, title, api_call_count + ) VALUES ( + 'child', 'discord', 'gpt-5.4', 'parent', + 1778767200.0, 1778767800.0, 1, 300, 70, 20, 5, 9, + 0.123, 'estimated', 'hermes', 'Child Session', 4 + ); + INSERT INTO messages ( + session_id, role, content, timestamp + ) VALUES ( + 'child', 'user', 'state db only has one message', 1778767210.0 + ); + `) + require.NoError(t, err) + return stateDB +} From ec4b2051be346c42bde08b5820024b99fd692b29 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:07:51 -0400 Subject: [PATCH 05/11] feat(parser): migrate claude provider Claude has both regular project transcripts and nested subagent transcripts, plus an existing append-only incremental parser. 
Moving it behind a concrete provider keeps those source shapes and optional incremental capability explicit at the provider boundary. The provider preserves recursive project discovery, symlinked project directories, standard and subagent raw-ID lookup, changed-path classification, content hashing, project-name normalization, excluded-session reporting, relationship inference, and incremental append parsing for linear JSONL growth. fix(parser): preserve claude provider edge events Claude provider sync must distinguish true append idleness from files that were truncated or replaced, and watcher classification must still identify deleted primary and subagent transcripts after the file is gone. Otherwise provider-path sync can retain stale messages or miss removals. Return full-parse status for truncated incremental inputs, add missing-path classification for valid Claude source shapes, and make raw subagent lookup follow symlinked project directories like discovery does. This branch now opts Claude into shadow comparison. Validation: go test -tags "fts5" ./internal/parser -run 'Test(ClaudeProvider|FindClaudeSourceFile|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(sync): replace claude content after file rewrites Claude incremental parsing is append-oriented, so any fallback caused by truncation or file replacement must replace persisted messages instead of flowing through the append-preserving write path. Otherwise stale higher ordinals or stale tool rows can survive a full parse fallback. The provider now marks truncated incremental inputs as force-replace, and the legacy engine path carries forceReplace when file identity changes or the file shrinks before falling back to a full parse.
Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestClaudeProviderParseIncremental|TestIncrementalSync_Claude(FileReplaced|TruncatedFileReplacesStoredMessages|SameSizeFileReplaceUsesFullParse|MidStreamSplitFallsBackToFullParse|AgentIDFallbackUpdatesStoredToolCall)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check fix(sync): replace claude same-size rewrites A same-size rewrite can reach the full-parse fallback when the normal skip check did not skip the file, which means the content changed even though the byte count did not. That fallback must replace persisted rows, or stale higher ordinals and tool rows can survive the parse. The regression rewrites a Claude file in place to the same byte length with fewer logical messages and verifies the stale assistant row is deleted. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesClaudeLegacyParser|TestClaudeProviderParseIncremental|TestIncrementalSync_Claude(FileReplaced|TruncatedFileReplacesStoredMessages|SameSizeFileReplaceUsesFullParse|SameSizeInPlaceRewriteClearsStaleRows|MidStreamSplitFallsBackToFullParse|AgentIDFallbackUpdatesStoredToolCall)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check test(sync): compare claude shadow parity Claude is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseClaudeSessionWithExclusions. The fixture exercises the project-directory source shape and verifies session, message, usage, exclusion, and data-version planning parity while preserving provider-computed file hashes. 
Validation: go test -tags "fts5" ./internal/sync -run TestObserveProviderSourceMatchesClaudeLegacyParser -count=1 test(sync): cover claude provider usage exclusions Roborev job 2721 caught that the Claude shadow parity fixture only compared a plain exchange, so it did not prove provider parity for per-message token usage or /usage-only session exclusions. Add assistant message usage metadata to the normal fixture and a separate /usage-only source discovered by the provider, then assert non-empty token metadata and excluded IDs against the legacy parser. Validation: go test -tags "fts5" ./internal/sync -run TestObserveProviderSourceMatchesClaudeLegacyParser -count=1; go fmt ./...; go vet ./...; git diff --check refactor(parser): fold claude into provider Move Claude source discovery, lookup, full parse, exclusion handling, and append-only incremental parse ownership onto the concrete claudeProvider and delete the package-level DiscoverClaudeProjects, FindClaudeSourceFile, ParseClaudeSessionFrom, and ParseClaudeSessionWithExclusions free functions. The discover and find-source bodies stay as provider-neutral helpers (ClaudeProjectSessionFiles, claudeFindSourceFile) and the parse bodies become claudeParseWithExclusions and claudeParseSessionFrom; the public ParseClaudeSession wrapper and the Cowork parser (which reuses the Claude transcript format) call the shared helper, so no provider file references a legacy Discover/Find/Parse entrypoint. Make Claude provider-authoritative and drop its legacy sync dispatch: the classifyOnePath Claude block, the processFile case arm, and the processClaude method. Source classification, project resolution, and exclusion handling are reproduced through the provider's changed-path and parse paths. The provider's SourcesForChangedPath also reproduces the legacy "classify despite a transient stat error" behavior so a changed path under a momentarily unreadable parent is not dropped. 
Wire the provider-authoritative engine path to preserve Claude's DB-aware single-file semantics, which a stateless provider cannot do alone: - tryProviderIncrementalAppend drives the provider's ParseIncremental through the shared tryIncrementalJSONL bookkeeping (session lookup, data-version and inode/device identity guards, ordinal resume, cross-sync split detection, cumulative counters, and forceReplace fallback), so append-only syncs keep the stored file hash and append rows instead of recomputing and rewriting. - providerSingleSessionFresh reproduces the shouldSkipFile gate so an unchanged, already-synced session is skipped instead of re-parsed every full sync and a single-session resync does not reapply a worktree project mapping to an unchanged file. - stampProviderFileIdentity stamps inode/device on parsed results so the incremental path can later detect an atomic file replacement. - processProviderFile honors a caller-supplied file.Project as the source ProjectHint when no explicit ProviderSource was given, so a SyncSingleSession does not revert a user's project override. The engine's expandClaudeDuplicateCandidates and dedupeClaudeDiscoveredFiles stay as provider-neutral engine-level dedup plumbing; expansion now enumerates via ClaudeProjectSessionFiles. The duplicate-candidate expansion and session-ID dedup/precedence behavior is unchanged. Because dropping the Claude DiscoverFunc would otherwise remove Claude from surfaces that gate on DiscoverFunc != nil, parse-diff (engine and CLI flag validation) and the SSH remote resolve script now also include file-based agents that have left legacy-only mode through the provider facade, restoring Claude (and the other already-folded agents) to those surfaces. 
Drop the Claude AgentDef DiscoverFunc/FindSourceFunc hooks, set its provider migration mode to ProviderAuthoritative, remove claude_provider.go from the pending shim scan list, replace the shadow baseline test with provider-API coverage plus a guard asserting the four legacy entrypoints stay gone, and re-vehicle the generic shadow-mechanism caller tests onto the still-legacy Cowork agent since Claude no longer has a legacy process arm to observe in shadow. refactor(parser): fold ParseClaudeSession onto the Claude provider Delete the ParseClaudeSession free function and route its only production caller (the session upload handler) plus the test suite through the Claude provider's new ParseUploadedTranscript method, exposed via the ClaudeUploadParser interface. Uploads live outside any configured root, so the method parses the staged transcript directly under the caller-supplied project. That project stays authoritative rather than being overridden by the transcript's recorded cwd, matching the prior upload behavior and unlike the discovered-session Parse path. Unexport ClassifyClaudeSystemMessage to classifyClaudeSystemMessage; it is a Claude-internal classifier with no callers outside the package. Both removals clear the last provider-specific legacy parse/classify entrypoints this branch owned. fix(sync): skip fresh claude before fingerprinting The Claude provider migration preserved DB freshness skipping, but only after provider fingerprinting had already hashed the whole transcript. That lost the legacy cheap size/mtime/data-version gate for unchanged files. Run the single-session freshness check before provider fingerprinting, and pass the computed fingerprint into incremental parsing so truncation detection can distinguish appended files from zero-byte rewrites.
Zero-byte truncation now forces a full replacement parse instead of reporting no new data. Validation: go test -tags "fts5" ./internal/parser -run 'TestClaudeProviderParseIncremental(Truncated|EmptyTruncation)NeedsFullParse' -count=1; go test -tags "fts5" ./internal/sync -run 'TestIncrementalSync_ClaudeAppend|TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check --- internal/parser/claude.go | 79 +-- internal/parser/claude_parser_test.go | 16 +- internal/parser/claude_provider.go | 542 +++++++++++++++++++ internal/parser/claude_provider_test.go | 350 ++++++++++++ internal/parser/claude_subagent_test.go | 2 +- internal/parser/claude_test.go | 8 +- internal/parser/cowork.go | 4 +- internal/parser/discovery.go | 21 +- internal/parser/discovery_test.go | 12 +- internal/parser/fork_test.go | 2 +- internal/parser/parser_test.go | 6 +- internal/parser/provider.go | 7 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 3 - internal/parser/test_helpers_test.go | 24 +- internal/parser/types.go | 2 - internal/server/huma_routes_sessions.go | 17 +- internal/ssh/resolve.go | 31 +- internal/sync/engine.go | 310 ++++++++--- internal/sync/engine_integration_test.go | 100 ++++ internal/sync/engine_test.go | 12 +- internal/sync/provider_shadow_caller_test.go | 122 ++++- 22 files changed, 1505 insertions(+), 167 deletions(-) create mode 100644 internal/parser/claude_provider.go create mode 100644 internal/parser/claude_provider_test.go diff --git a/internal/parser/claude.go b/internal/parser/claude.go index 0cfe9821c..c5f134365 100644 --- a/internal/parser/claude.go +++ b/internal/parser/claude.go @@ -56,24 +56,17 @@ type claudeQueuedCommand struct { timestamp time.Time } -// ParseClaudeSession parses a Claude Code JSONL session file. 
-// Returns one or more ParseResult structs (multiple when forks -// are detected in the uuid/parentUuid DAG). -func ParseClaudeSession( - path, project, machine string, -) ([]ParseResult, error) { - results, _, err := ParseClaudeSessionWithExclusions( - path, project, machine, - ) - return results, err -} - -// ParseClaudeSessionWithExclusions parses a Claude Code JSONL -// session file and also returns session IDs intentionally excluded -// from the archive, such as content-free /usage probes. Sync uses -// those IDs during full resync so orphan preservation does not -// restore rows the current parser deliberately dropped. -func ParseClaudeSessionWithExclusions( +// claudeParseWithExclusions parses a Claude Code JSONL session file +// and also returns session IDs intentionally excluded from the +// archive, such as content-free /usage probes. Sync uses those IDs +// during full resync so orphan preservation does not restore rows the +// current parser deliberately dropped. This is the provider-owned +// parse body shared by the Claude provider (both its discovered-session +// Parse path and its ParseUploadedTranscript entry) and the Cowork +// parser (which reuses the Claude transcript format); it carries no +// legacy entrypoint naming so the provider can call it without shimming +// a Parse* free function. +func claudeParseWithExclusions( path, project, machine string, ) ([]ParseResult, []string, error) { info, err := os.Stat(path) @@ -366,15 +359,17 @@ func lastAssistantStopReason(messages []ParsedMessage) string { return "" } -// ParseClaudeSessionFrom parses only new lines from a Claude -// JSONL file starting at the given byte offset. Returns only -// the newly parsed messages (with ordinals starting at -// startOrdinal) and the latest timestamp. Fork detection is -// skipped — new entries are processed linearly. Used for -// incremental re-parsing of append-only session files. 
-// ErrDAGDetected is returned by ParseClaudeSessionFrom when -// appended lines contain uuid fields that require DAG-aware -// fork detection, which incremental parsing cannot handle. +// claudeParseSessionFrom parses only new lines from a Claude JSONL +// file starting at the given byte offset. Returns only the newly +// parsed messages (with ordinals starting at startOrdinal) and the +// latest timestamp. Fork detection is skipped — new entries are +// processed linearly. Used by the Claude provider for incremental +// re-parsing of append-only session files. ErrDAGDetected is returned +// when appended lines contain uuid fields that require DAG-aware fork +// detection, which incremental parsing cannot handle. This is the +// provider-owned incremental body; it carries no legacy entrypoint +// naming so the provider can call it without shimming a Parse* free +// function. var ErrDAGDetected = fmt.Errorf( "incremental parse: DAG uuid detected", ) @@ -387,11 +382,33 @@ var ErrClaudeIncrementalNeedsFullParse = fmt.Errorf( "incremental parse: appended Claude lines require full parse", ) +// ParseClaudeSessionWithExclusions and ParseClaudeSessionFrom are the exported +// seam used by the S3 sync path (internal/sync), which buffers an s3:// object +// to a temp file and parses it through the legacy per-agent processor. The +// Claude provider calls the unexported claudeParse* bodies directly; these thin +// wrappers exist only so the cross-package S3 consumer can reach the same logic +// without a provider file shimming a Parse* free function. They are removed once +// S3 support folds into the JSONL source sets. 
+func ParseClaudeSessionWithExclusions( + path, project, machine string, +) ([]ParseResult, []string, error) { + return claudeParseWithExclusions(path, project, machine) +} + func ParseClaudeSessionFrom( path string, offset int64, startOrdinal int, lastEntryUUID string, +) ([]ParsedMessage, time.Time, int64, error) { + return claudeParseSessionFrom(path, offset, startOrdinal, lastEntryUUID) +} + +func claudeParseSessionFrom( + path string, + offset int64, + startOrdinal int, + lastEntryUUID string, ) ([]ParsedMessage, time.Time, int64, error) { var ( entries []dagEntry @@ -726,7 +743,7 @@ func extractMessagesFrom( } if e.entryType == "user" { - if subtype := ClassifyClaudeSystemMessage(text); subtype != "" { + if subtype := classifyClaudeSystemMessage(text); subtype != "" { // Preserve Role=user so analytics that compute // turn-cycle/throughput on role alone (see // internal/db/analytics.go) don't count these as @@ -1666,7 +1683,7 @@ func extractMessages(entries []dagEntry) ( // stays "user" so role-keyed analytics continue to treat // these as inputs, not assistant replies. if e.entryType == "user" { - if subtype := ClassifyClaudeSystemMessage(text); subtype != "" { + if subtype := classifyClaudeSystemMessage(text); subtype != "" { messages = append(messages, ParsedMessage{ Ordinal: ordinal, Role: RoleUser, @@ -2079,14 +2096,14 @@ func extractCompactSummary(line string) string { return content.Str } -// ClassifyClaudeSystemMessage inspects a user-entry content string and +// classifyClaudeSystemMessage inspects a user-entry content string and // returns the matched system subtype (e.g. "continuation", "resume"), // or "" if the content is an ordinary user message. // // Non-caveat envelopes (stdout/stderr surrounds for // local command output) are treated as regular noise and return ""; // only the caveat variant is a semantic "resume" marker. 
-func ClassifyClaudeSystemMessage(content string) string { +func classifyClaudeSystemMessage(content string) string { trimmed := strings.TrimLeftFunc(content, func(r rune) bool { return r == '\uFEFF' || unicode.IsSpace(r) }) diff --git a/internal/parser/claude_parser_test.go b/internal/parser/claude_parser_test.go index 133ee017e..340dd3b32 100644 --- a/internal/parser/claude_parser_test.go +++ b/internal/parser/claude_parser_test.go @@ -22,7 +22,7 @@ func runClaudeParserTest(t *testing.T, fileName, content string) (ParsedSession, fileName = "test.jsonl" } path := createTestFile(t, fileName, content) - results, err := ParseClaudeSession(path, "my_app", "local") + results, err := parseClaudeSession(path, "my_app", "local") require.NoError(t, err) require.NotEmpty(t, results) return results[0].Session, results[0].Messages @@ -31,7 +31,7 @@ func runClaudeParserTest(t *testing.T, fileName, content string) (ParsedSession, func callParseClaudeSessionFrom( path string, offset int64, startOrdinal int, lastEntryUUID string, ) ([]ParsedMessage, time.Time, int64, error) { - fn := reflect.ValueOf(ParseClaudeSessionFrom) + fn := reflect.ValueOf(claudeParseSessionFrom) args := []reflect.Value{ reflect.ValueOf(path), reflect.ValueOf(offset), @@ -68,7 +68,7 @@ func TestParseClaudeSession_UsageProbe(t *testing.T) { parse := func(t *testing.T, content string) []ParseResult { t.Helper() path := createTestFile(t, "probe.jsonl", content) - results, err := ParseClaudeSession(path, "ClaudeProbe", "local") + results, err := parseClaudeSession(path, "ClaudeProbe", "local") require.NoError(t, err) return results } @@ -516,7 +516,7 @@ func TestParseClaudeSessionFrom_Incremental(t *testing.T) { path := createTestFile(t, "inc-claude.jsonl", initial) // Full parse to get baseline. 
- results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.NotEmpty(t, results) assert.Equal(t, 2, len(results[0].Messages)) @@ -978,7 +978,7 @@ func TestParseClaudeSession_ResolvesPersistedToolResultOutput( sessionPath := filepath.Join(dir, "project", "parent-session.jsonl") require.NoError(t, os.WriteFile(sessionPath, []byte(content), 0o644)) - results, err := ParseClaudeSession(sessionPath, "project", "local") + results, err := parseClaudeSession(sessionPath, "project", "local") require.NoError(t, err) require.Len(t, results, 1) require.Len(t, results[0].Messages, 3) @@ -1016,7 +1016,7 @@ func TestParseClaudeSession_PersistedToolResultDoesNotOverwriteSiblings( sessionPath := filepath.Join(dir, "project", "parent-session.jsonl") require.NoError(t, os.WriteFile(sessionPath, []byte(content), 0o644)) - results, err := ParseClaudeSession(sessionPath, "project", "local") + results, err := parseClaudeSession(sessionPath, "project", "local") require.NoError(t, err) require.Len(t, results, 1) require.Len(t, results[0].Messages, 3) @@ -1406,7 +1406,7 @@ func TestParseClaudeSession_ExtractsMessageIDAndRequestID(t *testing.T) { t.Fatalf("write fixture: %v", err) } - results, err := ParseClaudeSession(path, "proj", "m") + results, err := parseClaudeSession(path, "proj", "m") if err != nil { t.Fatalf("parse: %v", err) } @@ -1779,7 +1779,7 @@ func TestClassifyClaudeSystemMessage(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - got := ClassifyClaudeSystemMessage(c.content) + got := classifyClaudeSystemMessage(c.content) assert.Equal(t, c.expected, got) }) } diff --git a/internal/parser/claude_provider.go b/internal/parser/claude_provider.go new file mode 100644 index 000000000..915020989 --- /dev/null +++ b/internal/parser/claude_provider.go @@ -0,0 +1,542 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ 
Provider = (*claudeProvider)(nil) + +type claudeProviderFactory struct { + def AgentDef +} + +func newClaudeProviderFactory(def AgentDef) ProviderFactory { + return claudeProviderFactory{def: cloneAgentDef(def)} +} + +func (f claudeProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f claudeProviderFactory) Capabilities() Capabilities { + return claudeProviderCapabilities() +} + +func (f claudeProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &claudeProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: claudeProviderCapabilities(), + Config: cfg, + }, + sources: newClaudeSourceSet(cfg.Roots), + } +} + +type claudeProvider struct { + ProviderBase + sources claudeSourceSet +} + +func (p *claudeProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *claudeProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *claudeProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *claudeProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *claudeProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *claudeProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("claude source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + project := claudeProviderProject(ctx, 
req.Source.ProjectHint, path) + results, excludedIDs, err := claudeParseWithExclusions(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if req.Fingerprint.Hash != "" { + for i := range results { + results[i].Session.File.Hash = req.Fingerprint.Hash + } + } + InferRelationshipTypes(results) + out := make([]ParseResultOutcome, 0, len(results)) + for _, result := range results { + out = append(out, ParseResultOutcome{ + Result: result, + DataVersion: DataVersionCurrent, + }) + } + return ParseOutcome{ + Results: out, + ExcludedSessionIDs: excludedIDs, + ResultSetComplete: true, + }, nil +} + +// ClaudeUploadParser is implemented by the Claude provider to parse a +// standalone, out-of-root Claude transcript file (such as an HTTP upload) +// under a caller-supplied project name. Uploads do not live under a +// configured root, so the normal discovery/source-resolution path does not +// apply; callers obtain this via NewProvider(AgentClaude, ...) and a type +// assertion. +type ClaudeUploadParser interface { + // ParseUploadedTranscript parses the transcript at path and files the + // resulting sessions under project. The project is authoritative: unlike + // the discovered-session Parse path, it is not overridden by any cwd + // recorded in the transcript, because an upload is filed under a + // user-chosen project rather than a workspace path on this machine. 
+ ParseUploadedTranscript(path, project, machine string) ([]ParseResult, error) +} + +func (p *claudeProvider) ParseUploadedTranscript( + path, project, machine string, +) ([]ParseResult, error) { + machine = firstNonEmptyJSONLString(machine, p.Config.Machine) + results, _, err := claudeParseWithExclusions(path, project, machine) + if err != nil { + return nil, err + } + InferRelationshipTypes(results) + return results, nil +} + +func (p *claudeProvider) ParseIncremental( + ctx context.Context, + req IncrementalRequest, +) (IncrementalOutcome, IncrementalStatus, error) { + if err := ctx.Err(); err != nil { + return IncrementalOutcome{}, IncrementalUnsupported, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return IncrementalOutcome{}, IncrementalUnsupported, + fmt.Errorf("claude source path unavailable") + } + if req.Offset > 0 && req.Fingerprint.Size < req.Offset { + return IncrementalOutcome{ForceReplace: true}, + IncrementalNeedsFullParse, nil + } + if req.Fingerprint.Size == req.Offset { + return IncrementalOutcome{}, IncrementalNoNewData, nil + } + newMsgs, endedAt, consumed, err := claudeParseSessionFrom( + path, + req.Offset, + req.StartOrdinal, + req.LastEntryUUID, + ) + if err != nil { + if IsIncrementalFullParseFallback(err) || errorsIsClaudeDAG(err) { + return IncrementalOutcome{ForceReplace: IsIncrementalFullParseFallback(err)}, + IncrementalNeedsFullParse, nil + } + return IncrementalOutcome{}, IncrementalNeedsFullParse, err + } + if len(newMsgs) == 0 { + if consumed > 0 { + return IncrementalOutcome{ + SessionID: req.SessionID, + EndedAt: endedAt, + ConsumedBytes: consumed, + }, IncrementalApplied, nil + } + return IncrementalOutcome{}, IncrementalNoNewData, nil + } + totalOut, peakCtx, hasTotalOut, hasPeakCtx := claudeProviderTokenTotals(newMsgs) + return IncrementalOutcome{ + SessionID: req.SessionID, + Messages: newMsgs, + EndedAt: endedAt, + ConsumedBytes: consumed, + MessageCount: len(newMsgs), + UserMessageCount: 
claudeProviderUserMessageCount(newMsgs), + TotalOutputTokens: totalOut, + PeakContextTokens: peakCtx, + HasTotalOutputTokens: hasTotalOut, + HasPeakContextTokens: hasPeakCtx, + }, IncrementalApplied, nil +} + +type claudeSource struct { + Root string + Path string +} + +type claudeSourceSet struct { + roots []string +} + +func newClaudeSourceSet(roots []string) claudeSourceSet { + return claudeSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s claudeSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range ClaudeProjectSessionFiles(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s claudeSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"*.jsonl"}, + DebounceKey: string(AgentClaude) + ":projects:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s claudeSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + // The legacy classifier resolved Claude paths purely from their + // project/session shape and only treated a stat failure as + // "missing" when it was a definitive IsNotExist. A transient stat + // error (for example a parent directory the watcher cannot read this + // instant) must still classify so the change is not silently dropped. + // Fall back to path-shape classification whenever the path is not + // known to be absent. 
+ allowMissing := jsonlMissingPathFallbackAllowed(req) || + claudeChangedPathPresentButUnstatable(req.Path) + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !s.hasRoot(root) { + return nil, nil + } + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s claudeSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceForPath(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := claudeFindSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (s claudeSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("claude source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + inode, device := sourceFileIdentity(info) + return SourceFingerprint{ + Key: 
firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Inode: inode, + Device: device, + Hash: hash, + }, nil +} + +func (s claudeSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case claudeSource: + return src.Path, src.Path != "" + case *claudeSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + for _, root := range s.roots { + if ref, ok := s.sourceForPath(root, candidate); ok { + src := ref.Opaque.(claudeSource) + return src.Path, true + } + } + } + return "", false +} + +func (s claudeSourceSet) sourceForPath(root, path string) (SourceRef, bool) { + return s.sourceForChangedPath(root, path, false) +} + +func (s claudeSourceSet) sourceForChangedPath( + root, + path string, + allowMissing bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if allowMissing { + return s.sourceRefFromPath(root, path) + } + return s.sourceRef(root, path) +} + +func (s claudeSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !IsRegularFile(path) { + return SourceRef{}, false + } + return s.sourceRefFromPath(root, path) +} + +func (s claudeSourceSet) sourceRefFromPath(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + project, ok := claudeProjectHintFromPath(root, path) + if !ok { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentClaude, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: claudeSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s claudeSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + 
+// claudeChangedPathPresentButUnstatable reports whether a changed path +// resolves to something on disk that cannot be stat'd right now for a +// reason other than not existing (for example a parent directory with no +// read/exec permission). In that case the legacy classifier still +// recognized the path by shape, so the provider must classify it too. +func claudeChangedPathPresentButUnstatable(path string) bool { + if path == "" { + return false + } + if IsRegularFile(path) { + return false + } + _, err := os.Lstat(path) + if err == nil { + // Present (lstat succeeded) but not a regular file via Stat, + // e.g. stat blocked by parent-directory permissions. + return true + } + return !os.IsNotExist(err) +} + +func claudeProjectHintFromPath(root, path string) (string, bool) { + rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path)) + if err != nil || rel == "." || rel == "" { + return "", false + } + if strings.HasPrefix(rel, ".."+string(filepath.Separator)) || rel == ".." 
{ + return "", false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) == 2 && strings.HasSuffix(parts[1], ".jsonl") { + stem := strings.TrimSuffix(parts[1], ".jsonl") + if strings.HasPrefix(stem, "agent-") { + return "", false + } + return parts[0], true + } + if len(parts) >= 4 && parts[2] == "subagents" && + strings.HasSuffix(parts[len(parts)-1], ".jsonl") { + stem := strings.TrimSuffix(parts[len(parts)-1], ".jsonl") + if strings.HasPrefix(stem, "agent-") { + return parts[0], true + } + } + return "", false +} + +func claudeProviderProject(ctx context.Context, projectHint, path string) string { + project := GetProjectName(projectHint) + cwd, gitBranch := ExtractClaudeProjectHints(path) + if cwd != "" { + if p := ExtractProjectFromCwdWithBranchContext(ctx, cwd, gitBranch); p != "" { + project = p + } + } + return project +} + +func errorsIsClaudeDAG(err error) bool { + return err == ErrDAGDetected +} + +func claudeProviderUserMessageCount(msgs []ParsedMessage) int { + count := 0 + for _, msg := range msgs { + if msg.Role == RoleUser && !msg.IsSystem && len(msg.ToolResults) == 0 { + count++ + } + } + return count +} + +func claudeProviderTokenTotals( + msgs []ParsedMessage, +) (totalOut int, peakCtx int, hasTotalOut bool, hasPeakCtx bool) { + for _, msg := range msgs { + msgHasCtx, msgHasOut := msg.TokenPresence() + if msgHasOut { + totalOut += msg.OutputTokens + hasTotalOut = true + } + if msgHasCtx && (!hasPeakCtx || msg.ContextTokens > peakCtx) { + peakCtx = msg.ContextTokens + hasPeakCtx = true + } + } + return totalOut, peakCtx, hasTotalOut, hasPeakCtx +} + +func claudeProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilitySupported, + MultiSessionSource: 
CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilitySupported, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + TerminationStatus: CapabilitySupported, + MalformedLineCount: CapabilitySupported, + Model: CapabilitySupported, + StopReason: CapabilitySupported, + }, + } +} diff --git a/internal/parser/claude_provider_test.go b/internal/parser/claude_provider_test.go new file mode 100644 index 000000000..d15f97e78 --- /dev/null +++ b/internal/parser/claude_provider_test.go @@ -0,0 +1,350 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestClaudeProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentClaude) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestClaudeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + sourcePath := filepath.Join(root, projectDir, sessionID+".jsonl") + subagentPath := filepath.Join( + root, + projectDir, + sessionID, + "subagents", + "workflows", + "wf-123", + "agent-worker.jsonl", + ) + writeSourceFile(t, sourcePath, claudeProviderFixture("main question")) + writeSourceFile(t, subagentPath, claudeProviderFixture("subagent question")) + 
writeSourceFile( + t, + filepath.Join(root, projectDir, sessionID, "subagents", "not-agent.jsonl"), + claudeProviderFixture("ignored"), + ) + writeSourceFile(t, filepath.Join(root, projectDir, "agent-root.jsonl"), claudeProviderFixture("ignored")) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{sourcePath, subagentPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered { + assert.Equal(t, AgentClaude, source.Provider) + assert.Equal(t, projectDir, source.ProjectHint) + } + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-worker", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, subagentPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, 
subagentPath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(subagentPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "rename", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, subagentPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, projectDir, "agent-root.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) +} + +func TestClaudeProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + targetProject := filepath.Join(targetRoot, projectDir) + sourceProject := filepath.Join(root, projectDir) + sourcePath := filepath.Join(sourceProject, sessionID+".jsonl") + subagentPath := filepath.Join( + sourceProject, + sessionID, + "subagents", + "jobs", + "job-1", + "agent-linked.jsonl", + ) + writeSourceFile( + t, + filepath.Join(targetProject, sessionID+".jsonl"), + claudeProviderFixture("from symlink"), + ) + writeSourceFile( + t, + filepath.Join(targetProject, sessionID, "subagents", "jobs", "job-1", "agent-linked.jsonl"), + claudeProviderFixture("from symlink subagent"), + ) + if err := os.Symlink(targetProject, sourceProject); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := 
provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{sourcePath, subagentPath}, sourceDisplayPaths(discovered)) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-linked", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) +} + +func TestClaudeProviderParse(t *testing.T) { + root := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + sourcePath := filepath.Join(root, projectDir, sessionID+".jsonl") + writeSourceFile(t, sourcePath, claudeProviderFixture("parse question")) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, sessionID, result.Result.Session.ID) + assert.Equal(t, AgentClaude, result.Result.Session.Agent) + assert.Equal(t, "demo", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, "abc123", result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, 
result.Result.Messages, 2) +} + +func TestClaudeProviderParseIncremental(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "inc.jsonl") + initial := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("hello world", tsEarly), + testjsonl.ClaudeAssistantJSON("hi there", tsEarlyS1), + ) + writeSourceFile(t, sourcePath, initial) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + appended := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("follow up", tsEarlyS5), + testjsonl.ClaudeAssistantJSON("got it", tsLate), + ) + f, err := os.OpenFile(sourcePath, os.O_APPEND|os.O_WRONLY, 0o644) + require.NoError(t, err) + _, err = f.WriteString(appended) + require.NoError(t, err) + require.NoError(t, f.Close()) + currentInfo, err := os.Stat(sourcePath) + require.NoError(t, err) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "inc", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: currentInfo.Size()}, + SessionID: "inc", + Offset: info.Size(), + StartOrdinal: 2, + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalApplied, status) + assert.Equal(t, "inc", outcome.SessionID) + assert.Equal(t, int64(len(appended)), outcome.ConsumedBytes) + require.Len(t, outcome.Messages, 2) + assert.Equal(t, 2, outcome.Messages[0].Ordinal) + assert.Equal(t, RoleUser, outcome.Messages[0].Role) + assert.Contains(t, outcome.Messages[0].Content, "follow up") + assert.Equal(t, 3, outcome.Messages[1].Ordinal) + assert.Equal(t, RoleAssistant, outcome.Messages[1].Role) + assert.Contains(t, outcome.Messages[1].Content, "got it") +} + +func 
TestClaudeProviderParseIncrementalTruncatedNeedsFullParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "truncated.jsonl") + initial := claudeProviderFixture("hello world") + writeSourceFile(t, sourcePath, initial) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "truncated", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: int64(len(initial) / 2)}, + SessionID: "truncated", + Offset: int64(len(initial)), + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalNeedsFullParse, status) + assert.True(t, outcome.ForceReplace) +} + +func TestClaudeProviderParseIncrementalEmptyTruncationNeedsFullParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "empty-truncated.jsonl") + initial := claudeProviderFixture("hello world") + writeSourceFile(t, sourcePath, initial) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "empty-truncated", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: 0}, + SessionID: "empty-truncated", + Offset: int64(len(initial)), + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalNeedsFullParse, status) + assert.True(t, outcome.ForceReplace) +} + +func claudeProviderFixture(firstMessage string) string { + return 
testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON(firstMessage, tsEarly), + testjsonl.ClaudeAssistantJSON("Done.", tsEarlyS1), + ) +} diff --git a/internal/parser/claude_subagent_test.go b/internal/parser/claude_subagent_test.go index 1d9a9e9aa..b6449a8ff 100644 --- a/internal/parser/claude_subagent_test.go +++ b/internal/parser/claude_subagent_test.go @@ -15,7 +15,7 @@ func parseAndGetToolCalls(t *testing.T, filename string, lines []string) []Parse t.Helper() content := strings.Join(lines, "\n") path := createTestFile(t, filename, content) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err, "ParseClaudeSession") require.NotEmpty(t, results, "no results") diff --git a/internal/parser/claude_test.go b/internal/parser/claude_test.go index de45e1ca0..1527ecf1e 100644 --- a/internal/parser/claude_test.go +++ b/internal/parser/claude_test.go @@ -238,7 +238,7 @@ func TestParseClaudeSession_Metadata(t *testing.T) { ) require.NoError(t, err) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, "proj", "local", ) require.NoError(t, err) @@ -309,7 +309,7 @@ func TestParseClaudeSession_MetadataOnForkSessions( err := os.WriteFile(path, []byte(content.String()), 0o644) require.NoError(t, err) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 2, "expected main + fork result") @@ -357,7 +357,7 @@ func TestParseClaudeSession_LinearMetadata(t *testing.T) { err := os.WriteFile(path, []byte(content), 0o644) require.NoError(t, err) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -470,7 +470,7 @@ func TestClaudeRenameSetsDisplayName(t *testing.T) { err := os.WriteFile(path, []byte(sb.String()), 0o644) require.NoError(t, err) - 
results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 1) assert.Equal(t, tc.wantDisplay, results[0].Session.SessionName) diff --git a/internal/parser/cowork.go b/internal/parser/cowork.go index 67e42cc50..4a32299d3 100644 --- a/internal/parser/cowork.go +++ b/internal/parser/cowork.go @@ -382,7 +382,7 @@ func extractCoworkAITitle(transcriptPath string) string { // the cowork namespace: agent type, "cowork:"-prefixed IDs, the session // title, and metadata-derived timestamps for transcripts that carry none. // Returns parsed results plus session IDs the parser intentionally -// excluded (prefixed), matching ParseClaudeSessionWithExclusions. +// excluded (prefixed), matching the Claude transcript parser. func ParseCoworkSession( transcriptPath, machine string, ) ([]ParseResult, []string, error) { @@ -390,7 +390,7 @@ func ParseCoworkSession( meta := readCoworkMeta(metaPath) project := coworkProjectName(meta) - results, excluded, err := ParseClaudeSessionWithExclusions( + results, excluded, err := claudeParseWithExclusions( transcriptPath, project, machine, ) if err != nil { diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index df4c88b48..09cdc1495 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -436,9 +436,13 @@ func ResolveCodexShallowWatchRoots(root string) []string { return []string{parent} } -// DiscoverClaudeProjects finds all project directories under the -// Claude projects dir and returns their JSONL session files. -func DiscoverClaudeProjects(projectsDir string) []DiscoveredFile { +// ClaudeProjectSessionFiles finds all project directories under the +// Claude projects dir and returns their JSONL session files. It is the +// provider-owned enumeration body shared by the Claude provider source +// set (full-sync discovery) and the engine's duplicate-candidate +// expansion. 
The name carries no legacy entrypoint verb so the +// provider can call it without shimming a Discover* free function. +func ClaudeProjectSessionFiles(projectsDir string) []DiscoveredFile { if strings.HasPrefix(projectsDir, "s3://") { return discoverClaudeS3(projectsDir) } @@ -569,9 +573,12 @@ func DiscoverCodexSessions(sessionsDir string) []DiscoveredFile { return files } -// FindClaudeSourceFile finds the original JSONL file for a Claude -// session ID by searching all project directories. -func FindClaudeSourceFile( +// claudeFindSourceFile finds the original JSONL file for a Claude +// session ID by searching all project directories. It is the +// provider-owned lookup body used by the Claude provider source set's +// FindSource. The name carries no legacy entrypoint verb so the +// provider can call it without shimming a Find* free function. +func claudeFindSourceFile( projectsDir, sessionID string, ) string { if !IsValidSessionID(sessionID) { @@ -600,7 +607,7 @@ func FindClaudeSourceFile( // //subagents/**/agent-.jsonl if strings.HasPrefix(sessionID, "agent-") { for _, entry := range entries { - if !entry.IsDir() { + if !isDirOrSymlink(entry, projectsDir) { continue } projDir := filepath.Join( diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 9acad18e2..b44bb3c4b 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -141,7 +141,7 @@ func TestDiscoverClaudeProjects(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverClaudeProjects(dir) + files := ClaudeProjectSessionFiles(dir) assertDiscoveredFiles(t, files, tt.wantFiles, AgentClaude) @@ -156,7 +156,7 @@ func TestDiscoverClaudeProjects(t *testing.T) { t.Run("Nonexistent", func(t *testing.T) { dir := filepath.Join(t.TempDir(), "does-not-exist") - files := DiscoverClaudeProjects(dir) + files := ClaudeProjectSessionFiles(dir) assert.Nil(t, files, "expected nil") }) } 
@@ -307,7 +307,7 @@ func TestFindClaudeSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindClaudeSourceFile(dir, tt.targetID) + got := claudeFindSourceFile(dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -321,8 +321,8 @@ func TestFindClaudeSourceFile(t *testing.T) { dir := t.TempDir() tests := []string{"", "../etc/passwd", "a/b", "a b"} for _, id := range tests { - got := FindClaudeSourceFile(dir, id) - assert.Emptyf(t, got, "FindClaudeSourceFile(%q)", id) + got := claudeFindSourceFile(dir, id) + assert.Emptyf(t, got, "claudeFindSourceFile(%q)", id) } }) } @@ -1048,7 +1048,7 @@ func TestFindClaudeSourceFile_Symlink(t *testing.T) { t.Skipf("symlink not supported: %v", err) } - got := FindClaudeSourceFile(searchDir, "sess-abc") + got := claudeFindSourceFile(searchDir, "sess-abc") require.NotEmpty(t, got, "expected to find session via symlink") assert.Equal(t, linkDir, filepath.Dir(got), "expected path through symlink") diff --git a/internal/parser/fork_test.go b/internal/parser/fork_test.go index 68e7f618e..6d2724730 100644 --- a/internal/parser/fork_test.go +++ b/internal/parser/fork_test.go @@ -14,7 +14,7 @@ import ( func parseTestContent(t *testing.T, name, content string, expectedLen int) []ParseResult { t.Helper() path := createTestFile(t, name, content) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err, "ParseClaudeSession") require.Len(t, results, expectedLen) return results diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index 653532c2f..adff7c24a 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -790,7 +790,7 @@ func TestClaudeSessionTimestampSemantics(t *testing.T) { buf := captureLog(t) path := createTestFile(t, "ts-long-invalid.jsonl", content) - _, err := ParseClaudeSession( + _, err := parseClaudeSession( path, 
"proj", "local", ) require.NoError(t, err, "ParseClaudeSession") @@ -1303,7 +1303,7 @@ func TestClaudeUserMessageCount(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { path := createTestFile(t, "test.jsonl", tt.content) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, "test-proj", "local", ) require.NoError(t, err, "ParseClaudeSession") @@ -1324,7 +1324,7 @@ func TestParseClaudeToolResults(t *testing.T) { content := strings.Join(lines, "\n") + "\n" path := createTestFile(t, "tool-results.jsonl", content) - results, err := ParseClaudeSession(path, "test-project", "local") + results, err := parseClaudeSession(path, "test-project", "local") require.NoError(t, err, "ParseClaudeSession") require.NotEmpty(t, results, "ParseClaudeSession returned no results") msgs := results[0].Messages diff --git a/internal/parser/provider.go b/internal/parser/provider.go index d1eeed0f6..9307f7b17 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -279,6 +279,11 @@ type IncrementalRequest struct { Offset int64 StartOrdinal int Machine string + // LastEntryUUID is the UUID of the last entry stored for this + // session, used by DAG-aware parsers (Claude) to detect when an + // appended tail forks away from the stored tip and must trigger a + // full reparse instead of a naive append. + LastEntryUUID string } // IncrementalOutcome is the append-only parse output. 
@@ -349,6 +354,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { switch def.Type { case AgentAmp: return newAmpProviderFactory(def) + case AgentClaude: + return newClaudeProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) case AgentCortex: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 0a563f57d..d5673312d 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -17,7 +17,7 @@ const ( ) var providerMigrationModes = map[AgentType]ProviderMigrationMode{ - AgentClaude: ProviderMigrationLegacyOnly, + AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationLegacyOnly, AgentCodex: ProviderMigrationLegacyOnly, AgentCopilot: ProviderMigrationLegacyOnly, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index e2698ba99..68432d8fd 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -49,7 +49,6 @@ var providerNeutralEntrypoints = map[string]bool{ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, - "claude_provider.go": true, "codex_provider.go": true, "copilot_provider.go": true, "cowork_provider.go": true, @@ -57,8 +56,6 @@ var pendingShimProviderFiles = map[string]bool{ "gemini_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, - "openhands_provider.go": true, - "opencode_provider.go": true, "positron_provider.go": true, "shelley_provider.go": true, "visualstudio_copilot_provider.go": true, diff --git a/internal/parser/test_helpers_test.go b/internal/parser/test_helpers_test.go index 587114297..9c2778d66 100644 --- a/internal/parser/test_helpers_test.go +++ b/internal/parser/test_helpers_test.go @@ -145,10 +145,28 @@ func parseClaudeTestFile( ) (ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, 
name, content) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, project, "local", ) - require.NoError(t, err, "ParseClaudeSession") - require.NotEmpty(t, results, "ParseClaudeSession returned no results") + require.NoError(t, err, "parseClaudeSession") + require.NotEmpty(t, results, "parseClaudeSession returned no results") return results[0].Session, results[0].Messages } + +// parseClaudeSession parses a standalone Claude transcript through the Claude +// provider's upload entry point, honoring the explicit project. It is the +// test harness replacement for the former ParseClaudeSession free function, +// exercising the same provider-owned parse body that production uploads use. +func parseClaudeSession( + path, project, machine string, +) ([]ParseResult, error) { + provider, ok := NewProvider(AgentClaude, ProviderConfig{Machine: machine}) + if !ok { + return nil, fmt.Errorf("claude provider unavailable") + } + uploader, ok := provider.(ClaudeUploadParser) + if !ok { + return nil, fmt.Errorf("claude provider does not support upload parsing") + } + return uploader.ParseUploadedTranscript(path, project, machine) +} diff --git a/internal/parser/types.go b/internal/parser/types.go index d7af74fcb..825bad354 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -106,8 +106,6 @@ var Registry = []AgentDef{ DefaultDirs: []string{".claude/projects"}, IDPrefix: "", FileBased: true, - DiscoverFunc: DiscoverClaudeProjects, - FindSourceFunc: FindClaudeSourceFile, }, { Type: AgentCowork, diff --git a/internal/server/huma_routes_sessions.go b/internal/server/huma_routes_sessions.go index 313087079..b1cfcae92 100644 --- a/internal/server/huma_routes_sessions.go +++ b/internal/server/huma_routes_sessions.go @@ -884,7 +884,21 @@ func (s *Server) humaUploadSession( return nil, apiError(http.StatusInternalServerError, "failed to save upload") } defer func() { _ = os.RemoveAll(upload.tempDir) }() - results, err := 
parser.ParseClaudeSession(upload.tempPath, project, machine) + provider, ok := parser.NewProvider( + parser.AgentClaude, parser.ProviderConfig{Machine: machine}, + ) + if !ok { + return nil, apiError(http.StatusInternalServerError, + "claude provider unavailable") + } + uploader, ok := provider.(parser.ClaudeUploadParser) + if !ok { + return nil, apiError(http.StatusInternalServerError, + "claude provider does not support uploads") + } + results, err := uploader.ParseUploadedTranscript( + upload.tempPath, project, machine, + ) if err != nil { return nil, apiError(http.StatusBadRequest, fmt.Sprintf("parsing session: %v", err)) @@ -892,7 +906,6 @@ func (s *Server) humaUploadSession( if len(results) == 0 { return nil, apiError(http.StatusBadRequest, "no sessions parsed from upload") } - parser.InferRelationshipTypes(results) for i := range results { results[i].Session.File.Path = upload.finalPath } diff --git a/internal/ssh/resolve.go b/internal/ssh/resolve.go index 9d072e7af..7b168120e 100644 --- a/internal/ssh/resolve.go +++ b/internal/ssh/resolve.go @@ -67,14 +67,15 @@ func buildAiderResolveSnippet(envVar string) string { // "agentType:path\n" per agent target, plus "@file:path\n" lines for sibling // metadata files such as Codex's session_index.jsonl. // -// Only includes agents where FileBased is true and DiscoverFunc -// is non-nil. For each agent with an EnvVar, the script checks -// the env var first and falls back to the default dir. Dirs (and +// Only includes file-based agents that have on-disk sources to +// resolve: either a legacy DiscoverFunc or a provider facade that has +// left legacy-only mode. For each agent with an EnvVar, the script +// checks the env var first and falls back to the default dir. Dirs (and // files) that don't exist on the remote are skipped. 
func buildResolveScript() string { var b strings.Builder for _, def := range parser.Registry { - if !def.FileBased || def.DiscoverFunc == nil { + if !resolveAgentHasOnDiskSource(def) { continue } if def.Type == parser.AgentAider { @@ -153,6 +154,28 @@ func remoteDefaultRootTail(rel string) string { return "" } +// resolveAgentHasOnDiskSource reports whether a file-based agent has +// on-disk sources the resolve script should probe: either a legacy +// DiscoverFunc or a provider facade that has left legacy-only mode. +// Provider-migrated agents drop their DiscoverFunc but still have a +// configurable directory, so they must stay in the remote resolve set. +func resolveAgentHasOnDiskSource(def parser.AgentDef) bool { + if !def.FileBased { + return false + } + if def.DiscoverFunc != nil { + return true + } + switch parser.ProviderMigrationModes()[def.Type] { + case parser.ProviderMigrationShadowCompare, + parser.ProviderMigrationProviderAuthoritative: + _, ok := parser.ProviderFactoryByType(def.Type) + return ok + default: + return false + } +} + // parseResolvedDirs parses script output into a map of agent type to transfer // target paths plus a deduplicated list of extra files (records tagged with // resolveFilePrefix). 
Generated resolver output is NUL-delimited so remote diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 2825cb59e..de251b854 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -859,7 +859,7 @@ func (e *Engine) expandClaudeDuplicateCandidates( out := files for _, claudeDir := range e.agentDirs[parser.AgentClaude] { - for _, candidate := range parser.DiscoverClaudeProjects(claudeDir) { + for _, candidate := range parser.ClaudeProjectSessionFiles(claudeDir) { sessionID := claudeSessionIDFromPath(candidate.Path) if _, ok := sessionIDs[sessionID]; !ok { continue @@ -956,49 +956,12 @@ func (e *Engine) classifyOnePath( return df, true } - // Claude: //.jsonl - // or: ///subagents/**/agent-.jsonl - for _, claudeDir := range e.agentDirs[parser.AgentClaude] { - if claudeDir == "" { - continue - } - if rel, ok := isUnder(claudeDir, path); ok { - if !strings.HasSuffix(path, ".jsonl") { - continue - } - parts := strings.Split(rel, sep) - - // Standard session: project/session.jsonl - if len(parts) == 2 { - stem := strings.TrimSuffix( - filepath.Base(path), ".jsonl", - ) - if strings.HasPrefix(stem, "agent-") { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentClaude, - }, true - } - - // Subagent: project/session/subagents/**/agent-*.jsonl - if len(parts) >= 4 && parts[2] == "subagents" { - stem := strings.TrimSuffix( - parts[len(parts)-1], ".jsonl", - ) - if !strings.HasPrefix(stem, "agent-") { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentClaude, - }, true - } - } - } + // Claude change-path classification is provider-authoritative; the + // Claude provider's SourcesForChangedPath reproduces the + // //.jsonl and + // ///subagents/**/agent-.jsonl + // shapes, so the legacy block was removed when Claude was folded + // onto its provider. 
// Cowork: ///local_/.claude/ // projects//.jsonl (transcript), or the sibling @@ -3963,11 +3926,10 @@ func (e *Engine) processFile( var res processResult switch file.Agent { case parser.AgentClaude: - if strings.HasPrefix(file.Path, "s3://") { - res = e.processS3Session(ctx, file, info) - } else { - res = e.processClaude(ctx, file, info) - } + // Non-S3 Claude is provider-authoritative and handled earlier by + // processProviderFile; only s3:// Claude sources fall through to the + // legacy dispatch, via the S3 sync path. + res = e.processS3Session(ctx, file, info) case parser.AgentCowork: res = e.processCowork(file, info) case parser.AgentCodex: @@ -4055,6 +4017,12 @@ func (e *Engine) processProviderFile( if mode != parser.ProviderMigrationProviderAuthoritative { return processResult{}, false } + // S3 sources are not provider-owned: the provider source sets read local + // files, so s3:// paths use the legacy S3 sync path (processS3Session), + // which handles object fetch, fingerprinting, and per-agent skip logic. + if strings.HasPrefix(file.Path, "s3://") { + return processResult{}, false + } if file.ProviderSource != nil && !file.ProviderProcess { return processResult{}, false } @@ -4084,6 +4052,30 @@ func (e *Engine) processProviderFile( }, true } + // SyncSingleSession resolves a single session by ID and carries the + // caller-preferred project (typically the DB-preserved value, so a + // user override is not reverted) on file.Project without an explicit + // ProviderSource. Provider FindSource re-derives ProjectHint from the + // path, so honor the caller's project as the hint in that case. Full + // discovery and changed-path classification always supply + // file.ProviderSource, whose ProjectHint stays authoritative. 
+ if file.ProviderSource == nil && file.Project != "" { + source.ProjectHint = file.Project + } + + // DB-freshness skip for single-session JSONL providers (Claude): + // when the stored session's size, mtime, and data version already + // match the source and its project does not need reparse, skip the + // parse entirely. This reproduces the legacy process arm's + // shouldSkipFile gate so an unchanged session is not re-parsed on + // every full sync. + if mtime, fresh := e.providerSingleSessionFresh(ctx, provider, source, file); fresh { + return processResult{ + skip: true, + mtime: mtime, + }, true + } + fingerprint, err := provider.Fingerprint(ctx, source) if err != nil { return processResult{err: err}, true @@ -4104,6 +4096,21 @@ func (e *Engine) processProviderFile( } } + // Append-only incremental parse for already-synced JSONL files. + // When the incremental path declines but signals forceReplace, + // carry the flag onto the full parse so the write path replaces + // stored messages instead of appending on top of stale rows. + incRes, incOK := e.tryProviderIncrementalAppend( + ctx, provider, source, file, fingerprint, + ) + if incOK { + incRes.mtime = fingerprint.MTimeNS + incRes.cacheSkip = cacheSkip + incRes.cacheKey = cacheKey + return incRes, true + } + incForceReplace := incRes.forceReplace + outcome, err := provider.Parse(ctx, parser.ParseRequest{ Source: source, Fingerprint: fingerprint, @@ -4151,9 +4158,15 @@ func (e *Engine) processProviderFile( cacheSkip: cacheSkip, cacheKey: cacheKey, noCacheSkip: !cleanCache, - forceReplace: outcome.ForceReplace, + forceReplace: outcome.ForceReplace || incForceReplace, suppressPresenceSweep: !outcome.ResultSetComplete, } + // Incremental-append providers (Claude) need the stored file + // identity so a later sync can detect an atomic file replacement + // (new inode/device) and fall back to a full parse instead of + // appending on top of stale state. 
Match the legacy process arm, + // which stamped inode/device from the source file stat. + e.stampProviderFileIdentity(provider, source, res.results) for _, result := range outcome.Results { if result.DataVersion == parser.DataVersionNeedsRetry { if res.retrySessionIDs == nil { @@ -4659,13 +4672,10 @@ func (f fakeSnapshotInfo) ModTime() time.Time { func (f fakeSnapshotInfo) IsDir() bool { return false } func (f fakeSnapshotInfo) Sys() any { return nil } -func (e *Engine) processClaude( - ctx context.Context, - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - return e.processClaudeWithStoredSkip(ctx, file, info, true) -} - +// processClaudeWithStoredSkip parses a Claude Code JSONL session from a local +// file. Non-S3 Claude sources are provider-authoritative and never reach here; +// this remains the parse path for s3:// Claude sources, which the S3 sync path +// fetches to a local file and feeds in with allowStoredSkip=false. func (e *Engine) processClaudeWithStoredSkip( ctx context.Context, file parser.DiscoveredFile, info os.FileInfo, @@ -4788,6 +4798,173 @@ func (e *Engine) processCowork( } } +// providerSingleSessionFresh reports whether a single-session JSONL +// provider's source (Claude) maps to a stored session that is already +// up to date: the source size and mtime match what is stored, the row +// is at the current parser data version, and its project does not need +// reparse. It reproduces the legacy Claude process arm's shouldSkipFile +// gate so an unchanged session is skipped instead of re-parsed every +// full sync. Providers without incremental append, multi-session +// sources, or sources that are not a single physical file are never +// considered fresh here and always fall through to the full parse. 
+func (e *Engine) providerSingleSessionFresh( + ctx context.Context, + provider parser.Provider, + source parser.SourceRef, + file parser.DiscoveredFile, +) (int64, bool) { + // Match the legacy shouldSkipFile gate, which keyed off the + // engine-wide forceParse (parse-diff) flag only. A per-file + // ForceParse (set by SyncSingleSession to bypass the error skip + // cache) must not defeat the DB-freshness skip: an unchanged session + // is still skipped so a single-session resync does not, for example, + // reapply a worktree project mapping to a file that has not changed. + if e.forceParse { + return 0, false + } + // Claude is the single-physical-file provider that takes the + // append-only incremental path. Its source stem is the session ID, + // so DB freshness can be checked by that ID even though a DAG fork + // can later split the file into several sessions. + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return 0, false + } + path := providerDiscoveredPath(source) + if path == "" { + return 0, false + } + sessionID := claudeSessionIDFromPath(path) + if sessionID == "" { + return 0, false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + info, err := os.Stat(lookupPath) + if err != nil { + info, err = os.Stat(path) + if err != nil { + return 0, false + } + } + if !e.shouldSkipFile(sessionID, info) { + return 0, false + } + sess, _ := e.db.GetSession(ctx, e.idPrefix+sessionID) + return info.ModTime().UnixNano(), sess != nil && + sess.Project != "" && + !parser.NeedsProjectReparse(sess.Project) +} + +// stampProviderFileIdentity copies the source file's inode and device onto +// every parsed result for an incremental-append provider (Claude). The +// legacy process arm stamped this identity from the source stat so the +// incremental path can later detect an atomic file replacement and fall +// back to a full parse. 
Providers whose source is not a single physical +// file, or that do not support incremental append, are left untouched. +func (e *Engine) stampProviderFileIdentity( + provider parser.Provider, + source parser.SourceRef, + results []parser.ParseResult, +) { + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return + } + path := providerDiscoveredPath(source) + if path == "" { + return + } + info, err := os.Stat(path) + if err != nil { + return + } + inode, device := getFileIdentity(info) + for i := range results { + results[i].Session.File.Inode = inode + results[i].Session.File.Device = device + } +} + +// tryProviderIncrementalAppend reproduces the legacy incremental-append +// sync path for a provider-authoritative agent that supports append-only +// incremental parsing (Claude). The provider owns the byte-offset parse +// via ParseIncremental, but the engine still owns the DB-aware +// bookkeeping (session lookup, data-version and identity guards, ordinal +// resume, cross-sync split detection, and cumulative counters), so this +// drives the shared tryIncrementalJSONL with an adapter that calls the +// provider. Returns (result, true) when the incremental path produced a +// terminal result, or (result, false) to fall through to the full +// provider parse (carrying any forceReplace signal). +func (e *Engine) tryProviderIncrementalAppend( + ctx context.Context, + provider parser.Provider, + source parser.SourceRef, + file parser.DiscoveredFile, + fingerprint parser.SourceFingerprint, +) (processResult, bool) { + // Match the legacy tryIncrementalJSONL gate, which suppressed append + // deltas only under the engine-wide forceParse (parse-diff) flag. A + // per-file ForceParse does not disable incremental append. 
+ if e.forceParse { + return processResult{}, false + } + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return processResult{}, false + } + path := providerDiscoveredPath(source) + if path == "" { + return processResult{}, false + } + info, err := os.Stat(path) + if err != nil { + return processResult{}, false + } + + parseFn := func( + _ string, offset int64, startOrdinal int, lastEntryUUID string, + ) ([]parser.ParsedMessage, time.Time, int64, error) { + outcome, status, perr := provider.ParseIncremental( + ctx, + parser.IncrementalRequest{ + Source: source, + Fingerprint: fingerprint, + SessionID: e.idPrefix + claudeSessionIDFromPath(path), + Offset: offset, + StartOrdinal: startOrdinal, + Machine: e.machine, + LastEntryUUID: lastEntryUUID, + }, + ) + if perr != nil { + return nil, time.Time{}, 0, perr + } + switch status { + case parser.IncrementalNeedsFullParse: + if outcome.ForceReplace { + // Signal the shared helper to fall back to a + // full parse that replaces stored messages. + return nil, time.Time{}, 0, + parser.ErrClaudeIncrementalNeedsFullParse + } + // A plain full-parse fallback (e.g. DAG detected): + // return a non-fallback error so the helper runs a + // normal full parse without forceReplace. + return nil, time.Time{}, 0, parser.ErrDAGDetected + case parser.IncrementalNoNewData: + return nil, time.Time{}, 0, nil + default: + return outcome.Messages, outcome.EndedAt, + outcome.ConsumedBytes, nil + } + } + + return e.tryIncrementalJSONL(file, info, file.Agent, parseFn) +} + // incrementalParseFunc reads new JSONL lines from a file // starting at the given byte offset with the given starting // ordinal. Returns parsed messages, the latest timestamp @@ -4846,9 +5023,6 @@ func (e *Engine) tryIncrementalJSONL( } currentSize := info.Size() - if currentSize <= inc.FileSize { - return processResult{}, false - } // A prior sync that stored no message rows has no safe append // boundary. 
Rewritten files can grow in place and keep the same @@ -4875,9 +5049,23 @@ func (e *Engine) tryIncrementalJSONL( inc.FileInode, curInode, inc.FileDevice, curDevice, ) - return processResult{}, false + return processResult{forceReplace: true}, false } } + if currentSize < inc.FileSize { + log.Printf( + "incremental %s %s: file truncated from %d to %d, full parse", + agent, file.Path, inc.FileSize, currentSize, + ) + return processResult{forceReplace: true}, false + } + if currentSize == inc.FileSize { + log.Printf( + "incremental %s %s: file size unchanged at %d but changed since last sync, full parse", + agent, file.Path, currentSize, + ) + return processResult{forceReplace: true}, false + } // Persist the same effective file_mtime a full parse would store. For // Codex that folds in session_index.jsonl (parser.CodexEffectiveMtime), diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 132b61347..3bf7bfb49 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -7318,6 +7318,106 @@ func TestIncrementalSync_ClaudeFileReplaced(t *testing.T) { assert.Equal(t, newInfo.Size(), *full.FileSize, "file_size = %v, want %d (full-parse size)", *full.FileSize, newInfo.Size()) } +func TestIncrementalSync_ClaudeTruncatedFileReplacesStoredMessages(t *testing.T) { + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("stale assistant", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "truncated-replace.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + assertSessionMessageCount(t, env.db, "truncated-replace", 2) + + replacement := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("replacement", tsZero), + ) + require.Less(t, len(replacement), len(original), "replacement must truncate file") + require.NoError(t, os.WriteFile(path, []byte(replacement), 0o644), "write 
truncated replacement") + + env.engine.SyncPaths([]string{path}) + + assertSessionMessageCount(t, env.db, "truncated-replace", 1) + msgs := fetchMessages(t, env.db, "truncated-replace") + require.Len(t, msgs, 1) + assert.Equal(t, "replacement", msgs[0].Content) +} + +func TestIncrementalSync_ClaudeSameSizeFileReplaceUsesFullParse(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("identity tracking is a no-op on Windows") + } + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("alpha", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "same-size-replace.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + + replacement := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("third", tsZero), + testjsonl.ClaudeAssistantJSON("bravo", tsZeroS5), + ) + require.Len(t, replacement, len(original), "replacement fixture must keep same byte size") + tmp := path + ".tmp" + require.NoError(t, os.WriteFile(tmp, []byte(replacement), 0o644), "write replacement") + require.NoError(t, os.Rename(tmp, path), "rename replacement") + + env.engine.SyncPaths([]string{path}) + + msgs := fetchMessages(t, env.db, "same-size-replace") + require.Len(t, msgs, 2) + assert.Equal(t, "third", msgs[0].Content) + assert.Equal(t, "bravo", msgs[1].Content) +} + +func TestIncrementalSync_ClaudeSameSizeInPlaceRewriteClearsStaleRows(t *testing.T) { + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("stale assistant", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "same-size-in-place.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + assertSessionMessageCount(t, env.db, "same-size-in-place", 2) + + replacement := "" + for padding := range 4096 { + candidate := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "replacement"+strings.Repeat("x", padding), + tsZero, + 
), + ) + if len(candidate) == len(original) { + replacement = candidate + break + } + } + require.NotEmpty(t, replacement, "failed to build same-size replacement fixture") + require.Len(t, replacement, len(original), "replacement fixture must keep same byte size") + + require.NoError(t, os.WriteFile(path, []byte(replacement), 0o644), "write in-place replacement") + now := time.Now().Add(time.Second) + require.NoError(t, os.Chtimes(path, now, now), "bump replacement mtime") + + env.engine.SyncPaths([]string{path}) + + assertSessionMessageCount(t, env.db, "same-size-in-place", 1) + msgs := fetchMessages(t, env.db, "same-size-in-place") + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0].Content, "replacement") +} + // TestIncrementalSync_ClaudeMidStreamSplitFallsBackToFullParse covers // the cross-sync split case: the first sync stores a partial assistant // snapshot (one of several streaming snapshots) and the next sync diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index f6824249b..c50a01440 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -2770,10 +2770,14 @@ func TestEngine_ClassifyOnePathClaudeStatPermissionErrorStillClassifies( _ = os.Chmod(projectDir, 0o755) }() - got, ok := engine.classifyOnePath(path, nil) - require.True(t, ok, "expected path to classify despite stat permission error") - assert.Equal(t, path, got.Path) - assert.Equal(t, parser.AgentClaude, got.Agent) + // Claude is provider-authoritative, so classification flows through + // the provider's changed-path handling rather than the legacy + // classifyOnePath Claude block. A transient stat-permission error + // must still classify the path by shape so the change is not dropped. 
+ files := engine.classifyPaths([]string{path}) + require.Len(t, files, 1, "expected path to classify despite stat permission error") + assert.Equal(t, path, files[0].Path) + assert.Equal(t, parser.AgentClaude, files[0].Agent) } func TestEngine_ClassifyPathsDedupesOpenCodeChildPaths(t *testing.T) { diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 2f9b81862..3d4466c37 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -16,6 +16,10 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) +// The generic shadow-compare/legacy-coexistence mechanism is exercised through +// the Cowork agent, which remains legacy-only and reuses the Claude transcript +// format. Claude itself is now provider-authoritative, so it no longer has a +// legacy processFile arm to observe in shadow. func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-caller.jsonl") @@ -36,8 +40,8 @@ func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { 0o644, )) - legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( - sourcePath, "demo", "devbox", + legacyResults, legacyExcluded, err := parser.ParseCoworkSession( + sourcePath, "devbox", ) require.NoError(t, err) require.Len(t, legacyResults, 1) @@ -51,7 +55,7 @@ func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { providerResult.Session.File.Hash = hash source := parser.SourceRef{ - Provider: parser.AgentClaude, + Provider: parser.AgentCowork, Key: sourcePath, DisplayPath: sourcePath, FingerprintKey: sourcePath, @@ -61,8 +65,8 @@ func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { shadowTestProvider: shadowTestProvider{ ProviderBase: parser.ProviderBase{ Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: 
"Claude Code", + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", }, }, fingerprint: parser.SourceFingerprint{ @@ -83,14 +87,14 @@ func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { var comparisons []ProviderShadowComparison engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, + parser.AgentCowork: {root}, }, Machine: "devbox", ProviderFactories: []parser.ProviderFactory{ shadowCallerFactory{provider: provider}, }, ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, + parser.AgentCowork: parser.ProviderMigrationShadowCompare, }, ProviderShadowRecorder: func(comparison ProviderShadowComparison) { comparisons = append(comparisons, comparison) @@ -99,13 +103,13 @@ func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { result := engine.processFile(context.Background(), parser.DiscoveredFile{ Path: sourcePath, - Agent: parser.AgentClaude, + Agent: parser.AgentCowork, }) require.NoError(t, result.err) require.Len(t, result.results, 1) - assert.Equal(t, "shadow-caller", result.results[0].Session.ID) - assert.Equal(t, parser.AgentClaude, result.results[0].Session.Agent) + assert.Equal(t, "cowork:shadow-caller", result.results[0].Session.ID) + assert.Equal(t, parser.AgentCowork, result.results[0].Session.Agent) require.Len(t, comparisons, 1) assert.NoError(t, comparisons[0].Err) assert.Empty(t, comparisons[0].Mismatches) @@ -257,8 +261,8 @@ func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { 0o644, )) - legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( - sourcePath, "demo", "devbox", + legacyResults, legacyExcluded, err := parser.ParseCoworkSession( + sourcePath, "devbox", ) require.NoError(t, err) require.Len(t, legacyResults, 1) @@ -272,7 +276,7 @@ func 
TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { providerResult.Session.File.Hash = hash changedSource := parser.SourceRef{ - Provider: parser.AgentClaude, + Provider: parser.AgentCowork, Key: "changed-path-source", DisplayPath: sourcePath, FingerprintKey: sourcePath, @@ -283,8 +287,8 @@ func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { shadowTestProvider: shadowTestProvider{ ProviderBase: parser.ProviderBase{ Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", }, }, fingerprint: parser.SourceFingerprint{ @@ -305,14 +309,14 @@ func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { var comparisons []ProviderShadowComparison engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, + parser.AgentCowork: {root}, }, Machine: "devbox", ProviderFactories: []parser.ProviderFactory{ shadowCallerFactory{provider: provider}, }, ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, + parser.AgentCowork: parser.ProviderMigrationShadowCompare, }, ProviderShadowRecorder: func(comparison ProviderShadowComparison) { comparisons = append(comparisons, comparison) @@ -321,7 +325,7 @@ func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { result := engine.processFile(context.Background(), parser.DiscoveredFile{ Path: sourcePath, - Agent: parser.AgentClaude, + Agent: parser.AgentCowork, ForceParse: true, ProviderSource: &changedSource, }) @@ -566,27 +570,27 @@ func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { shadowTestProvider: shadowTestProvider{ ProviderBase: parser.ProviderBase{ Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", }, }, }, source: 
parser.SourceRef{ - Provider: parser.AgentClaude, + Provider: parser.AgentCowork, Key: sourcePath, }, } var comparisons []ProviderShadowComparison engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, + parser.AgentCowork: {root}, }, Machine: "devbox", ProviderFactories: []parser.ProviderFactory{ shadowCallerFactory{provider: provider}, }, ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, + parser.AgentCowork: parser.ProviderMigrationShadowCompare, }, ProviderShadowRecorder: func(comparison ProviderShadowComparison) { comparisons = append(comparisons, comparison) @@ -598,7 +602,7 @@ func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { result := engine.processFile(context.Background(), parser.DiscoveredFile{ Path: sourcePath, - Agent: parser.AgentClaude, + Agent: parser.AgentCowork, }) require.True(t, result.skip) @@ -691,6 +695,76 @@ func TestProcessFileProviderAuthoritativeUsesInjectedProvider(t *testing.T) { assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) } +func TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "fresh.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "demo", + } + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + Caps: parser.Capabilities{ + Source: parser.SourceCapabilities{ + 
IncrementalAppend: parser.CapabilitySupported, + }, + }, + }, + }, + source: source, + } + database := dbtest.OpenTestDB(t) + filePath := sourcePath + fileSize := info.Size() + fileMtime := info.ModTime().UnixNano() + require.NoError(t, database.UpsertSession(db.Session{ + ID: "fresh", + Project: "demo", + Machine: "devbox", + Agent: string(parser.AgentClaude), + FilePath: &filePath, + FileSize: &fileSize, + FileMtime: &fileMtime, + })) + require.NoError(t, database.SetSessionDataVersion("fresh", db.CurrentDataVersion())) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentClaude, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, fileMtime, result.mtime) + assert.Empty(t, provider.calls) + assert.Equal(t, sourcePath, provider.findRequest.StoredFilePath) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") From 31b89b1f88c9926f69fc4b65f6b75427c6b45524 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:07:44 -0400 Subject: [PATCH 06/11] feat(parser): migrate cowork provider Cowork stores Claude-shaped transcripts behind local-agent metadata, so the provider boundary needs to preserve that metadata-to-transcript relationship instead of treating the files as plain Claude JSONL sources. 
The concrete provider keeps shallow metadata watching, metadata change classification, subagent transcript discovery, raw/full ID lookup, composite mtime freshness, and hash propagation explicit for the sync path. fix(parser): cover cowork nested watch events Cowork metadata and transcripts live below org/workspace/session directories, so a shallow root watch could not deliver the paths the provider claimed to classify. Deleted metadata also lost the JSON needed to resolve the transcript, leaving stale provider state after remove or rename events. Make the watch plan recursive for Cowork source globs, recover deleted metadata from the local session directory shape, cover removed metadata/main/subagent paths, and move Cowork into shadow comparison as its branch-local migration step. Validation: go test -tags "fts5" ./internal/parser -run 'Test(CoworkProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): reject ambiguous cowork metadata removal Deleted Cowork metadata can only be recovered from the local session directory shape. If that directory contains multiple main transcripts, choosing the first filesystem match would attach the event to an arbitrary source and leave the real stale source unresolved. Refuse ambiguous deleted-metadata recovery unless exactly one main transcript is present, and cover the multi-transcript case. The regular single-transcript metadata removal path remains supported. Validation: go test -tags "fts5" ./internal/parser -run 'Test(CoworkProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): validate cowork deleted metadata candidates Cowork metadata deletion recovery scans project directories after the metadata file is gone, so it cannot rely on the normal metadata-guided resolution path. 
It still needs the same transcript validity rules as normal discovery: regular files only, and symlink targets must stay inside the local session directory. Apply that validation before selecting or counting fallback candidates so symlink escapes are ignored and broken symlinks do not create false ambiguity. Validation: go test -tags "fts5" ./internal/parser -run 'TestCoworkProvider|TestResolveCoworkSessionRejectsSymlinkEscape|TestClassifyCoworkPath|TestParseCowork' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check test(sync): compare cowork shadow parity Cowork is a sidecar-backed Claude transcript provider, so add source-level migration coverage that compares provider observation with ParseCoworkSession. The fixture includes local-agent metadata plus the nested Claude transcript and verifies session, messages, usage, excluded IDs, and data-version planning parity while preserving provider-computed hashes. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesCoworkLegacyParser|TestCoworkProvider|TestParseCowork|TestClassifyCoworkPath' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check refactor(parser): fold cowork into provider Move Cowork source discovery, lookup, parse, and changed-path classification onto the concrete coworkProvider and delete the package-level DiscoverCoworkSessions, FindCoworkSourceFile, ParseCoworkSession, and ClassifyCoworkPath free functions. 
Discovery and find-source bodies now live as provider-owned helpers (discoverTranscriptPaths, coworkFindSourceFile), parseSession is a receiver method, and the metadata-to-transcript classifier moves onto SourcesForChangedPath as classifyCoworkPath so a sibling local_.json change still resolves to the session's main transcript. Make Cowork provider-authoritative and drop its legacy sync dispatch: the classifyOnePath cowork block, the processFile case arm, and the processCowork method. The sibling-meta composite freshness is preserved on the provider's Fingerprint, which already folds CoworkSessionMtime (the max of transcript and metadata mtime) into the freshness identity so a title-only rename triggers a reparse through processProviderFile. CoworkSessionMtime stays exported and the engine's skip-cache and SourceMtime watcher-fallback blocks keep calling it, mirroring how the commandcode fold retained commandCodeEffectiveInfo. Replace the legacy free-function tests with provider API coverage plus a guard asserting the four entrypoints stay gone, drop the shadow-baseline comparison test, relocate the shared writeProviderShadowSourceFile helper into provider_shadow_support_test.go, and remove cowork_provider.go from the pending-shim scan list. test(sync): drop obsolete cowork shadow-legacy tests Folding cowork into its provider removes its legacy processFile arm, so the two shadow-compare tests that built fixtures via the deleted parser.ParseCoworkSession and asserted a legacy result coexisting with the shadow provider can no longer pass: a non-authoritative cowork file now falls through to the unknown-agent default. The shadow machinery keeps coverage through provider_shadow_test.go and the cached-skip not-comparable case. fix(sync): skip fresh cowork provider sources Cowork moved behind the provider-authoritative sync path, but the migrated path still fingerprinted and parsed unchanged transcripts before checking the stored file metadata. 
That dropped the cheap DB freshness gate the legacy Cowork path relied on and made full syncs rewrite fresh sessions unnecessarily. Restore that gate for Cowork before provider fingerprinting, using the same transcript size plus CoworkSessionMtime identity stored in the database. Per-file force parses still bypass the gate so metadata-driven refreshes and explicit reparses continue to reach the provider. Validation: go test -tags "fts5" ./internal/sync -run 'TestProcessFileProviderAuthoritative(SkipsFreshCoworkBeforeFingerprint|ForceParseBypassesFreshCoworkSkip)|TestSyncAllSinceCoworkMetaUpdateTriggersResync|TestSyncPathsCoworkReplacesUpdatedMessageOrdinal' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check --- internal/parser/cowork.go | 98 +---- internal/parser/cowork_provider.go | 331 ++++++++++++++++ internal/parser/cowork_provider_test.go | 377 +++++++++++++++++++ internal/parser/cowork_test.go | 142 +++++-- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/provider_shim_scan_test.go | 1 - internal/parser/types.go | 18 +- internal/parser/types_test.go | 6 +- internal/sync/engine.go | 102 ++--- internal/sync/provider_shadow_caller_test.go | 345 +++++++---------- 11 files changed, 1019 insertions(+), 405 deletions(-) create mode 100644 internal/parser/cowork_provider.go create mode 100644 internal/parser/cowork_provider_test.go diff --git a/internal/parser/cowork.go b/internal/parser/cowork.go index 4a32299d3..db91856a9 100644 --- a/internal/parser/cowork.go +++ b/internal/parser/cowork.go @@ -7,7 +7,6 @@ import ( "encoding/json" "os" "path/filepath" - "slices" "sort" "strings" "time" @@ -246,89 +245,6 @@ func walkCoworkSessions(root string, fn func(transcriptPath string)) { ) } -// DiscoverCoworkSessions finds all cowork session transcripts under root, -// including subagent transcripts. 
-func DiscoverCoworkSessions(root string) []DiscoveredFile { - var files []DiscoveredFile - walkCoworkSessions(root, func(transcript string) { - files = append(files, DiscoveredFile{ - Path: transcript, - Agent: AgentCowork, - }) - }) - return files -} - -// FindCoworkSourceFile locates a cowork transcript by its raw session ID -// (the cliSessionId or "agent-" subagent id, with the "cowork:" prefix -// already stripped). -func FindCoworkSourceFile(root, sessionID string) string { - if !IsValidSessionID(sessionID) { - return "" - } - target := sessionID + ".jsonl" - var found string - walkCoworkSessions(root, func(transcript string) { - if found == "" && filepath.Base(transcript) == target { - found = transcript - } - }) - return found -} - -// ClassifyCoworkPath reports whether a changed path under a cowork root is -// a cowork session transcript (main or subagent) or its sibling metadata -// file, and returns the transcript file that should be (re)parsed. -// Metadata changes (e.g. a title rename) resolve to the session's main -// transcript so the rename is picked up. -func ClassifyCoworkPath(root, path string) (string, bool) { - rel, ok := relUnder(root, path) - if !ok { - return "", false - } - sep := string(filepath.Separator) - parts := strings.Split(rel, sep) - n := len(parts) - base := parts[n-1] - - if strings.HasSuffix(base, ".jsonl") { - // Must live under a .claude/projects/ subtree. - marker := sep + ".claude" + sep + "projects" + sep - if !strings.Contains(sep+rel, marker) { - return "", false - } - stem := strings.TrimSuffix(base, ".jsonl") - if strings.HasPrefix(stem, "agent-") { - // Subagent transcript: //subagents/**/agent-*.jsonl. - if slices.Contains(parts, "subagents") { - return path, true - } - return "", false - } - // Main transcript: /.jsonl directly under projects. 
- if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" && - IsValidSessionID(stem) { - return path, true - } - return "", false - } - - // Metadata: //local_.json - if isCoworkMetaFileName(base) { - meta := readCoworkMeta(path) - if meta.CliSessionID == "" { - return "", false - } - sessionDir := strings.TrimSuffix(path, ".json") - if main, _ := resolveCoworkSession( - sessionDir, meta.CliSessionID, - ); main != "" { - return main, true - } - } - return "", false -} - // relUnder returns the path of child relative to dir when child is // strictly contained within dir, mirroring the engine's isUnder helper so // the parser can classify paths without importing sync internals. @@ -377,13 +293,13 @@ func extractCoworkAITitle(transcriptPath string) string { return title } -// ParseCoworkSession parses a cowork session transcript. It reuses the -// Claude Code parser on the transcript and then rewrites the results into -// the cowork namespace: agent type, "cowork:"-prefixed IDs, the session -// title, and metadata-derived timestamps for transcripts that carry none. -// Returns parsed results plus session IDs the parser intentionally -// excluded (prefixed), matching the Claude transcript parser. -func ParseCoworkSession( +// parseCoworkSession parses a cowork session transcript. It reuses the Claude +// Code parser on the transcript and then rewrites the results into the +// cowork namespace: agent type, "cowork:"-prefixed IDs, the session title, +// and metadata-derived timestamps for transcripts that carry none. Returns +// parsed results plus session IDs the parser intentionally excluded +// (prefixed), matching ParseClaudeSessionWithExclusions. 
+func parseCoworkSession( transcriptPath, machine string, ) ([]ParseResult, []string, error) { metaPath := coworkMetaPathForTranscript(transcriptPath) diff --git a/internal/parser/cowork_provider.go b/internal/parser/cowork_provider.go new file mode 100644 index 000000000..feaf755a8 --- /dev/null +++ b/internal/parser/cowork_provider.go @@ -0,0 +1,331 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "slices" + "strings" +) + +// Cowork stores each session as a Claude-format transcript +// (.claude/projects/**/.jsonl) with a sibling local_.json metadata +// file, plus per-subagent transcripts. It is a single-file provider whose parse +// can yield multiple sessions (the main conversation and its subagents) and +// drive removals via excluded session IDs. All behavior is wired into the +// shared single-file base via options. +func newCoworkProviderFactory(def AgentDef) ProviderFactory { + return NewSingleFileProviderFactory( + def, + coworkProviderCapabilities(), + func(cfg ProviderConfig) singleFileSourceSet { + return NewSingleFileSourceSet( + AgentCowork, + cfg.Roots, + WithFileDiscovery(coworkDiscoverFiles), + WithFileWatchRoots(coworkWatchRoots), + WithFileChangedPathClassifier(coworkClassifyPath), + WithFileLookup(coworkFindFile), + WithFileFingerprint(coworkFingerprintSource), + WithFileParse(coworkParseFile), + // Parse removes stale subagents via exclusions, so an empty + // result set is still a complete (not skipped) parse. 
+ WithAlwaysCompleteResultSet(), + ) + }, + ) +} + +func coworkDiscoverFiles(root string) []singleFileMatch { + var out []singleFileMatch + walkCoworkSessions(root, func(transcript string) { + if match, ok := coworkTranscriptMatch(root, transcript); ok { + out = append(out, match) + } + }) + return out +} + +func coworkWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"local_*.json", "*.jsonl"}, + DebounceKey: string(AgentCowork) + ":metadata:" + root, + }) + } + return out +} + +// coworkClassifyPath maps a stored or changed path to its session transcript. A +// transcript path classifies directly; a metadata path resolves to the +// session's main transcript so a title rename is picked up. Under allowMissing a +// metadata path whose transcript was deleted still resolves via on-disk +// scanning. +func coworkClassifyPath( + root, path string, allowMissing bool, +) (singleFileMatch, bool) { + transcript, ok := classifyCoworkPath(root, path) + if !ok && allowMissing { + transcript, ok = coworkTranscriptForMetadataPath(root, path) + } + if !ok { + return singleFileMatch{}, false + } + return coworkTranscriptMatch(root, transcript) +} + +func coworkFindFile(root, rawID string) (singleFileMatch, bool) { + path := coworkFindSourceFile(root, rawID) + if path == "" { + return singleFileMatch{}, false + } + return coworkTranscriptMatch(root, path) +} + +// coworkTranscriptMatch validates a transcript path under root and builds a +// match carrying the project hint read from the session's metadata. It +// reproduces the legacy sourceRef checks. 
+func coworkTranscriptMatch(root, path string) (singleFileMatch, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if _, ok := relUnder(root, path); !ok { + return singleFileMatch{}, false + } + metaPath := coworkMetaPathForTranscript(path) + if metaPath == "" { + return singleFileMatch{}, false + } + if !isCoworkTranscriptPath(root, path) { + return singleFileMatch{}, false + } + return singleFileMatch{ + Path: path, + ProjectHint: coworkProjectName(readCoworkMeta(metaPath)), + }, true +} + +func coworkFingerprintSource( + src singleFileSource, +) (SourceFingerprint, error) { + info, err := os.Stat(src.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Path, + ) + } + hash, err := hashJSONLSourceFile(src.Path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: info.Size(), + MTimeNS: CoworkSessionMtime(src.Path, info.ModTime().UnixNano()), + Hash: hash, + }, nil +} + +func coworkParseFile( + src singleFileSource, req ParseRequest, +) ([]ParseResult, []string, error) { + results, excluded, err := parseCoworkSession(src.Path, req.Machine) + if err != nil { + return nil, nil, err + } + if req.Fingerprint.Hash != "" { + for i := range results { + results[i].Session.File.Hash = req.Fingerprint.Hash + } + } + return results, excluded, nil +} + +func isCoworkTranscriptPath(root, path string) bool { + rel, ok := relUnder(root, path) + if !ok || filepath.Ext(path) != ".jsonl" { + return false + } + sep := string(filepath.Separator) + parts := strings.Split(rel, sep) + n := len(parts) + base := strings.TrimSuffix(filepath.Base(path), ".jsonl") + if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" { + return IsValidSessionID(base) + } + if !strings.Contains(sep+rel, sep+".claude"+sep+"projects"+sep) || + !slices.Contains(parts, "subagents") { + return 
false + } + return strings.HasPrefix(base, "agent-") +} + +func coworkTranscriptForMetadataPath(root, path string) (string, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || !isCoworkMetaFileName(filepath.Base(rel)) { + return "", false + } + sessionDir := strings.TrimSuffix(path, ".json") + resolvedSessionDir, err := filepath.EvalSymlinks(sessionDir) + if err != nil { + return "", false + } + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + entries, err := os.ReadDir(projectsDir) + if err != nil { + return "", false + } + var found string + for _, entry := range entries { + if !isDirOrSymlink(entry, projectsDir) { + continue + } + projectDir := filepath.Join(projectsDir, entry.Name()) + files, err := os.ReadDir(projectDir) + if err != nil { + continue + } + for _, file := range files { + if file.IsDir() { + continue + } + name := file.Name() + if !strings.HasSuffix(name, ".jsonl") { + continue + } + stem := strings.TrimSuffix(name, ".jsonl") + if !IsValidSessionID(stem) || strings.HasPrefix(stem, "agent-") { + continue + } + candidate := filepath.Join(projectDir, name) + if !validCoworkMainTranscriptCandidate(resolvedSessionDir, candidate) { + continue + } + if found != "" { + return "", false + } + found = candidate + } + } + return found, found != "" +} + +func validCoworkMainTranscriptCandidate(resolvedSessionDir, candidate string) bool { + if !IsRegularFile(candidate) { + return false + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil { + return false + } + return isContainedIn(resolved, resolvedSessionDir) +} + +// coworkFindSourceFile locates a cowork transcript by its raw session ID +// (the cliSessionId or "agent-" subagent id, with the "cowork:" prefix +// already stripped). 
+func coworkFindSourceFile(root, sessionID string) string {
+	if !IsValidSessionID(sessionID) {
+		return ""
+	}
+	target := sessionID + ".jsonl"
+	var found string
+	// The callback has no way to stop the walk, so keep the first transcript
+	// whose file name matches and ignore any later ones.
+	walkCoworkSessions(root, func(transcript string) {
+		if found == "" && filepath.Base(transcript) == target {
+			found = transcript
+		}
+	})
+	return found
+}
+
+// classifyCoworkPath reports whether a changed path under a cowork root is a
+// cowork session transcript (main or subagent) or its sibling metadata file,
+// and returns the transcript file that should be (re)parsed. Metadata changes
+// (e.g. a title rename) resolve to the session's main transcript so the rename
+// is picked up.
+func classifyCoworkPath(root, path string) (string, bool) {
+	rel, ok := relUnder(root, path)
+	if !ok {
+		return "", false
+	}
+	sep := string(filepath.Separator)
+	parts := strings.Split(rel, sep)
+	n := len(parts)
+	base := parts[n-1]
+
+	if strings.HasSuffix(base, ".jsonl") {
+		// Must live under a .claude/projects/ subtree.
+		marker := sep + ".claude" + sep + "projects" + sep
+		if !strings.Contains(sep+rel, marker) {
+			return "", false
+		}
+		stem := strings.TrimSuffix(base, ".jsonl")
+		if strings.HasPrefix(stem, "agent-") {
+			// Subagent transcript:
+			// {encoded}/{cliSessionId}/subagents/**/agent-*.jsonl.
+			if slices.Contains(parts, "subagents") {
+				return path, true
+			}
+			return "", false
+		}
+		// Main transcript: {encoded}/{cliSessionId}.jsonl directly under projects.
+		if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" &&
+			IsValidSessionID(stem) {
+			return path, true
+		}
+		return "", false
+	}
+
+	// Metadata: {org}/{workspace}/local_{uuid}.json
+	if isCoworkMetaFileName(base) {
+		meta := readCoworkMeta(path)
+		if meta.CliSessionID == "" {
+			return "", false
+		}
+		// The session directory shares the metadata file's name minus ".json".
+		sessionDir := strings.TrimSuffix(path, ".json")
+		if main, _ := resolveCoworkSession(
+			sessionDir, meta.CliSessionID,
+		); main != "" {
+			return main, true
+		}
+	}
+	return "", false
+}
+
+// coworkProviderCapabilities returns the source- and content-level capability
+// flags the cowork provider reports.
+func coworkProviderCapabilities() Capabilities {
+	return Capabilities{
+		Source: SourceCapabilities{
+			DiscoverSources:      CapabilitySupported,
+			WatchSources:         CapabilitySupported,
+			ClassifyChangedPath:  CapabilitySupported,
+			FindSource:           CapabilitySupported,
+			CompositeFingerprint: CapabilitySupported,
+			IncrementalAppend:    CapabilityNotApplicable,
+			MultiSessionSource:   CapabilitySupported,
+			PerSessionErrors:     CapabilityNotApplicable,
+			ExcludedSessions:     CapabilitySupported,
+			ForceReplaceOnParse:  CapabilityNotApplicable,
+		},
+		Content: ContentCapabilities{
+			FirstMessage:         CapabilitySupported,
+			SessionName:          CapabilitySupported,
+			Cwd:                  CapabilitySupported,
+			GitBranch:            CapabilitySupported,
+			Relationships:        CapabilitySupported,
+			Subagents:            CapabilitySupported,
+			Thinking:             CapabilitySupported,
+			ToolCalls:            CapabilitySupported,
+			ToolResults:          CapabilitySupported,
+			PerMessageTokenUsage: CapabilitySupported,
+			TerminationStatus:    CapabilitySupported,
+			MalformedLineCount:   CapabilitySupported,
+			Model:                CapabilitySupported,
+			StopReason:           CapabilitySupported,
+		},
+	}
+}
diff --git a/internal/parser/cowork_provider_test.go b/internal/parser/cowork_provider_test.go
new file mode 100644
index 000000000..7756f2c85
--- /dev/null
+++ b/internal/parser/cowork_provider_test.go
@@ -0,0 +1,377 @@
+package parser
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func
TestCoworkProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCowork) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCoworkProviderSourceMethods(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000101" + metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000101", + cliSessionID: cli, + encodedProject: "-Users-dev-code-demo", + title: "Provider title", + folders: []string{"/Users/dev/code/demo"}, + transcriptLines: coworkTranscriptLines(cli), + }) + subagentPath := filepath.Join( + filepath.Dir(transcript), + cli, + "subagents", + "tasks", + "agent-worker.jsonl", + ) + writeSourceFile(t, subagentPath, strings.Join(coworkTranscriptLines(cli), "\n")+"\n") + writeSourceFile( + t, + filepath.Join(filepath.Dir(transcript), cli, "subagents", "not-agent.jsonl"), + strings.Join(coworkTranscriptLines(cli), "\n")+"\n", + ) + writeSourceFile( + t, + filepath.Join(root, "org", "ws", "cowork-clientdata-cache.json"), + "{}\n", + ) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"local_*.json", "*.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{transcript, subagentPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered 
{ + assert.Equal(t, AgentCowork, source.Provider) + assert.Equal(t, "demo", source.ProjectHint) + assert.Equal(t, source.DisplayPath, source.FingerprintKey) + } + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~cowork:" + cli, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-worker", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: transcript, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + + transcriptInfo, err := os.Stat(transcript) + require.NoError(t, err) + newer := transcriptInfo.ModTime().Add(time.Hour) + require.NoError(t, os.Chtimes(metaPath, newer, newer)) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, transcript, fingerprint.Key) + assert.Equal(t, transcriptInfo.Size(), fingerprint.Size) + assert.Equal(t, newer.UnixNano(), fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "main transcript", path: transcript, want: transcript}, + {name: "subagent transcript", path: subagentPath, want: subagentPath}, + {name: "metadata", path: metaPath, want: transcript}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: tc.path, + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, changed[0].DisplayPath) + }) + } + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + 
context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, transcript, changed[0].DisplayPath) + + require.NoError(t, os.Remove(transcript)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: transcript, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, transcript, changed[0].DisplayPath) + + require.NoError(t, os.Remove(subagentPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "rename", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, subagentPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "org", "ws", "cowork-clientdata-cache.json"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: transcript, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestCoworkProviderParse(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000102" + _, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000102", + cliSessionID: cli, + encodedProject: "-sessions-demo", + title: "Parse title", + transcriptLines: coworkTranscriptLines(cli), + }) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + 
require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Empty(t, outcome.ExcludedSessionIDs) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "cowork:"+cli, result.Result.Session.ID) + assert.Equal(t, AgentCowork, result.Result.Session.Agent) + assert.Equal(t, "cowork", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, transcript, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "Parse title", result.Result.Session.SessionName) + assert.Equal(t, "hello there", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCoworkProviderMetadataRemovalRejectsAmbiguousMainTranscripts(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000104" + metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000104", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + otherPath := filepath.Join( + filepath.Dir(filepath.Dir(transcript)), + "-sessions-other", + "c0000000-0000-4000-8000-000000000105.jsonl", + ) + writeSourceFile( + t, + otherPath, + strings.Join(coworkTranscriptLines("c0000000-0000-4000-8000-000000000105"), "\n")+"\n", + ) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := 
provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, changed) +} + +func TestCoworkProviderMetadataRemovalIgnoresSymlinkEscape(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000106" + metaPath, _ := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000106", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + sessionDir := strings.TrimSuffix(metaPath, ".json") + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + outside := filepath.Join(root, "outside") + require.NoError(t, os.MkdirAll(outside, 0o755)) + writeSourceFile( + t, + filepath.Join(outside, "c0000000-0000-4000-8000-000000000107.jsonl"), + strings.Join(coworkTranscriptLines("c0000000-0000-4000-8000-000000000107"), "\n")+"\n", + ) + if err := os.Symlink(outside, filepath.Join(projectsDir, "-sessions-escape")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, cli+".jsonl", filepath.Base(changed[0].DisplayPath)) +} + +func TestCoworkProviderMetadataRemovalIgnoresBrokenSymlinkAmbiguity(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000108" + metaPath, _ := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000108", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + 
sessionDir := strings.TrimSuffix(metaPath, ".json") + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + brokenDir := filepath.Join(projectsDir, "-sessions-broken") + require.NoError(t, os.MkdirAll(brokenDir, 0o755)) + if err := os.Symlink( + filepath.Join(root, "missing.jsonl"), + filepath.Join(brokenDir, "c0000000-0000-4000-8000-000000000109.jsonl"), + ); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, cli+".jsonl", filepath.Base(changed[0].DisplayPath)) +} + +func TestCoworkProviderFullSessionIDPrefixLookup(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000103" + _, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000103", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + for _, id := range []string{"cowork:" + cli, "remote~cowork:" + cli} { + t.Run(strings.ReplaceAll(id, ":", "_"), func(t *testing.T) { + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: id, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + }) + } +} diff --git a/internal/parser/cowork_test.go b/internal/parser/cowork_test.go index eaf92b6bf..365bf2d87 100644 --- a/internal/parser/cowork_test.go +++ b/internal/parser/cowork_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" 
"path/filepath" @@ -13,6 +14,54 @@ import ( "github.com/stretchr/testify/require" ) +// coworkProviderForRoot constructs a cowork provider rooted at root. +func coworkProviderForRoot(t *testing.T, root, machine string) Provider { + t.Helper() + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + Machine: machine, + }) + require.True(t, ok) + return provider +} + +// coworkDiscoveredPaths returns the transcript paths the provider discovers +// under root. +func coworkDiscoveredPaths(t *testing.T, root string) []string { + t.Helper() + sources, err := coworkProviderForRoot(t, root, "").Discover(context.Background()) + require.NoError(t, err) + paths := make([]string, len(sources)) + for i, source := range sources { + paths[i] = source.DisplayPath + } + return paths +} + +// coworkParseTranscript finds and parses a single cowork transcript through +// the provider, returning the parse results and any excluded session IDs. +func coworkParseTranscript( + t *testing.T, root, transcript, machine string, +) ([]ParseResult, []string) { + t.Helper() + provider := coworkProviderForRoot(t, root, machine) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: transcript, + }) + require.NoError(t, err) + require.True(t, ok, "find source for %s", transcript) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Machine: machine, + }) + require.NoError(t, err) + results := make([]ParseResult, len(outcome.Results)) + for i, out := range outcome.Results { + results[i] = out.Result + } + return results, outcome.ExcludedSessionIDs +} + // All identifiers, titles, and content below are synthetic fixtures. // coworkFixture describes one cowork session to materialize on disk. 
@@ -91,7 +140,7 @@ func coworkTranscriptLines(cli string) []string { } } -func TestDiscoverCoworkSessions(t *testing.T) { +func TestCoworkProviderDiscoversSessions(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000001" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -104,13 +153,12 @@ func TestDiscoverCoworkSessions(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - got := DiscoverCoworkSessions(root) - require.Len(t, got, 1, "discovered files") - assert.Equal(t, transcript, got[0].Path, "Path") - assert.Equal(t, AgentCowork, got[0].Agent, "Agent") + got := coworkDiscoveredPaths(t, root) + require.Len(t, got, 1, "discovered sources") + assert.Equal(t, transcript, got[0], "DisplayPath") } -func TestDiscoverCoworkSessionsIgnoresNoise(t *testing.T) { +func TestCoworkProviderDiscoverIgnoresNoise(t *testing.T) { root := t.TempDir() wsDir := filepath.Join(root, "org", "ws") require.NoError(t, os.MkdirAll(wsDir, 0o755), "mkdir ws") @@ -145,10 +193,10 @@ func TestDiscoverCoworkSessionsIgnoresNoise(t *testing.T) { "write transcript-less meta", ) - assert.Empty(t, DiscoverCoworkSessions(root)) + assert.Empty(t, coworkDiscoveredPaths(t, root)) } -func TestParseCoworkSession(t *testing.T) { +func TestCoworkProviderParsesSession(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000002" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -163,8 +211,7 @@ func TestParseCoworkSession(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, excluded, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, excluded := coworkParseTranscript(t, root, transcript, "host-1") require.Empty(t, excluded, "excluded") require.Len(t, results, 1, "results") @@ -185,7 +232,7 @@ func TestParseCoworkSession(t *testing.T) { assert.Equal(t, 12, sess.PeakContextTokens, "PeakContextTokens (input+cacheRead)") } -func 
TestParseCoworkSessionTitleFallsBackToAITitle(t *testing.T) { +func TestCoworkProviderParseTitleFallsBackToAITitle(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000003" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -198,14 +245,13 @@ func TestParseCoworkSessionTitleFallsBackToAITitle(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, _, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, _ := coworkParseTranscript(t, root, transcript, "host-1") require.Len(t, results, 1, "results") assert.Equal(t, "Auto title", results[0].Session.SessionName, "falls back to ai-title event") } -func TestParseCoworkSessionProjectFromSelectedFolder(t *testing.T) { +func TestCoworkProviderParseProjectFromSelectedFolder(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000004" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -219,14 +265,13 @@ func TestParseCoworkSessionProjectFromSelectedFolder(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, _, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, _ := coworkParseTranscript(t, root, transcript, "host-1") require.Len(t, results, 1, "results") assert.Equal(t, "my_app", results[0].Session.Project, "project derived from userSelectedFolders") } -func TestFindCoworkSourceFile(t *testing.T) { +func TestCoworkProviderFindsSourceFile(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000005" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -239,11 +284,22 @@ func TestFindCoworkSourceFile(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - assert.Equal(t, transcript, FindCoworkSourceFile(root, cli), "found") - assert.Empty(t, FindCoworkSourceFile(root, "nonexistent-id"), "missing") + provider := coworkProviderForRoot(t, root, "") + found, ok, err := 
provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: cli, + }) + require.NoError(t, err) + require.True(t, ok, "found") + assert.Equal(t, transcript, found.DisplayPath) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "nonexistent-id", + }) + require.NoError(t, err) + assert.False(t, ok, "missing") } -func TestClassifyCoworkPath(t *testing.T) { +func TestCoworkProviderClassifiesChangedPath(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000006" metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -256,20 +312,34 @@ func TestClassifyCoworkPath(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) + provider := coworkProviderForRoot(t, root, "") + classify := func(path string) (string, bool) { + sources, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + if len(sources) == 0 { + return "", false + } + require.Len(t, sources, 1) + return sources[0].DisplayPath, true + } + // A transcript change classifies to itself. - got, ok := ClassifyCoworkPath(root, transcript) + got, ok := classify(transcript) require.True(t, ok, "transcript classified") assert.Equal(t, transcript, got, "transcript path") // A metadata change resolves to the session's transcript. - got, ok = ClassifyCoworkPath(root, metaPath) + got, ok = classify(metaPath) require.True(t, ok, "metadata classified") assert.Equal(t, transcript, got, "metadata resolves to transcript") // Unrelated and outside-root paths are ignored. 
- _, ok = ClassifyCoworkPath(root, filepath.Join(root, "org", "ws", "artifacts.json")) + _, ok = classify(filepath.Join(root, "org", "ws", "artifacts.json")) assert.False(t, ok, "cache file ignored") - _, ok = ClassifyCoworkPath(root, "/some/other/place.jsonl") + _, ok = classify("/some/other/place.jsonl") assert.False(t, ok, "outside root ignored") } @@ -310,7 +380,7 @@ func TestCoworkSessionMtime(t *testing.T) { "transcript mtime when metadata missing") } -func TestDiscoverCoworkSessionsIncludesSubagents(t *testing.T) { +func TestCoworkProviderDiscoverIncludesSubagents(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000008" enc := "-sessions-demo" @@ -344,29 +414,27 @@ func TestDiscoverCoworkSessionsIncludesSubagents(t *testing.T) { "write subagent", ) - got := DiscoverCoworkSessions(root) - paths := make([]string, len(got)) - for i, f := range got { - paths[i] = f.Path - assert.Equal(t, AgentCowork, f.Agent, "Agent") - } + paths := coworkDiscoveredPaths(t, root) assert.Contains(t, paths, transcript, "main transcript discovered") assert.Contains(t, paths, subPath, "subagent transcript discovered") // The subagent parses into a cowork-namespaced subagent session whose // parent is the main session. - results, _, err := ParseCoworkSession(subPath, "host-1") - require.NoError(t, err, "parse subagent") + results, _ := coworkParseTranscript(t, root, subPath, "host-1") require.Len(t, results, 1, "results") sub := results[0].Session assert.Equal(t, "cowork:agent-0000000000000001", sub.ID, "subagent ID") assert.Equal(t, "cowork:"+cli, sub.ParentSessionID, "parent prefixed") assert.Equal(t, RelSubagent, sub.RelationshipType, "RelSubagent") - // FindCoworkSourceFile resolves the subagent by its raw ID too. - assert.Equal(t, subPath, - FindCoworkSourceFile(root, "agent-0000000000000001"), - "find subagent source") + // The provider resolves the subagent by its raw ID too. 
+ provider := coworkProviderForRoot(t, root, "") + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-0000000000000001", + }) + require.NoError(t, err) + require.True(t, ok, "find subagent source") + assert.Equal(t, subPath, found.DisplayPath) } func TestResolveCoworkSessionRejectsSymlinkEscape(t *testing.T) { diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 9307f7b17..7a54cf967 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -358,6 +358,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newClaudeProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentCowork: + return newCoworkProviderFactory(def) case AgentCortex: return newCortexProviderFactory(def) case AgentCursor: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index d5673312d..6a91112ef 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -18,7 +18,7 @@ const ( var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, - AgentCowork: ProviderMigrationLegacyOnly, + AgentCowork: ProviderMigrationProviderAuthoritative, AgentCodex: ProviderMigrationLegacyOnly, AgentCopilot: ProviderMigrationLegacyOnly, AgentGemini: ProviderMigrationLegacyOnly, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 68432d8fd..8e4aa92c4 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -51,7 +51,6 @@ var pendingShimProviderFiles = map[string]bool{ "antigravity_provider.go": true, "codex_provider.go": true, "copilot_provider.go": true, - "cowork_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, "kiro_ide_provider.go": true, diff --git a/internal/parser/types.go 
b/internal/parser/types.go index 825bad354..2821dc0d3 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -108,16 +108,14 @@ var Registry = []AgentDef{ FileBased: true, }, { - Type: AgentCowork, - DisplayName: "Claude Cowork", - EnvVar: "COWORK_DIR", - ConfigKey: "cowork_dirs", - DefaultDirs: coworkDefaultDirs(), - IDPrefix: "cowork:", - FileBased: true, - ShallowWatch: true, - DiscoverFunc: DiscoverCoworkSessions, - FindSourceFunc: FindCoworkSourceFile, + Type: AgentCowork, + DisplayName: "Claude Cowork", + EnvVar: "COWORK_DIR", + ConfigKey: "cowork_dirs", + DefaultDirs: coworkDefaultDirs(), + IDPrefix: "cowork:", + FileBased: true, + ShallowWatch: true, }, { Type: AgentCodex, diff --git a/internal/parser/types_test.go b/internal/parser/types_test.go index ea96bda83..e25b95cf7 100644 --- a/internal/parser/types_test.go +++ b/internal/parser/types_test.go @@ -523,8 +523,10 @@ func TestCoworkRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentCowork) require.True(t, ok, "AgentCowork missing from Registry") require.True(t, def.FileBased, "Cowork FileBased") - require.NotNil(t, def.DiscoverFunc, "Cowork DiscoverFunc") - require.NotNil(t, def.FindSourceFunc, "Cowork FindSourceFunc") + // Cowork is a migrated, provider-authoritative agent: source discovery + // and lookup live on the concrete provider, not on legacy AgentDef hooks. + require.Nil(t, def.DiscoverFunc, "Cowork DiscoverFunc") + require.Nil(t, def.FindSourceFunc, "Cowork FindSourceFunc") assert.Equal(t, "COWORK_DIR", def.EnvVar) assert.Equal(t, "cowork_dirs", def.ConfigKey) assert.Equal(t, "cowork:", def.IDPrefix) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index de251b854..041e085d3 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -963,23 +963,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. 
- // Cowork: ///local_/.claude/ - // projects//.jsonl (transcript), or the sibling - // local_.json metadata file (resolves to its transcript). - for _, coworkDir := range e.agentDirs[parser.AgentCowork] { - if coworkDir == "" { - continue - } - if transcript, ok := parser.ClassifyCoworkPath( - coworkDir, path, - ); ok { - return parser.DiscoveredFile{ - Path: transcript, - Agent: parser.AgentCowork, - }, true - } - } - // Codex: either ////.jsonl // or /.jsonl for archived sessions. for _, codexDir := range e.agentDirs[parser.AgentCodex] { @@ -3930,8 +3913,6 @@ func (e *Engine) processFile( // processProviderFile; only s3:// Claude sources fall through to the // legacy dispatch, via the S3 sync path. res = e.processS3Session(ctx, file, info) - case parser.AgentCowork: - res = e.processCowork(file, info) case parser.AgentCodex: if strings.HasPrefix(file.Path, "s3://") { res = e.processS3Session(ctx, file, info) @@ -4075,6 +4056,12 @@ func (e *Engine) processProviderFile( mtime: mtime, }, true } + if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + return processResult{ + skip: true, + mtime: freshMtime, + }, true + } fingerprint, err := provider.Fingerprint(ctx, source) if err != nil { @@ -4754,50 +4741,6 @@ func (e *Engine) processClaudeWithStoredSkip( } } -// processCowork parses a Claude Desktop "cowork" (local agent mode) -// session. The transcript is a standard Claude Code JSONL file nested -// inside the cowork session directory, so the work is delegated to the -// Claude parser and rewritten into the cowork namespace by -// parser.ParseCoworkSession. Cowork session IDs are "cowork:"-prefixed, so -// the skip check keys off file_path rather than the bare filename stem. -func (e *Engine) processCowork( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - - // The session title lives in the sibling metadata file, so a rename - // changes only that file. 
Skip on the composite (transcript+metadata) - // mtime so renames are re-parsed instead of skipped as unchanged. - compositeMtime := parser.CoworkSessionMtime( - file.Path, info.ModTime().UnixNano(), - ) - fi := fakeSnapshotInfo{fSize: info.Size(), fMtime: compositeMtime} - if e.shouldSkipByPath(file.Path, fi) { - return processResult{skip: true} - } - - results, excludedIDs, err := parser.ParseCoworkSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - - inode, device := getFileIdentity(info) - hash, hashErr := ComputeFileHash(file.Path) - for i := range results { - results[i].Session.File.Inode = inode - results[i].Session.File.Device = device - if hashErr == nil { - results[i].Session.File.Hash = hash - } - } - - return processResult{ - results: results, - excludedSessionIDs: excludedIDs, - } -} - // providerSingleSessionFresh reports whether a single-session JSONL // provider's source (Claude) maps to a stored session that is already // up to date: the source size and mtime match what is stored, the row @@ -4858,6 +4801,39 @@ func (e *Engine) providerSingleSessionFresh( !parser.NeedsProjectReparse(sess.Project) } +func (e *Engine) providerCoworkSourceFresh( + source parser.SourceRef, + file parser.DiscoveredFile, +) (int64, bool) { + if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + return 0, false + } + path := providerDiscoveredPath(source) + if path == "" { + return 0, false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + info, err := os.Stat(lookupPath) + if err != nil { + info, err = os.Stat(path) + if err != nil { + return 0, false + } + } + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if !e.shouldSkipByPath(path, effectiveInfo) { + return 0, false + } + return mtime, true +} + // stampProviderFileIdentity copies the source file's inode and 
device onto // every parsed result for an incremental-append provider (Claude). The // legacy process arm stamped this identity from the source stat so the diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 3d4466c37..a389eac76 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -16,107 +17,6 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) -// The generic shadow-compare/legacy-coexistence mechanism is exercised through -// the Cowork agent, which remains legacy-only and reuses the Claude transcript -// format. Claude itself is now provider-authoritative, so it no longer has a -// legacy processFile arm to observe in shadow. -func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-caller.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "compare through the caller", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - testjsonl.ClaudeAssistantJSON( - "provider stayed shadow-only", - "2026-06-01T10:01:00Z", - ), - )), - 0o644, - )) - - legacyResults, legacyExcluded, err := parser.ParseCoworkSession( - sourcePath, "devbox", - ) - require.NoError(t, err) - require.Len(t, legacyResults, 1) - require.Empty(t, legacyExcluded) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - providerResult := legacyResults[0] - providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) - hash, err := ComputeFileHash(sourcePath) - require.NoError(t, err) - providerResult.Session.File.Hash = hash - - source := parser.SourceRef{ - 
Provider: parser.AgentCowork, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "demo", - } - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCowork, - DisplayName: "Claude Cowork", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - }, - }, - source: source, - } - var comparisons []ProviderShadowComparison - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCowork: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCowork: parser.ProviderMigrationShadowCompare, - }, - ProviderShadowRecorder: func(comparison ProviderShadowComparison) { - comparisons = append(comparisons, comparison) - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentCowork, - }) - - require.NoError(t, result.err) - require.Len(t, result.results, 1) - assert.Equal(t, "cowork:shadow-caller", result.results[0].Session.ID) - assert.Equal(t, parser.AgentCowork, result.results[0].Session.Agent) - require.Len(t, comparisons, 1) - assert.NoError(t, comparisons[0].Err) - assert.Empty(t, comparisons[0].Mismatches) - assert.Equal(t, sourcePath, comparisons[0].File.Path) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - func TestClassifyProviderChangedPathPassesStoredHintsToShadowProvider( t *testing.T, ) { @@ -241,105 +141,6 @@ func 
TestClassifyProviderChangedPathRunsAlongsideLegacyClassifier( assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) } -func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-provider-source.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "provider source should win", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - testjsonl.ClaudeAssistantJSON( - "force parse should propagate", - "2026-06-01T10:01:00Z", - ), - )), - 0o644, - )) - - legacyResults, legacyExcluded, err := parser.ParseCoworkSession( - sourcePath, "devbox", - ) - require.NoError(t, err) - require.Len(t, legacyResults, 1) - require.Empty(t, legacyExcluded) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - providerResult := legacyResults[0] - providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) - hash, err := ComputeFileHash(sourcePath) - require.NoError(t, err) - providerResult.Session.File.Hash = hash - - changedSource := parser.SourceRef{ - Provider: parser.AgentCowork, - Key: "changed-path-source", - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "demo", - } - findFound := false - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCowork, - DisplayName: "Claude Cowork", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - }, - }, - findFound: &findFound, - } - var comparisons []ProviderShadowComparison - 
engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCowork: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCowork: parser.ProviderMigrationShadowCompare, - }, - ProviderShadowRecorder: func(comparison ProviderShadowComparison) { - comparisons = append(comparisons, comparison) - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentCowork, - ForceParse: true, - ProviderSource: &changedSource, - }) - - require.NoError(t, result.err) - require.Len(t, comparisons, 1) - assert.NoError(t, comparisons[0].Err) - assert.Empty(t, comparisons[0].Mismatches) - assert.Equal(t, changedSource, comparisons[0].Source) - assert.Equal(t, changedSource, provider.parseRequest.Source) - assert.True(t, provider.parseRequest.ForceParse) - assert.Empty(t, provider.findRequest) -} - func TestClassifyProviderChangedPathMarksAuthoritativeProviderProcess( t *testing.T, ) { @@ -710,6 +511,7 @@ func TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint(t *te FingerprintKey: sourcePath, ProjectHint: "demo", } + provider := &shadowCallerProvider{ shadowTestProvider: shadowTestProvider{ ProviderBase: parser.ProviderBase{ @@ -765,6 +567,106 @@ func TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint(t *te assert.Equal(t, sourcePath, provider.findRequest.StoredFilePath) } +func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *testing.T) { + root := t.TempDir() + database := dbtest.OpenTestDB(t) + sourcePath, sourceMtime := writeFreshCoworkProviderSource( + t, root, database, "fresh-session", + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: 
parser.AgentCowork, + DisplayName: "Claude Cowork", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCowork, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + +func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { + root := t.TempDir() + database := dbtest.OpenTestDB(t) + sourcePath, sourceMtime := writeFreshCoworkProviderSource( + t, root, database, "force-session", + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", + }, + }, + fingerprint: parser.SourceFingerprint{ + Key: sourcePath, + MTimeNS: sourceMtime, + }, + outcome: parser.ParseOutcome{ + ResultSetComplete: true, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCowork, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: 
parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + ForceParse: true, + }) + + require.NoError(t, result.err) + assert.False(t, result.skip) + assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) + assert.True(t, provider.parseRequest.ForceParse) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1312,3 +1214,46 @@ func (f shadowCallerFactory) Capabilities() parser.Capabilities { func (f shadowCallerFactory) NewProvider(parser.ProviderConfig) parser.Provider { return f.provider } + +func writeFreshCoworkProviderSource( + t *testing.T, + root string, + database *db.DB, + rawSessionID string, +) (string, int64) { + t.Helper() + + sessionDir := filepath.Join(root, "org", "workspace", "local_fresh") + projectDir := filepath.Join(sessionDir, ".claude", "projects", "-demo") + require.NoError(t, os.MkdirAll(projectDir, 0o755)) + metaPath := sessionDir + ".json" + sourcePath := filepath.Join(projectDir, rawSessionID+".jsonl") + require.NoError(t, os.WriteFile(metaPath, []byte(`{"title":"Fresh"}`), 0o644)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + + transcriptTime := time.Unix(1_781_475_210, 0) + metaTime := transcriptTime.Add(time.Second) + require.NoError(t, os.Chtimes(sourcePath, transcriptTime, transcriptTime)) + require.NoError(t, os.Chtimes(metaPath, metaTime, metaTime)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + sourceMtime := parser.CoworkSessionMtime(sourcePath, info.ModTime().UnixNano()) + require.Equal(t, metaTime.UnixNano(), sourceMtime) + + fullSessionID := "cowork:" + rawSessionID + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "cowork-project", + Machine: "devbox", + Agent: 
string(parser.AgentCowork), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) + + return sourcePath, sourceMtime +} From 31a476bc4ab2821752c3af33417b02fbebf4c50a Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 21:04:08 -0400 Subject: [PATCH 07/11] fix(sync,parser): reclassify parse-diff raced sources and clear shim-scan list The Claude provider migration routes parse-diff through the provider path, which regressed live-write skew detection: a concurrently rewritten source was classified as Changed instead of Raced, tripping --fail-on-change on a daemon write. Gate raced-source reliability on parseDiffAgentDiscoverable so provider-folded agents keep the raced reclassification. Also clear pendingShimProviderFiles: every provider in this stack is folded on the branch that introduces it, so no provider file is a standing shim and the exempt list must be empty. --- internal/sync/parsediff.go | 15 ++++++++------- internal/sync/parsediff_compare_test.go | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index 767898ada..39577484c 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -501,7 +501,7 @@ func stripVirtualSourceSuffix(path string) string { // Detecting either makes the source unreliable, so the caller skips the raced // guard entirely. This never masks genuine drift for those agents, while plain // file-based agents reading a literal file still get the real race protection. -func parseDiffSourceReliableForRaced( +func (e *Engine) parseDiffSourceReliableForRaced( agent parser.AgentType, sourcePath string, ) bool { // A virtual path carries a recognized "#..." 
suffix; stripping changes @@ -510,15 +510,16 @@ func parseDiffSourceReliableForRaced( if stripVirtualSourceSuffix(sourcePath) != sourcePath { return false } - // Only plain file-based agents (FileBased with a DiscoverFunc, the same - // on-disk-source condition resolveParseDiffAgents uses) read a literal - // file whose mtime populated file_mtime. An unknown or DB-backed agent has - // no such basis. + // Only agents with a literal on-disk source -- the same discoverability + // condition resolveParseDiffAgents uses -- read a file whose mtime + // populated file_mtime. parseDiffAgentDiscoverable gates out DB-backed + // (FileBased == false, e.g. Forge) and non-authoritative agents, so an + // unknown or DB-backed agent has no such basis. def, ok := parser.AgentByType(agent) if !ok { return false } - return def.FileBased && def.DiscoverFunc != nil + return e.parseDiffAgentDiscoverable(def) } // parseDiffLiveMtime resolves a session's live source mtime for the raced @@ -725,7 +726,7 @@ func (e *Engine) parseDiffCollectFile( raced := false if realDiffs > 0 && compare && sourceSessionCount[pw.sess.File.Path] == 1 && - parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { + e.parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { var storedMtime *int64 if stored != nil { storedMtime = stored.FileMtime diff --git a/internal/sync/parsediff_compare_test.go b/internal/sync/parsediff_compare_test.go index 6250f7ec8..42b380abb 100644 --- a/internal/sync/parsediff_compare_test.go +++ b/internal/sync/parsediff_compare_test.go @@ -1747,9 +1747,10 @@ func TestParseDiffSourceReliableForRaced(t *testing.T) { want: false, }, } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{}) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := parseDiffSourceReliableForRaced(tt.agent, tt.path) + got := engine.parseDiffSourceReliableForRaced(tt.agent, tt.path) assert.Equal(t, tt.want, got) }) } From 5d7d42a6204f44956761345231d3ba808975a6b9 
Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Fri, 26 Jun 2026 16:40:59 -0400 Subject: [PATCH 08/11] test(parser): close hermes state.db setup handle before deletion createHermesStateDB held the SQLite setup handle open until test cleanup. TestHermesProviderStateDBSourceMethods removes state.db mid-run to exercise deletion handling, and Windows refuses to delete a file still held open by this process, so the test failed there. Close the handle when the helper returns; it is never used by callers after setup and the data is already persisted. No behavior change on Unix. --- internal/parser/hermes_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/parser/hermes_test.go b/internal/parser/hermes_test.go index c1bd94ea4..8c523c55a 100644 --- a/internal/parser/hermes_test.go +++ b/internal/parser/hermes_test.go @@ -98,7 +98,10 @@ func createHermesStateDB(t *testing.T, root string) { t.Helper() db, err := sql.Open("sqlite3", filepath.Join(root, "state.db")) require.NoError(t, err) - t.Cleanup(func() { _ = db.Close() }) + // Close the setup handle when this helper returns rather than at test + // cleanup. Tests delete state.db mid-run to exercise deletion handling, and + // Windows refuses to remove a file still held open by this process. + defer func() { _ = db.Close() }() _, err = db.Exec(` CREATE TABLE sessions ( id TEXT PRIMARY KEY, From 7aeae4b80f9caf948709a2123db46fa4e89a11dc Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 00:26:51 -0400 Subject: [PATCH 09/11] feat(parser,sync): discover Claude S3 sessions through the provider facade This branch migrates Claude to a provider-authoritative source set, so discoverProviderSources (which only calls provider.Discover) becomes the sole on-disk discovery path for it. 
claudeSourceSet.Discover resolved every enumerated file through sourceRef, whose local IsRegularFile gate rejects an s3:// object, so a migrated Claude that pointed at an s3:// projects root would silently stop discovering remote sessions -- a regression against the pre-migration DiscoverFunc, which handled both layouts. Route enumerated files through discoveredSourceRef: s3:// objects (which ClaudeProjectSessionFiles already surfaces via discoverClaudeS3) build an S3 SourceRef carrying the durable object metadata in its Opaque payload, while local files keep the regular file-backed ref. The engine threads that metadata back into the DiscoveredFile and the s3:// guard keeps processing on the dedicated S3 sync path, so freshness, dedup, mtime cutoff, and machine-ID namespacing behave exactly as they did before the migration. The sync test asserts an s3:// provider-discovered Claude source still routes to the S3 path and writes a machine-namespaced session. --- internal/parser/claude_provider.go | 17 ++- internal/parser/s3_discovery_test.go | 109 ++++++++++++++++++++ internal/sync/s3_provider_discovery_test.go | 87 ++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 internal/parser/s3_discovery_test.go create mode 100644 internal/sync/s3_provider_discovery_test.go diff --git a/internal/parser/claude_provider.go b/internal/parser/claude_provider.go index 915020989..f3f7afcad 100644 --- a/internal/parser/claude_provider.go +++ b/internal/parser/claude_provider.go @@ -215,7 +215,7 @@ func (s claudeSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { return nil, err } for _, file := range ClaudeProjectSessionFiles(root) { - source, ok := s.sourceRef(root, file.Path) + source, ok := s.discoveredSourceRef(root, file) if !ok { continue } @@ -226,6 +226,21 @@ func (s claudeSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { return sources, nil } +// discoveredSourceRef builds the SourceRef for one enumerated Claude 
session +// file. Local files resolve through the regular file-backed source ref; s3:// +// objects (which ClaudeProjectSessionFiles enumerates via discoverClaudeS3) +// carry their durable object metadata in the Opaque payload, because the +// IsRegularFile gate that sourceRef applies to a local path would otherwise drop +// every remote object. +func (s claudeSourceSet) discoveredSourceRef( + root string, file DiscoveredFile, +) (SourceRef, bool) { + if strings.HasPrefix(file.Path, "s3://") { + return s3SourceRefFromDiscoveredFile(file), true + } + return s.sourceRef(root, file.Path) +} + func (s claudeSourceSet) WatchPlan(context.Context) (WatchPlan, error) { roots := make([]WatchRoot, 0, len(s.roots)) for _, root := range s.roots { diff --git a/internal/parser/s3_discovery_test.go b/internal/parser/s3_discovery_test.go new file mode 100644 index 000000000..f1f5322f3 --- /dev/null +++ b/internal/parser/s3_discovery_test.go @@ -0,0 +1,109 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestClaudeSourceSetDiscoversS3Sessions verifies the Claude source set +// enumerates s3:// roots through its provider Discover path and carries the +// durable object metadata (including folded tool-result sidecar size/mtime/ +// fingerprint) in the S3DiscoveredSource opaque, rather than dropping the remote +// object at the local IsRegularFile gate. 
+func TestClaudeSourceSetDiscoversS3Sessions(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + root := "s3://bucket/laptop/raw/claude" + sessionURI := root + "/proj/session.jsonl" + sessionMtime := time.Unix(100, 0) + sidecarMtime := time.Unix(200, 0) + listS3Objects = func(got string) ([]S3Object, error) { + require.Equal(t, root, got) + return []S3Object{ + { + URI: sessionURI, + Size: 11, + LastModified: sessionMtime, + Fingerprint: "s3-meta:session", + }, + { + URI: root + "/proj/session/tool-results/out.txt", + Size: 22, + LastModified: sidecarMtime, + Fingerprint: "s3-meta:sidecar", + }, + }, nil + } + + sources, err := newClaudeSourceSet([]string{root}).Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + src := sources[0] + assert.Equal(t, AgentClaude, src.Provider) + assert.Equal(t, sessionURI, src.DisplayPath) + assert.Equal(t, sessionURI, src.FingerprintKey) + assert.Equal(t, "proj", src.ProjectHint) + + s3, ok := src.Opaque.(S3DiscoveredSource) + require.True(t, ok, "s3 source carries S3DiscoveredSource opaque") + assert.Equal(t, sessionURI, s3.URI) + assert.Equal(t, "laptop", s3.Machine) + assert.Equal(t, "proj", s3.Project) + // Session plus its tool-result sidecar fold into one freshness identity. + assert.Equal(t, int64(33), s3.Size) + assert.Equal(t, sidecarMtime.UnixNano(), s3.MtimeNS) + assert.Contains(t, s3.Fingerprint, "session") + assert.Contains(t, s3.Fingerprint, "sidecar") +} + +// TestClaudeSourceSetMixedLocalAndS3Roots verifies a config that mixes a local +// projects root and an s3:// root discovers sources from both, with only the +// remote object carrying the S3DiscoveredSource opaque. 
+func TestClaudeSourceSetMixedLocalAndS3Roots(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + localRoot := t.TempDir() + localProj := filepath.Join(localRoot, "localproj") + require.NoError(t, os.MkdirAll(localProj, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(localProj, "11111111-1111-4111-8111-111111111111.jsonl"), + []byte("{}\n"), 0o644, + )) + + s3Root := "s3://bucket/laptop/raw/claude" + s3URI := s3Root + "/remoteproj/22222222-2222-4222-8222-222222222222.jsonl" + listS3Objects = func(string) ([]S3Object, error) { + return []S3Object{{ + URI: s3URI, + Size: 11, + LastModified: time.Unix(100, 0), + Fingerprint: "s3-meta:remote", + }}, nil + } + + sources, err := newClaudeSourceSet([]string{localRoot, s3Root}). + Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + + var s3Count, localCount int + for _, src := range sources { + if _, ok := src.Opaque.(S3DiscoveredSource); ok { + s3Count++ + assert.Equal(t, s3URI, src.DisplayPath) + } else { + localCount++ + } + } + assert.Equal(t, 1, s3Count, "exactly one remote source") + assert.Equal(t, 1, localCount, "exactly one local source") +} diff --git a/internal/sync/s3_provider_discovery_test.go b/internal/sync/s3_provider_discovery_test.go new file mode 100644 index 000000000..cf786cec9 --- /dev/null +++ b/internal/sync/s3_provider_discovery_test.go @@ -0,0 +1,87 @@ +package sync + +import ( + "context" + "io" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +// TestProcessFileS3ProviderDiscoveredRoutesToS3Path verifies that an s3:// +// DiscoveredFile shaped exactly as discoverProviderSources now emits it -- a +// provider-authoritative agent, ProviderProcess set, and a ProviderSource +// carrying the S3DiscoveredSource opaque -- still routes through the S3 sync +// 
path. processProviderFile must let the s3:// guard win over the provider +// parse path (providers read local files), and the threaded Machine/size/mtime +// must drive the same namespaced result as direct S3 discovery. +func TestProcessFileS3ProviderDiscoveredRoutesToS3Path(t *testing.T) { + database := openTestDB(t) + path := "s3://bucket/laptop/raw/claude/test-proj/shared-id.jsonl" + content := testjsonl.NewSessionBuilder(). + AddClaudeUser("2024-01-01T00:00:00Z", "Hello"). + AddClaudeAssistant("2024-01-01T00:00:05Z", "Hi."). + String() + + oldFetch := fetchS3Object + t.Cleanup(func() { fetchS3Object = oldFetch }) + fetchS3Object = func(got string) (io.ReadCloser, error) { + if got != path { + return nil, missingS3ObjectError() + } + return io.NopCloser(strings.NewReader(content)), nil + } + + mtime := time.Date(2026, 6, 24, 12, 0, 0, 0, time.UTC).UnixNano() + e := &Engine{ + db: database, + machine: "central", + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + } + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: "test-proj", + Opaque: parser.S3DiscoveredSource{ + URI: path, + Project: "test-proj", + Machine: "laptop", + Size: int64(len(content)), + MtimeNS: mtime, + }, + } + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentClaude, + Path: path, + Project: "test-proj", + Machine: "laptop", + SourceSize: int64(len(content)), + SourceMtime: mtime, + ProviderSource: &source, + ProviderProcess: true, + }) + require.NoError(t, res.err) + require.Len(t, res.results, 1) + + written, _, failed := e.writeBatch([]pendingWrite{{ + sess: res.results[0].Session, + msgs: res.results[0].Messages, + }}, syncWriteDefault, false) + require.Equal(t, 1, written) + require.Equal(t, 0, failed) + + sess, err := database.GetSessionFull(context.Background(), 
"laptop~shared-id") + require.NoError(t, err) + require.NotNil(t, sess) + assert.Equal(t, "laptop", sess.Machine) + assert.Equal(t, path, derefString(sess.FilePath)) +} From 4d26cf8bb340866d1c0446b2ef65de1a6408b6bd Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 10:56:06 -0400 Subject: [PATCH 10/11] fix(sync): skip unchanged provider-authoritative sources before reparse processProviderFile only had pre-parse DB-freshness skips for Claude (gated to IncrementalAppend) and Cowork. Every other agent this branch flips to provider-authoritative -- OpenHands, Cursor, Hermes, Vibe -- fell through to provider.Parse and writeBatch even when the stored file_size, file_mtime, and data_version already matched, so a full or periodic sync reparsed and rewrote untouched sessions every time, regressing the per-agent shouldSkipByPath skip those agents had before migration. Add a generic providerSourceUnchangedInDB check after fingerprinting: when the discovered path's stored size+mtime match the fingerprint and data_version is current (and the stored project does not need reparsing), skip without parsing. It only skips on an exact size+mtime match, so any real change still reparses, and it is backend-agnostic (GetFileInfoByPath/GetDataVersionByPath/ GetProjectByPath behave identically on SQLite and PostgreSQL). 
--- internal/sync/engine.go | 59 +++++++++++++++++++ .../provider_freshness_integration_test.go | 53 +++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 internal/sync/provider_freshness_integration_test.go diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 041e085d3..e39a3b86d 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -4098,6 +4098,26 @@ func (e *Engine) processProviderFile( } incForceReplace := incRes.forceReplace + // DB-stored-file-info skip: a session whose persisted file_size/file_mtime + // already match the source fingerprint (and whose data_version is current) + // is unchanged and need not be reparsed. This reproduces the legacy + // shouldSkipByPath behavior the per-agent process methods provided before the + // migration, so a repeat full/periodic sync of an untouched + // provider-authoritative session (OpenHands, Cursor, Hermes, Vibe, ...) + // skips instead of rewriting. It only skips on an exact size+mtime match, so + // a provider whose fingerprint mtime differs from the stored value simply + // reparses, matching the prior behavior. Claude and Cowork have their own + // earlier freshness checks; this is the generic fallback for the rest. + if !e.forceParse && !file.ForceParse && + e.providerSourceUnchangedInDB(source, fingerprint) { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + }, true + } + outcome, err := provider.Parse(ctx, parser.ParseRequest{ Source: source, Fingerprint: fingerprint, @@ -4834,6 +4854,45 @@ func (e *Engine) providerCoworkSourceFresh( return mtime, true } +// providerSourceUnchangedInDB reports whether a provider source's persisted +// file metadata already matches its current fingerprint, so a reparse would be +// redundant. 
It compares the stored file_size/file_mtime for the discovered +// path against the fingerprint and requires a current data_version, mirroring +// the legacy shouldSkipByPath gate. It returns false on a missing stored row, an +// empty key, or a non-fingerprint identity (no size and no mtime, e.g. a +// container source), so those callers fall through to a full parse. +func (e *Engine) providerSourceUnchangedInDB( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) bool { + if fingerprint.MTimeNS == 0 && fingerprint.Size == 0 { + return false + } + lookupPath := providerDiscoveredPath(source) + if lookupPath == "" { + return false + } + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(lookupPath) + } + storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) + if !ok { + return false + } + if storedSize != fingerprint.Size || storedMtime != fingerprint.MTimeNS { + return false + } + // A stale stored project (e.g. a generated roborev CI worktree name) + // must defeat the unchanged-source skip so the corrected project is + // reparsed, mirroring shouldSkipCodexFingerprint and the in-memory + // skip-cache bypass in processProviderFile. + if project, ok := e.db.GetProjectByPath(lookupPath); ok && + parser.NeedsProjectReparse(project) { + return false + } + return e.db.GetDataVersionByPath(lookupPath) >= db.CurrentDataVersion() +} + // stampProviderFileIdentity copies the source file's inode and device onto // every parsed result for an incremental-append provider (Claude). 
The // legacy process arm stamped this identity from the source stat so the diff --git a/internal/sync/provider_freshness_integration_test.go b/internal/sync/provider_freshness_integration_test.go new file mode 100644 index 000000000..6a093df33 --- /dev/null +++ b/internal/sync/provider_freshness_integration_test.go @@ -0,0 +1,53 @@ +package sync_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/sync" +) + +// TestProviderAuthoritativeUnchangedSessionSkipsOnResync verifies that a +// provider-authoritative agent whose source file is unchanged is skipped on a +// second full sync rather than reparsed and rewritten. Before the generic +// providerSourceUnchangedInDB freshness check, only Claude and Cowork had a +// pre-parse DB skip in processProviderFile, so the other migrated agents +// (OpenHands, Cursor, Hermes, Vibe) fell through to provider.Parse + writeBatch +// and rewrote unchanged sessions on every full/periodic sync. Vibe is used as a +// representative of that group. +func TestProviderAuthoritativeUnchangedSessionSkipsOnResync(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + vibeDir := t.TempDir() + testDB := dbtest.OpenTestDB(t) + engine := sync.NewEngine(testDB, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVibe: {vibeDir}, + }, + Machine: "local", + }) + + sessionID := "abc123def-0000-0000-0000-000000000000" + writeVibeSyncFixture( + t, vibeDir, "session_20260616_083518_abc123", sessionID, "Title", + ) + + ctx := context.Background() + first := engine.SyncAll(ctx, nil) + require.Equal(t, 1, first.Synced, "first sync parses and stores the session") + + // Source files are untouched, so the second full sync must skip the session + // at the DB-freshness check instead of reparsing and rewriting it. 
+ second := engine.SyncAll(ctx, nil) + assert.Equal(t, 0, second.Synced, + "an unchanged provider-authoritative session must not be re-synced") + assert.GreaterOrEqual(t, second.Skipped, 1, + "the unchanged session must be counted as skipped") +} From dac19eb4500e0cb7046c3302ae7a48bd03919eab Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 11:21:30 -0400 Subject: [PATCH 11/11] fix(parser): fall back to hermes transcripts when state.db lookup fails Hermes FindSource aborted the whole lookup when a state.db query errored, even though parseArchive deliberately falls back to the transcript parser when state.db is unreadable or schema-incompatible. A valid transcript session sitting next to a corrupt or legacy state.db could no longer be located for resync. Treat a state.db lookup error as non-claiming for that root and continue to the transcript lookup, matching the parser's documented fallback. This lands at the branch that makes Hermes provider-authoritative so the regression and its fix stay in the same PR. 
--- internal/parser/hermes_provider.go | 23 +++++++++++++------ internal/parser/hermes_provider_test.go | 30 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/internal/parser/hermes_provider.go b/internal/parser/hermes_provider.go index 5c3f7e45a..e486193d4 100644 --- a/internal/parser/hermes_provider.go +++ b/internal/parser/hermes_provider.go @@ -7,6 +7,7 @@ import ( "fmt" "hash" "io" + "log" "os" "path/filepath" "sort" @@ -236,14 +237,22 @@ func (s hermesSourceSet) FindSource( if stateDB, _, ok := hermesStatePaths(root); ok && IsValidSessionID(req.RawSessionID) { found, err := hermesStateDBHasSession(stateDB, req.RawSessionID) - if err != nil { - return SourceRef{}, false, err - } - if !found { + switch { + case err != nil: + // Mirror parseArchive: an unreadable or schema-incompatible + // state.db falls back to transcripts rather than aborting the + // lookup, so a valid transcript session next to a bad state.db + // stays resolvable for resync. + log.Printf( + "hermes: state db lookup failed for %s: %v; "+ + "falling back to transcripts", stateDB, err, + ) + case !found: continue - } - if source, ok := s.sourceRef(root, stateDB); ok { - return source, true, nil + default: + if source, ok := s.sourceRef(root, stateDB); ok { + return source, true, nil + } } } transcriptRoot := hermesTranscriptRoot(root) diff --git a/internal/parser/hermes_provider_test.go b/internal/parser/hermes_provider_test.go index 1beec4f7d..ba9d2a160 100644 --- a/internal/parser/hermes_provider_test.go +++ b/internal/parser/hermes_provider_test.go @@ -453,6 +453,36 @@ func TestHermesProviderFindSourceDoesNotReturnStateDBForMissingRawID(t *testing. 
assert.Empty(t, source) } +func TestHermesProviderFindSourceFallsBackToTranscriptWhenStateDBUnreadable(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + + // A present-but-unreadable state.db: hermesStateDBHasSession opens it + // lazily, then errors on the first query because the bytes are not a + // SQLite database. parseArchive logs and falls back to transcripts in this + // case, so FindSource must do the same rather than aborting the lookup. + stateDB := filepath.Join(root, "state.db") + writeSourceFile(t, stateDB, "not a sqlite database") + + transcriptPath := filepath.Join(sessionsDir, "freshchild.jsonl") + writeSourceFile(t, transcriptPath, hermesProviderJSONLFixture("transcript question")) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "freshchild", + }) + + require.NoError(t, err, "unreadable state.db must not abort transcript lookup") + require.True(t, ok, "valid transcript next to a bad state.db must be found") + assert.Equal(t, transcriptPath, source.DisplayPath) +} + func hermesProviderJSONLFixture(firstMessage string) string { return `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}` + "\n" + `{"role":"user","content":"` + firstMessage + `","timestamp":"2026-05-14T10:01:00.000000"}` + "\n" +