diff --git a/cmd/agentsview/main_test.go b/cmd/agentsview/main_test.go index b55268b94..fac5f2f69 100644 --- a/cmd/agentsview/main_test.go +++ b/cmd/agentsview/main_test.go @@ -570,6 +570,29 @@ func TestStartRemoteHostSync_NilEmitterSafe(t *testing.T) { <-exited } +func TestCollectWatchRootsHermesSessionsWatchesStateDBParent(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.Mkdir(sessionsDir, 0o755), "mkdir sessions") + + cfg := config.Config{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {sessionsDir}, + }, + } + + roots, unwatchedDirs := collectWatchRoots(cfg) + + require.Empty(t, unwatchedDirs, "unwatched dirs before watcher setup") + require.Len(t, roots, 2) + assert.Equal(t, root, roots[0].root) + assert.True(t, roots[0].shallow) + assert.Equal(t, []string{sessionsDir}, roots[0].dirs) + assert.Equal(t, sessionsDir, roots[1].root) + assert.False(t, roots[1].shallow) + assert.Equal(t, []string{sessionsDir}, roots[1].dirs) +} + func TestResyncCoversSignals(t *testing.T) { tests := []struct { name string diff --git a/internal/parser/claude.go b/internal/parser/claude.go index 0cfe9821c..c5f134365 100644 --- a/internal/parser/claude.go +++ b/internal/parser/claude.go @@ -56,24 +56,17 @@ type claudeQueuedCommand struct { timestamp time.Time } -// ParseClaudeSession parses a Claude Code JSONL session file. -// Returns one or more ParseResult structs (multiple when forks -// are detected in the uuid/parentUuid DAG). -func ParseClaudeSession( - path, project, machine string, -) ([]ParseResult, error) { - results, _, err := ParseClaudeSessionWithExclusions( - path, project, machine, - ) - return results, err -} - -// ParseClaudeSessionWithExclusions parses a Claude Code JSONL -// session file and also returns session IDs intentionally excluded -// from the archive, such as content-free /usage probes. 
Sync uses -// those IDs during full resync so orphan preservation does not -// restore rows the current parser deliberately dropped. -func ParseClaudeSessionWithExclusions( +// claudeParseWithExclusions parses a Claude Code JSONL session file +// and also returns session IDs intentionally excluded from the +// archive, such as content-free /usage probes. Sync uses those IDs +// during full resync so orphan preservation does not restore rows the +// current parser deliberately dropped. This is the provider-owned +// parse body shared by the Claude provider (both its discovered-session +// Parse path and its ParseUploadedTranscript entry) and the Cowork +// parser (which reuses the Claude transcript format); it carries no +// legacy entrypoint naming so the provider can call it without shimming +// a Parse* free function. +func claudeParseWithExclusions( path, project, machine string, ) ([]ParseResult, []string, error) { info, err := os.Stat(path) @@ -366,15 +359,17 @@ func lastAssistantStopReason(messages []ParsedMessage) string { return "" } -// ParseClaudeSessionFrom parses only new lines from a Claude -// JSONL file starting at the given byte offset. Returns only -// the newly parsed messages (with ordinals starting at -// startOrdinal) and the latest timestamp. Fork detection is -// skipped — new entries are processed linearly. Used for -// incremental re-parsing of append-only session files. -// ErrDAGDetected is returned by ParseClaudeSessionFrom when -// appended lines contain uuid fields that require DAG-aware -// fork detection, which incremental parsing cannot handle. +// claudeParseSessionFrom parses only new lines from a Claude JSONL +// file starting at the given byte offset. Returns only the newly +// parsed messages (with ordinals starting at startOrdinal) and the +// latest timestamp. Fork detection is skipped — new entries are +// processed linearly. Used by the Claude provider for incremental +// re-parsing of append-only session files. 
// ErrDAGDetected is returned by the incremental Claude parse path
// (claudeParseSessionFrom, reachable cross-package through
// ParseClaudeSessionFrom) when appended lines contain uuid fields that
// require DAG-aware fork detection, which incremental parsing cannot
// handle. Incremental parsing processes new entries linearly and skips
// fork detection, so callers are expected to fall back to a full parse
// when they see this error.
var ErrDAGDetected = fmt.Errorf(
	"incremental parse: DAG uuid detected",
)
func ParseClaudeSessionWithExclusions(
	path, project, machine string,
) ([]ParseResult, []string, error) {
	// Pure forwarder: all parsing lives in the unexported body.
	return claudeParseWithExclusions(path, project, machine)
}

// ParseClaudeSessionFrom is the exported forwarder to
// claudeParseSessionFrom, the incremental Claude parser. It parses only
// the lines found at or after byte offset in the JSONL file at path and
// numbers the resulting messages starting at startOrdinal.
//
// It returns the newly parsed messages, the latest timestamp seen, an
// int64 that the Claude provider records as the number of bytes consumed,
// and an error. ErrDAGDetected signals that the appended lines need
// DAG-aware fork detection and therefore a full parse.
//
// NOTE(review): lastEntryUUID is passed through unchanged; presumably it
// is the uuid of the last entry already ingested — confirm against
// claudeParseSessionFrom.
func ParseClaudeSessionFrom(
	path string,
	offset int64,
	startOrdinal int,
	lastEntryUUID string,
) ([]ParsedMessage, time.Time, int64, error) {
	return claudeParseSessionFrom(path, offset, startOrdinal, lastEntryUUID)
}
-func ClassifyClaudeSystemMessage(content string) string { +func classifyClaudeSystemMessage(content string) string { trimmed := strings.TrimLeftFunc(content, func(r rune) bool { return r == '\uFEFF' || unicode.IsSpace(r) }) diff --git a/internal/parser/claude_parser_test.go b/internal/parser/claude_parser_test.go index 133ee017e..340dd3b32 100644 --- a/internal/parser/claude_parser_test.go +++ b/internal/parser/claude_parser_test.go @@ -22,7 +22,7 @@ func runClaudeParserTest(t *testing.T, fileName, content string) (ParsedSession, fileName = "test.jsonl" } path := createTestFile(t, fileName, content) - results, err := ParseClaudeSession(path, "my_app", "local") + results, err := parseClaudeSession(path, "my_app", "local") require.NoError(t, err) require.NotEmpty(t, results) return results[0].Session, results[0].Messages @@ -31,7 +31,7 @@ func runClaudeParserTest(t *testing.T, fileName, content string) (ParsedSession, func callParseClaudeSessionFrom( path string, offset int64, startOrdinal int, lastEntryUUID string, ) ([]ParsedMessage, time.Time, int64, error) { - fn := reflect.ValueOf(ParseClaudeSessionFrom) + fn := reflect.ValueOf(claudeParseSessionFrom) args := []reflect.Value{ reflect.ValueOf(path), reflect.ValueOf(offset), @@ -68,7 +68,7 @@ func TestParseClaudeSession_UsageProbe(t *testing.T) { parse := func(t *testing.T, content string) []ParseResult { t.Helper() path := createTestFile(t, "probe.jsonl", content) - results, err := ParseClaudeSession(path, "ClaudeProbe", "local") + results, err := parseClaudeSession(path, "ClaudeProbe", "local") require.NoError(t, err) return results } @@ -516,7 +516,7 @@ func TestParseClaudeSessionFrom_Incremental(t *testing.T) { path := createTestFile(t, "inc-claude.jsonl", initial) // Full parse to get baseline. 
import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
)
Provider = (*claudeProvider)(nil) + +type claudeProviderFactory struct { + def AgentDef +} + +func newClaudeProviderFactory(def AgentDef) ProviderFactory { + return claudeProviderFactory{def: cloneAgentDef(def)} +} + +func (f claudeProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f claudeProviderFactory) Capabilities() Capabilities { + return claudeProviderCapabilities() +} + +func (f claudeProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &claudeProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: claudeProviderCapabilities(), + Config: cfg, + }, + sources: newClaudeSourceSet(cfg.Roots), + } +} + +type claudeProvider struct { + ProviderBase + sources claudeSourceSet +} + +func (p *claudeProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *claudeProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *claudeProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *claudeProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *claudeProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *claudeProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("claude source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + project := claudeProviderProject(ctx, 
req.Source.ProjectHint, path) + results, excludedIDs, err := claudeParseWithExclusions(path, project, machine) + if err != nil { + return ParseOutcome{}, err + } + if req.Fingerprint.Hash != "" { + for i := range results { + results[i].Session.File.Hash = req.Fingerprint.Hash + } + } + InferRelationshipTypes(results) + out := make([]ParseResultOutcome, 0, len(results)) + for _, result := range results { + out = append(out, ParseResultOutcome{ + Result: result, + DataVersion: DataVersionCurrent, + }) + } + return ParseOutcome{ + Results: out, + ExcludedSessionIDs: excludedIDs, + ResultSetComplete: true, + }, nil +} + +// ClaudeUploadParser is implemented by the Claude provider to parse a +// standalone, out-of-root Claude transcript file (such as an HTTP upload) +// under a caller-supplied project name. Uploads do not live under a +// configured root, so the normal discovery/source-resolution path does not +// apply; callers obtain this via NewProvider(AgentClaude, ...) and a type +// assertion. +type ClaudeUploadParser interface { + // ParseUploadedTranscript parses the transcript at path and files the + // resulting sessions under project. The project is authoritative: unlike + // the discovered-session Parse path, it is not overridden by any cwd + // recorded in the transcript, because an upload is filed under a + // user-chosen project rather than a workspace path on this machine. 
+ ParseUploadedTranscript(path, project, machine string) ([]ParseResult, error) +} + +func (p *claudeProvider) ParseUploadedTranscript( + path, project, machine string, +) ([]ParseResult, error) { + machine = firstNonEmptyJSONLString(machine, p.Config.Machine) + results, _, err := claudeParseWithExclusions(path, project, machine) + if err != nil { + return nil, err + } + InferRelationshipTypes(results) + return results, nil +} + +func (p *claudeProvider) ParseIncremental( + ctx context.Context, + req IncrementalRequest, +) (IncrementalOutcome, IncrementalStatus, error) { + if err := ctx.Err(); err != nil { + return IncrementalOutcome{}, IncrementalUnsupported, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return IncrementalOutcome{}, IncrementalUnsupported, + fmt.Errorf("claude source path unavailable") + } + if req.Offset > 0 && req.Fingerprint.Size < req.Offset { + return IncrementalOutcome{ForceReplace: true}, + IncrementalNeedsFullParse, nil + } + if req.Fingerprint.Size == req.Offset { + return IncrementalOutcome{}, IncrementalNoNewData, nil + } + newMsgs, endedAt, consumed, err := claudeParseSessionFrom( + path, + req.Offset, + req.StartOrdinal, + req.LastEntryUUID, + ) + if err != nil { + if IsIncrementalFullParseFallback(err) || errorsIsClaudeDAG(err) { + return IncrementalOutcome{ForceReplace: IsIncrementalFullParseFallback(err)}, + IncrementalNeedsFullParse, nil + } + return IncrementalOutcome{}, IncrementalNeedsFullParse, err + } + if len(newMsgs) == 0 { + if consumed > 0 { + return IncrementalOutcome{ + SessionID: req.SessionID, + EndedAt: endedAt, + ConsumedBytes: consumed, + }, IncrementalApplied, nil + } + return IncrementalOutcome{}, IncrementalNoNewData, nil + } + totalOut, peakCtx, hasTotalOut, hasPeakCtx := claudeProviderTokenTotals(newMsgs) + return IncrementalOutcome{ + SessionID: req.SessionID, + Messages: newMsgs, + EndedAt: endedAt, + ConsumedBytes: consumed, + MessageCount: len(newMsgs), + UserMessageCount: 
claudeProviderUserMessageCount(newMsgs), + TotalOutputTokens: totalOut, + PeakContextTokens: peakCtx, + HasTotalOutputTokens: hasTotalOut, + HasPeakContextTokens: hasPeakCtx, + }, IncrementalApplied, nil +} + +type claudeSource struct { + Root string + Path string +} + +type claudeSourceSet struct { + roots []string +} + +func newClaudeSourceSet(roots []string) claudeSourceSet { + return claudeSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s claudeSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range ClaudeProjectSessionFiles(root) { + source, ok := s.discoveredSourceRef(root, file) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoveredSourceRef builds the SourceRef for one enumerated Claude session +// file. Local files resolve through the regular file-backed source ref; s3:// +// objects (which ClaudeProjectSessionFiles enumerates via discoverClaudeS3) +// carry their durable object metadata in the Opaque payload, because the +// IsRegularFile gate that sourceRef applies to a local path would otherwise drop +// every remote object. 
+func (s claudeSourceSet) discoveredSourceRef( + root string, file DiscoveredFile, +) (SourceRef, bool) { + if strings.HasPrefix(file.Path, "s3://") { + return s3SourceRefFromDiscoveredFile(file), true + } + return s.sourceRef(root, file.Path) +} + +func (s claudeSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"*.jsonl"}, + DebounceKey: string(AgentClaude) + ":projects:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s claudeSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + // The legacy classifier resolved Claude paths purely from their + // project/session shape and only treated a stat failure as + // "missing" when it was a definitive IsNotExist. A transient stat + // error (for example a parent directory the watcher cannot read this + // instant) must still classify so the change is not silently dropped. + // Fall back to path-shape classification whenever the path is not + // known to be absent. 
+ allowMissing := jsonlMissingPathFallbackAllowed(req) || + claudeChangedPathPresentButUnstatable(req.Path) + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !s.hasRoot(root) { + return nil, nil + } + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s claudeSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceForPath(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := claudeFindSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (s claudeSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("claude source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + inode, device := sourceFileIdentity(info) + return SourceFingerprint{ + Key: 
firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Inode: inode, + Device: device, + Hash: hash, + }, nil +} + +func (s claudeSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case claudeSource: + return src.Path, src.Path != "" + case *claudeSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + for _, root := range s.roots { + if ref, ok := s.sourceForPath(root, candidate); ok { + src := ref.Opaque.(claudeSource) + return src.Path, true + } + } + } + return "", false +} + +func (s claudeSourceSet) sourceForPath(root, path string) (SourceRef, bool) { + return s.sourceForChangedPath(root, path, false) +} + +func (s claudeSourceSet) sourceForChangedPath( + root, + path string, + allowMissing bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if allowMissing { + return s.sourceRefFromPath(root, path) + } + return s.sourceRef(root, path) +} + +func (s claudeSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !IsRegularFile(path) { + return SourceRef{}, false + } + return s.sourceRefFromPath(root, path) +} + +func (s claudeSourceSet) sourceRefFromPath(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + project, ok := claudeProjectHintFromPath(root, path) + if !ok { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentClaude, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: claudeSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s claudeSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + 
// claudeProjectHintFromPath derives the project directory name for a Claude
// session file from its location relative to root. It reports false when
// path is not a recognized session file under root.
//
// Two layouts are recognized, both requiring a ".jsonl" file name:
//   - <project>/<session>.jsonl, where the file name does not start with
//     "agent-";
//   - <project>/<session>/subagents/.../agent-*.jsonl, at any depth below
//     the "subagents" directory.
//
// In both cases the first path segment below root is returned as the hint.
func claudeProjectHintFromPath(root, path string) (string, bool) {
	rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path))
	switch {
	case err != nil, rel == "", rel == ".":
		// Not expressible relative to root, or path is root itself.
		return "", false
	case rel == "..", strings.HasPrefix(rel, ".."+string(filepath.Separator)):
		// Path escapes root.
		return "", false
	}

	segments := strings.Split(rel, string(filepath.Separator))
	leaf := segments[len(segments)-1]
	if !strings.HasSuffix(leaf, ".jsonl") {
		return "", false
	}
	agentFile := strings.HasPrefix(strings.TrimSuffix(leaf, ".jsonl"), "agent-")

	switch {
	case len(segments) == 2 && !agentFile:
		// Top-level session transcript directly inside the project dir.
		return segments[0], true
	case len(segments) >= 4 && segments[2] == "subagents" && agentFile:
		// Subagent transcript nested under <session>/subagents/.
		return segments[0], true
	}
	return "", false
}
CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilitySupported, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + TerminationStatus: CapabilitySupported, + MalformedLineCount: CapabilitySupported, + Model: CapabilitySupported, + StopReason: CapabilitySupported, + }, + } +} diff --git a/internal/parser/claude_provider_test.go b/internal/parser/claude_provider_test.go new file mode 100644 index 000000000..d15f97e78 --- /dev/null +++ b/internal/parser/claude_provider_test.go @@ -0,0 +1,350 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestClaudeProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentClaude) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestClaudeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + sourcePath := filepath.Join(root, projectDir, sessionID+".jsonl") + subagentPath := filepath.Join( + root, + projectDir, + sessionID, + "subagents", + "workflows", + "wf-123", + "agent-worker.jsonl", + ) + writeSourceFile(t, sourcePath, claudeProviderFixture("main question")) + writeSourceFile(t, subagentPath, claudeProviderFixture("subagent question")) + 
writeSourceFile( + t, + filepath.Join(root, projectDir, sessionID, "subagents", "not-agent.jsonl"), + claudeProviderFixture("ignored"), + ) + writeSourceFile(t, filepath.Join(root, projectDir, "agent-root.jsonl"), claudeProviderFixture("ignored")) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{sourcePath, subagentPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered { + assert.Equal(t, AgentClaude, source.Provider) + assert.Equal(t, projectDir, source.ProjectHint) + } + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-worker", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, subagentPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, 
subagentPath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(subagentPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "rename", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, subagentPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, projectDir, "agent-root.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) +} + +func TestClaudeProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + targetProject := filepath.Join(targetRoot, projectDir) + sourceProject := filepath.Join(root, projectDir) + sourcePath := filepath.Join(sourceProject, sessionID+".jsonl") + subagentPath := filepath.Join( + sourceProject, + sessionID, + "subagents", + "jobs", + "job-1", + "agent-linked.jsonl", + ) + writeSourceFile( + t, + filepath.Join(targetProject, sessionID+".jsonl"), + claudeProviderFixture("from symlink"), + ) + writeSourceFile( + t, + filepath.Join(targetProject, sessionID, "subagents", "jobs", "job-1", "agent-linked.jsonl"), + claudeProviderFixture("from symlink subagent"), + ) + if err := os.Symlink(targetProject, sourceProject); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := 
provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{sourcePath, subagentPath}, sourceDisplayPaths(discovered)) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-linked", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) +} + +func TestClaudeProviderParse(t *testing.T) { + root := t.TempDir() + projectDir := "-Users-dev-code-demo" + sessionID := "session-main" + sourcePath := filepath.Join(root, projectDir, sessionID+".jsonl") + writeSourceFile(t, sourcePath, claudeProviderFixture("parse question")) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, sessionID, result.Result.Session.ID) + assert.Equal(t, AgentClaude, result.Result.Session.Agent) + assert.Equal(t, "demo", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, "abc123", result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, 
result.Result.Messages, 2) +} + +func TestClaudeProviderParseIncremental(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "inc.jsonl") + initial := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("hello world", tsEarly), + testjsonl.ClaudeAssistantJSON("hi there", tsEarlyS1), + ) + writeSourceFile(t, sourcePath, initial) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + appended := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("follow up", tsEarlyS5), + testjsonl.ClaudeAssistantJSON("got it", tsLate), + ) + f, err := os.OpenFile(sourcePath, os.O_APPEND|os.O_WRONLY, 0o644) + require.NoError(t, err) + _, err = f.WriteString(appended) + require.NoError(t, err) + require.NoError(t, f.Close()) + currentInfo, err := os.Stat(sourcePath) + require.NoError(t, err) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "inc", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: currentInfo.Size()}, + SessionID: "inc", + Offset: info.Size(), + StartOrdinal: 2, + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalApplied, status) + assert.Equal(t, "inc", outcome.SessionID) + assert.Equal(t, int64(len(appended)), outcome.ConsumedBytes) + require.Len(t, outcome.Messages, 2) + assert.Equal(t, 2, outcome.Messages[0].Ordinal) + assert.Equal(t, RoleUser, outcome.Messages[0].Role) + assert.Contains(t, outcome.Messages[0].Content, "follow up") + assert.Equal(t, 3, outcome.Messages[1].Ordinal) + assert.Equal(t, RoleAssistant, outcome.Messages[1].Role) + assert.Contains(t, outcome.Messages[1].Content, "got it") +} + +func 
TestClaudeProviderParseIncrementalTruncatedNeedsFullParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "truncated.jsonl") + initial := claudeProviderFixture("hello world") + writeSourceFile(t, sourcePath, initial) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "truncated", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: int64(len(initial) / 2)}, + SessionID: "truncated", + Offset: int64(len(initial)), + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalNeedsFullParse, status) + assert.True(t, outcome.ForceReplace) +} + +func TestClaudeProviderParseIncrementalEmptyTruncationNeedsFullParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "empty-truncated.jsonl") + initial := claudeProviderFixture("hello world") + writeSourceFile(t, sourcePath, initial) + + provider, ok := NewProvider(AgentClaude, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "empty-truncated", + }) + require.NoError(t, err) + require.True(t, ok) + + outcome, status, err := provider.ParseIncremental( + context.Background(), + IncrementalRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Size: 0}, + SessionID: "empty-truncated", + Offset: int64(len(initial)), + }, + ) + require.NoError(t, err) + assert.Equal(t, IncrementalNeedsFullParse, status) + assert.True(t, outcome.ForceReplace) +} + +func claudeProviderFixture(firstMessage string) string { + return 
testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON(firstMessage, tsEarly), + testjsonl.ClaudeAssistantJSON("Done.", tsEarlyS1), + ) +} diff --git a/internal/parser/claude_subagent_test.go b/internal/parser/claude_subagent_test.go index 1d9a9e9aa..b6449a8ff 100644 --- a/internal/parser/claude_subagent_test.go +++ b/internal/parser/claude_subagent_test.go @@ -15,7 +15,7 @@ func parseAndGetToolCalls(t *testing.T, filename string, lines []string) []Parse t.Helper() content := strings.Join(lines, "\n") path := createTestFile(t, filename, content) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err, "ParseClaudeSession") require.NotEmpty(t, results, "no results") diff --git a/internal/parser/claude_test.go b/internal/parser/claude_test.go index de45e1ca0..1527ecf1e 100644 --- a/internal/parser/claude_test.go +++ b/internal/parser/claude_test.go @@ -238,7 +238,7 @@ func TestParseClaudeSession_Metadata(t *testing.T) { ) require.NoError(t, err) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, "proj", "local", ) require.NoError(t, err) @@ -309,7 +309,7 @@ func TestParseClaudeSession_MetadataOnForkSessions( err := os.WriteFile(path, []byte(content.String()), 0o644) require.NoError(t, err) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 2, "expected main + fork result") @@ -357,7 +357,7 @@ func TestParseClaudeSession_LinearMetadata(t *testing.T) { err := os.WriteFile(path, []byte(content), 0o644) require.NoError(t, err) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -470,7 +470,7 @@ func TestClaudeRenameSetsDisplayName(t *testing.T) { err := os.WriteFile(path, []byte(sb.String()), 0o644) require.NoError(t, err) - 
results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err) require.Len(t, results, 1) assert.Equal(t, tc.wantDisplay, results[0].Session.SessionName) diff --git a/internal/parser/cowork.go b/internal/parser/cowork.go index 67e42cc50..db91856a9 100644 --- a/internal/parser/cowork.go +++ b/internal/parser/cowork.go @@ -7,7 +7,6 @@ import ( "encoding/json" "os" "path/filepath" - "slices" "sort" "strings" "time" @@ -246,89 +245,6 @@ func walkCoworkSessions(root string, fn func(transcriptPath string)) { ) } -// DiscoverCoworkSessions finds all cowork session transcripts under root, -// including subagent transcripts. -func DiscoverCoworkSessions(root string) []DiscoveredFile { - var files []DiscoveredFile - walkCoworkSessions(root, func(transcript string) { - files = append(files, DiscoveredFile{ - Path: transcript, - Agent: AgentCowork, - }) - }) - return files -} - -// FindCoworkSourceFile locates a cowork transcript by its raw session ID -// (the cliSessionId or "agent-" subagent id, with the "cowork:" prefix -// already stripped). -func FindCoworkSourceFile(root, sessionID string) string { - if !IsValidSessionID(sessionID) { - return "" - } - target := sessionID + ".jsonl" - var found string - walkCoworkSessions(root, func(transcript string) { - if found == "" && filepath.Base(transcript) == target { - found = transcript - } - }) - return found -} - -// ClassifyCoworkPath reports whether a changed path under a cowork root is -// a cowork session transcript (main or subagent) or its sibling metadata -// file, and returns the transcript file that should be (re)parsed. -// Metadata changes (e.g. a title rename) resolve to the session's main -// transcript so the rename is picked up. 
-func ClassifyCoworkPath(root, path string) (string, bool) { - rel, ok := relUnder(root, path) - if !ok { - return "", false - } - sep := string(filepath.Separator) - parts := strings.Split(rel, sep) - n := len(parts) - base := parts[n-1] - - if strings.HasSuffix(base, ".jsonl") { - // Must live under a .claude/projects/ subtree. - marker := sep + ".claude" + sep + "projects" + sep - if !strings.Contains(sep+rel, marker) { - return "", false - } - stem := strings.TrimSuffix(base, ".jsonl") - if strings.HasPrefix(stem, "agent-") { - // Subagent transcript: //subagents/**/agent-*.jsonl. - if slices.Contains(parts, "subagents") { - return path, true - } - return "", false - } - // Main transcript: /.jsonl directly under projects. - if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" && - IsValidSessionID(stem) { - return path, true - } - return "", false - } - - // Metadata: //local_.json - if isCoworkMetaFileName(base) { - meta := readCoworkMeta(path) - if meta.CliSessionID == "" { - return "", false - } - sessionDir := strings.TrimSuffix(path, ".json") - if main, _ := resolveCoworkSession( - sessionDir, meta.CliSessionID, - ); main != "" { - return main, true - } - } - return "", false -} - // relUnder returns the path of child relative to dir when child is // strictly contained within dir, mirroring the engine's isUnder helper so // the parser can classify paths without importing sync internals. @@ -377,20 +293,20 @@ func extractCoworkAITitle(transcriptPath string) string { return title } -// ParseCoworkSession parses a cowork session transcript. It reuses the -// Claude Code parser on the transcript and then rewrites the results into -// the cowork namespace: agent type, "cowork:"-prefixed IDs, the session -// title, and metadata-derived timestamps for transcripts that carry none. -// Returns parsed results plus session IDs the parser intentionally -// excluded (prefixed), matching ParseClaudeSessionWithExclusions. 
-func ParseCoworkSession( +// parseCoworkSession parses a cowork session transcript. It reuses the +// Claude Code parser on the transcript and then rewrites the results into +// the cowork namespace: agent type, "cowork:"-prefixed IDs, the session +// title, and metadata-derived timestamps for transcripts that carry none. +// Returns parsed results plus session IDs the parser intentionally +// excluded (prefixed), matching claudeParseWithExclusions. +func parseCoworkSession( transcriptPath, machine string, ) ([]ParseResult, []string, error) { metaPath := coworkMetaPathForTranscript(transcriptPath) meta := readCoworkMeta(metaPath) project := coworkProjectName(meta) - results, excluded, err := ParseClaudeSessionWithExclusions( + results, excluded, err := claudeParseWithExclusions( transcriptPath, project, machine, ) if err != nil { diff --git a/internal/parser/cowork_provider.go b/internal/parser/cowork_provider.go new file mode 100644 index 000000000..feaf755a8 --- /dev/null +++ b/internal/parser/cowork_provider.go @@ -0,0 +1,331 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "slices" + "strings" +) + +// Cowork stores each session as a Claude-format transcript +// (.claude/projects/**/.jsonl) with a sibling local_.json metadata +// file, plus per-subagent transcripts. It is a single-file provider whose parse +// can yield multiple sessions (the main conversation and its subagents) and +// drive removals via excluded session IDs. All behavior is wired into the +// shared single-file base via options. 
+func newCoworkProviderFactory(def AgentDef) ProviderFactory { + return NewSingleFileProviderFactory( + def, + coworkProviderCapabilities(), + func(cfg ProviderConfig) singleFileSourceSet { + return NewSingleFileSourceSet( + AgentCowork, + cfg.Roots, + WithFileDiscovery(coworkDiscoverFiles), + WithFileWatchRoots(coworkWatchRoots), + WithFileChangedPathClassifier(coworkClassifyPath), + WithFileLookup(coworkFindFile), + WithFileFingerprint(coworkFingerprintSource), + WithFileParse(coworkParseFile), + // Parse removes stale subagents via exclusions, so an empty + // result set is still a complete (not skipped) parse. + WithAlwaysCompleteResultSet(), + ) + }, + ) +} + +func coworkDiscoverFiles(root string) []singleFileMatch { + var out []singleFileMatch + walkCoworkSessions(root, func(transcript string) { + if match, ok := coworkTranscriptMatch(root, transcript); ok { + out = append(out, match) + } + }) + return out +} + +func coworkWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"local_*.json", "*.jsonl"}, + DebounceKey: string(AgentCowork) + ":metadata:" + root, + }) + } + return out +} + +// coworkClassifyPath maps a stored or changed path to its session transcript. A +// transcript path classifies directly; a metadata path resolves to the +// session's main transcript so a title rename is picked up. Under allowMissing a +// metadata path whose transcript was deleted still resolves via on-disk +// scanning. 
+func coworkClassifyPath( + root, path string, allowMissing bool, +) (singleFileMatch, bool) { + transcript, ok := classifyCoworkPath(root, path) + if !ok && allowMissing { + transcript, ok = coworkTranscriptForMetadataPath(root, path) + } + if !ok { + return singleFileMatch{}, false + } + return coworkTranscriptMatch(root, transcript) +} + +func coworkFindFile(root, rawID string) (singleFileMatch, bool) { + path := coworkFindSourceFile(root, rawID) + if path == "" { + return singleFileMatch{}, false + } + return coworkTranscriptMatch(root, path) +} + +// coworkTranscriptMatch validates a transcript path under root and builds a +// match carrying the project hint read from the session's metadata. It +// reproduces the legacy sourceRef checks. +func coworkTranscriptMatch(root, path string) (singleFileMatch, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if _, ok := relUnder(root, path); !ok { + return singleFileMatch{}, false + } + metaPath := coworkMetaPathForTranscript(path) + if metaPath == "" { + return singleFileMatch{}, false + } + if !isCoworkTranscriptPath(root, path) { + return singleFileMatch{}, false + } + return singleFileMatch{ + Path: path, + ProjectHint: coworkProjectName(readCoworkMeta(metaPath)), + }, true +} + +func coworkFingerprintSource( + src singleFileSource, +) (SourceFingerprint, error) { + info, err := os.Stat(src.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Path, + ) + } + hash, err := hashJSONLSourceFile(src.Path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: info.Size(), + MTimeNS: CoworkSessionMtime(src.Path, info.ModTime().UnixNano()), + Hash: hash, + }, nil +} + +func coworkParseFile( + src singleFileSource, req ParseRequest, +) ([]ParseResult, []string, error) { + results, excluded, err := 
parseCoworkSession(src.Path, req.Machine) + if err != nil { + return nil, nil, err + } + if req.Fingerprint.Hash != "" { + for i := range results { + results[i].Session.File.Hash = req.Fingerprint.Hash + } + } + return results, excluded, nil +} + +func isCoworkTranscriptPath(root, path string) bool { + rel, ok := relUnder(root, path) + if !ok || filepath.Ext(path) != ".jsonl" { + return false + } + sep := string(filepath.Separator) + parts := strings.Split(rel, sep) + n := len(parts) + base := strings.TrimSuffix(filepath.Base(path), ".jsonl") + if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" { + return IsValidSessionID(base) + } + if !strings.Contains(sep+rel, sep+".claude"+sep+"projects"+sep) || + !slices.Contains(parts, "subagents") { + return false + } + return strings.HasPrefix(base, "agent-") +} + +func coworkTranscriptForMetadataPath(root, path string) (string, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || !isCoworkMetaFileName(filepath.Base(rel)) { + return "", false + } + sessionDir := strings.TrimSuffix(path, ".json") + resolvedSessionDir, err := filepath.EvalSymlinks(sessionDir) + if err != nil { + return "", false + } + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + entries, err := os.ReadDir(projectsDir) + if err != nil { + return "", false + } + var found string + for _, entry := range entries { + if !isDirOrSymlink(entry, projectsDir) { + continue + } + projectDir := filepath.Join(projectsDir, entry.Name()) + files, err := os.ReadDir(projectDir) + if err != nil { + continue + } + for _, file := range files { + if file.IsDir() { + continue + } + name := file.Name() + if !strings.HasSuffix(name, ".jsonl") { + continue + } + stem := strings.TrimSuffix(name, ".jsonl") + if !IsValidSessionID(stem) || strings.HasPrefix(stem, "agent-") { + continue + } + candidate := filepath.Join(projectDir, name) + if 
!validCoworkMainTranscriptCandidate(resolvedSessionDir, candidate) { + continue + } + if found != "" { + return "", false + } + found = candidate + } + } + return found, found != "" +} + +func validCoworkMainTranscriptCandidate(resolvedSessionDir, candidate string) bool { + if !IsRegularFile(candidate) { + return false + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil { + return false + } + return isContainedIn(resolved, resolvedSessionDir) +} + +// coworkFindSourceFile locates a cowork transcript by its raw session ID +// (the cliSessionId or "agent-" subagent id, with the "cowork:" prefix +// already stripped). +func coworkFindSourceFile(root, sessionID string) string { + if !IsValidSessionID(sessionID) { + return "" + } + target := sessionID + ".jsonl" + var found string + walkCoworkSessions(root, func(transcript string) { + if found == "" && filepath.Base(transcript) == target { + found = transcript + } + }) + return found +} + +// classifyCoworkPath reports whether a changed path under a cowork root is a +// cowork session transcript (main or subagent) or its sibling metadata file, +// and returns the transcript file that should be (re)parsed. Metadata changes +// (e.g. a title rename) resolve to the session's main transcript so the rename +// is picked up. +func classifyCoworkPath(root, path string) (string, bool) { + rel, ok := relUnder(root, path) + if !ok { + return "", false + } + sep := string(filepath.Separator) + parts := strings.Split(rel, sep) + n := len(parts) + base := parts[n-1] + + if strings.HasSuffix(base, ".jsonl") { + // Must live under a .claude/projects/ subtree. + marker := sep + ".claude" + sep + "projects" + sep + if !strings.Contains(sep+rel, marker) { + return "", false + } + stem := strings.TrimSuffix(base, ".jsonl") + if strings.HasPrefix(stem, "agent-") { + // Subagent transcript: //subagents/**/agent-*.jsonl. 
+ if slices.Contains(parts, "subagents") { + return path, true + } + return "", false + } + // Main transcript: /.jsonl directly under projects. + if n >= 5 && parts[n-4] == ".claude" && parts[n-3] == "projects" && + IsValidSessionID(stem) { + return path, true + } + return "", false + } + + // Metadata: //local_.json + if isCoworkMetaFileName(base) { + meta := readCoworkMeta(path) + if meta.CliSessionID == "" { + return "", false + } + sessionDir := strings.TrimSuffix(path, ".json") + if main, _ := resolveCoworkSession( + sessionDir, meta.CliSessionID, + ); main != "" { + return main, true + } + } + return "", false +} + +func coworkProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilitySupported, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + TerminationStatus: CapabilitySupported, + MalformedLineCount: CapabilitySupported, + Model: CapabilitySupported, + StopReason: CapabilitySupported, + }, + } +} diff --git a/internal/parser/cowork_provider_test.go b/internal/parser/cowork_provider_test.go new file mode 100644 index 000000000..7756f2c85 --- /dev/null +++ b/internal/parser/cowork_provider_test.go @@ -0,0 +1,377 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + 
"strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCoworkProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCowork) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCoworkProviderSourceMethods(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000101" + metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000101", + cliSessionID: cli, + encodedProject: "-Users-dev-code-demo", + title: "Provider title", + folders: []string{"/Users/dev/code/demo"}, + transcriptLines: coworkTranscriptLines(cli), + }) + subagentPath := filepath.Join( + filepath.Dir(transcript), + cli, + "subagents", + "tasks", + "agent-worker.jsonl", + ) + writeSourceFile(t, subagentPath, strings.Join(coworkTranscriptLines(cli), "\n")+"\n") + writeSourceFile( + t, + filepath.Join(filepath.Dir(transcript), cli, "subagents", "not-agent.jsonl"), + strings.Join(coworkTranscriptLines(cli), "\n")+"\n", + ) + writeSourceFile( + t, + filepath.Join(root, "org", "ws", "cowork-clientdata-cache.json"), + "{}\n", + ) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"local_*.json", "*.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{transcript, 
subagentPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered { + assert.Equal(t, AgentCowork, source.Provider) + assert.Equal(t, "demo", source.ProjectHint) + assert.Equal(t, source.DisplayPath, source.FingerprintKey) + } + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~cowork:" + cli, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-worker", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: transcript, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + + transcriptInfo, err := os.Stat(transcript) + require.NoError(t, err) + newer := transcriptInfo.ModTime().Add(time.Hour) + require.NoError(t, os.Chtimes(metaPath, newer, newer)) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, transcript, fingerprint.Key) + assert.Equal(t, transcriptInfo.Size(), fingerprint.Size) + assert.Equal(t, newer.UnixNano(), fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "main transcript", path: transcript, want: transcript}, + {name: "subagent transcript", path: subagentPath, want: subagentPath}, + {name: "metadata", path: metaPath, want: transcript}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: tc.path, + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, 
changed[0].DisplayPath) + }) + } + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, transcript, changed[0].DisplayPath) + + require.NoError(t, os.Remove(transcript)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: transcript, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, transcript, changed[0].DisplayPath) + + require.NoError(t, os.Remove(subagentPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "rename", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, subagentPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "org", "ws", "cowork-clientdata-cache.json"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: transcript, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestCoworkProviderParse(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000102" + _, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000102", + cliSessionID: cli, + encodedProject: "-sessions-demo", + title: "Parse title", + transcriptLines: coworkTranscriptLines(cli), + }) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + 
Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Empty(t, outcome.ExcludedSessionIDs) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "cowork:"+cli, result.Result.Session.ID) + assert.Equal(t, AgentCowork, result.Result.Session.Agent) + assert.Equal(t, "cowork", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, transcript, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "Parse title", result.Result.Session.SessionName) + assert.Equal(t, "hello there", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCoworkProviderMetadataRemovalRejectsAmbiguousMainTranscripts(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000104" + metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000104", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + otherPath := filepath.Join( + filepath.Dir(filepath.Dir(transcript)), + "-sessions-other", + "c0000000-0000-4000-8000-000000000105.jsonl", + ) + writeSourceFile( + t, + otherPath, + strings.Join(coworkTranscriptLines("c0000000-0000-4000-8000-000000000105"), "\n")+"\n", + ) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: 
[]string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, changed) +} + +func TestCoworkProviderMetadataRemovalIgnoresSymlinkEscape(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000106" + metaPath, _ := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000106", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + sessionDir := strings.TrimSuffix(metaPath, ".json") + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + outside := filepath.Join(root, "outside") + require.NoError(t, os.MkdirAll(outside, 0o755)) + writeSourceFile( + t, + filepath.Join(outside, "c0000000-0000-4000-8000-000000000107.jsonl"), + strings.Join(coworkTranscriptLines("c0000000-0000-4000-8000-000000000107"), "\n")+"\n", + ) + if err := os.Symlink(outside, filepath.Join(projectsDir, "-sessions-escape")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, cli+".jsonl", filepath.Base(changed[0].DisplayPath)) +} + +func TestCoworkProviderMetadataRemovalIgnoresBrokenSymlinkAmbiguity(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000108" + metaPath, _ := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000108", + 
cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + sessionDir := strings.TrimSuffix(metaPath, ".json") + projectsDir := filepath.Join(sessionDir, ".claude", "projects") + brokenDir := filepath.Join(projectsDir, "-sessions-broken") + require.NoError(t, os.MkdirAll(brokenDir, 0o755)) + if err := os.Symlink( + filepath.Join(root, "missing.jsonl"), + filepath.Join(brokenDir, "c0000000-0000-4000-8000-000000000109.jsonl"), + ); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, cli+".jsonl", filepath.Base(changed[0].DisplayPath)) +} + +func TestCoworkProviderFullSessionIDPrefixLookup(t *testing.T) { + root := t.TempDir() + cli := "c0000000-0000-4000-8000-000000000103" + _, transcript := writeCoworkSession(t, root, coworkFixture{ + org: "org", + workspace: "ws", + sessionUUID: "50000000-0000-4000-8000-000000000103", + cliSessionID: cli, + encodedProject: "-sessions-demo", + transcriptLines: coworkTranscriptLines(cli), + }) + + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) + + for _, id := range []string{"cowork:" + cli, "remote~cowork:" + cli} { + t.Run(strings.ReplaceAll(id, ":", "_"), func(t *testing.T) { + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: id, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) + }) + } +} diff --git a/internal/parser/cowork_test.go b/internal/parser/cowork_test.go index eaf92b6bf..365bf2d87 100644 --- a/internal/parser/cowork_test.go 
+++ b/internal/parser/cowork_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" "path/filepath" @@ -13,6 +14,54 @@ import ( "github.com/stretchr/testify/require" ) +// coworkProviderForRoot constructs a cowork provider rooted at root. +func coworkProviderForRoot(t *testing.T, root, machine string) Provider { + t.Helper() + provider, ok := NewProvider(AgentCowork, ProviderConfig{ + Roots: []string{root}, + Machine: machine, + }) + require.True(t, ok) + return provider +} + +// coworkDiscoveredPaths returns the transcript paths the provider discovers +// under root. +func coworkDiscoveredPaths(t *testing.T, root string) []string { + t.Helper() + sources, err := coworkProviderForRoot(t, root, "").Discover(context.Background()) + require.NoError(t, err) + paths := make([]string, len(sources)) + for i, source := range sources { + paths[i] = source.DisplayPath + } + return paths +} + +// coworkParseTranscript finds and parses a single cowork transcript through +// the provider, returning the parse results and any excluded session IDs. +func coworkParseTranscript( + t *testing.T, root, transcript, machine string, +) ([]ParseResult, []string) { + t.Helper() + provider := coworkProviderForRoot(t, root, machine) + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: transcript, + }) + require.NoError(t, err) + require.True(t, ok, "find source for %s", transcript) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Machine: machine, + }) + require.NoError(t, err) + results := make([]ParseResult, len(outcome.Results)) + for i, out := range outcome.Results { + results[i] = out.Result + } + return results, outcome.ExcludedSessionIDs +} + // All identifiers, titles, and content below are synthetic fixtures. // coworkFixture describes one cowork session to materialize on disk. 
@@ -91,7 +140,7 @@ func coworkTranscriptLines(cli string) []string { } } -func TestDiscoverCoworkSessions(t *testing.T) { +func TestCoworkProviderDiscoversSessions(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000001" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -104,13 +153,12 @@ func TestDiscoverCoworkSessions(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - got := DiscoverCoworkSessions(root) - require.Len(t, got, 1, "discovered files") - assert.Equal(t, transcript, got[0].Path, "Path") - assert.Equal(t, AgentCowork, got[0].Agent, "Agent") + got := coworkDiscoveredPaths(t, root) + require.Len(t, got, 1, "discovered sources") + assert.Equal(t, transcript, got[0], "DisplayPath") } -func TestDiscoverCoworkSessionsIgnoresNoise(t *testing.T) { +func TestCoworkProviderDiscoverIgnoresNoise(t *testing.T) { root := t.TempDir() wsDir := filepath.Join(root, "org", "ws") require.NoError(t, os.MkdirAll(wsDir, 0o755), "mkdir ws") @@ -145,10 +193,10 @@ func TestDiscoverCoworkSessionsIgnoresNoise(t *testing.T) { "write transcript-less meta", ) - assert.Empty(t, DiscoverCoworkSessions(root)) + assert.Empty(t, coworkDiscoveredPaths(t, root)) } -func TestParseCoworkSession(t *testing.T) { +func TestCoworkProviderParsesSession(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000002" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -163,8 +211,7 @@ func TestParseCoworkSession(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, excluded, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, excluded := coworkParseTranscript(t, root, transcript, "host-1") require.Empty(t, excluded, "excluded") require.Len(t, results, 1, "results") @@ -185,7 +232,7 @@ func TestParseCoworkSession(t *testing.T) { assert.Equal(t, 12, sess.PeakContextTokens, "PeakContextTokens (input+cacheRead)") } -func 
TestParseCoworkSessionTitleFallsBackToAITitle(t *testing.T) { +func TestCoworkProviderParseTitleFallsBackToAITitle(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000003" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -198,14 +245,13 @@ func TestParseCoworkSessionTitleFallsBackToAITitle(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, _, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, _ := coworkParseTranscript(t, root, transcript, "host-1") require.Len(t, results, 1, "results") assert.Equal(t, "Auto title", results[0].Session.SessionName, "falls back to ai-title event") } -func TestParseCoworkSessionProjectFromSelectedFolder(t *testing.T) { +func TestCoworkProviderParseProjectFromSelectedFolder(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000004" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -219,14 +265,13 @@ func TestParseCoworkSessionProjectFromSelectedFolder(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - results, _, err := ParseCoworkSession(transcript, "host-1") - require.NoError(t, err, "parse") + results, _ := coworkParseTranscript(t, root, transcript, "host-1") require.Len(t, results, 1, "results") assert.Equal(t, "my_app", results[0].Session.Project, "project derived from userSelectedFolders") } -func TestFindCoworkSourceFile(t *testing.T) { +func TestCoworkProviderFindsSourceFile(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000005" _, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -239,11 +284,22 @@ func TestFindCoworkSourceFile(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) - assert.Equal(t, transcript, FindCoworkSourceFile(root, cli), "found") - assert.Empty(t, FindCoworkSourceFile(root, "nonexistent-id"), "missing") + provider := coworkProviderForRoot(t, root, "") + found, ok, err := 
provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: cli, + }) + require.NoError(t, err) + require.True(t, ok, "found") + assert.Equal(t, transcript, found.DisplayPath) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "nonexistent-id", + }) + require.NoError(t, err) + assert.False(t, ok, "missing") } -func TestClassifyCoworkPath(t *testing.T) { +func TestCoworkProviderClassifiesChangedPath(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000006" metaPath, transcript := writeCoworkSession(t, root, coworkFixture{ @@ -256,20 +312,34 @@ func TestClassifyCoworkPath(t *testing.T) { transcriptLines: coworkTranscriptLines(cli), }) + provider := coworkProviderForRoot(t, root, "") + classify := func(path string) (string, bool) { + sources, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + if len(sources) == 0 { + return "", false + } + require.Len(t, sources, 1) + return sources[0].DisplayPath, true + } + // A transcript change classifies to itself. - got, ok := ClassifyCoworkPath(root, transcript) + got, ok := classify(transcript) require.True(t, ok, "transcript classified") assert.Equal(t, transcript, got, "transcript path") // A metadata change resolves to the session's transcript. - got, ok = ClassifyCoworkPath(root, metaPath) + got, ok = classify(metaPath) require.True(t, ok, "metadata classified") assert.Equal(t, transcript, got, "metadata resolves to transcript") // Unrelated and outside-root paths are ignored. 
- _, ok = ClassifyCoworkPath(root, filepath.Join(root, "org", "ws", "artifacts.json")) + _, ok = classify(filepath.Join(root, "org", "ws", "artifacts.json")) assert.False(t, ok, "cache file ignored") - _, ok = ClassifyCoworkPath(root, "/some/other/place.jsonl") + _, ok = classify("/some/other/place.jsonl") assert.False(t, ok, "outside root ignored") } @@ -310,7 +380,7 @@ func TestCoworkSessionMtime(t *testing.T) { "transcript mtime when metadata missing") } -func TestDiscoverCoworkSessionsIncludesSubagents(t *testing.T) { +func TestCoworkProviderDiscoverIncludesSubagents(t *testing.T) { root := t.TempDir() cli := "c0000000-0000-4000-8000-000000000008" enc := "-sessions-demo" @@ -344,29 +414,27 @@ func TestDiscoverCoworkSessionsIncludesSubagents(t *testing.T) { "write subagent", ) - got := DiscoverCoworkSessions(root) - paths := make([]string, len(got)) - for i, f := range got { - paths[i] = f.Path - assert.Equal(t, AgentCowork, f.Agent, "Agent") - } + paths := coworkDiscoveredPaths(t, root) assert.Contains(t, paths, transcript, "main transcript discovered") assert.Contains(t, paths, subPath, "subagent transcript discovered") // The subagent parses into a cowork-namespaced subagent session whose // parent is the main session. - results, _, err := ParseCoworkSession(subPath, "host-1") - require.NoError(t, err, "parse subagent") + results, _ := coworkParseTranscript(t, root, subPath, "host-1") require.Len(t, results, 1, "results") sub := results[0].Session assert.Equal(t, "cowork:agent-0000000000000001", sub.ID, "subagent ID") assert.Equal(t, "cowork:"+cli, sub.ParentSessionID, "parent prefixed") assert.Equal(t, RelSubagent, sub.RelationshipType, "RelSubagent") - // FindCoworkSourceFile resolves the subagent by its raw ID too. - assert.Equal(t, subPath, - FindCoworkSourceFile(root, "agent-0000000000000001"), - "find subagent source") + // The provider resolves the subagent by its raw ID too. 
+ provider := coworkProviderForRoot(t, root, "") + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "agent-0000000000000001", + }) + require.NoError(t, err) + require.True(t, ok, "find subagent source") + assert.Equal(t, subPath, found.DisplayPath) } func TestResolveCoworkSessionRejectsSymlinkEscape(t *testing.T) { diff --git a/internal/parser/cursor.go b/internal/parser/cursor.go index f570b07f7..fa42d25d0 100644 --- a/internal/parser/cursor.go +++ b/internal/parser/cursor.go @@ -17,10 +17,10 @@ import ( // under 500 KB; 10 MB provides generous headroom. const maxCursorTranscriptSize = 10 << 20 -// ParseCursorSession parses a Cursor agent transcript file. -// Transcripts are plain text with "user:" and "assistant:" role -// markers, tool calls, and thinking blocks. -func ParseCursorSession( +// parseSession parses a Cursor agent transcript file. Transcripts are plain +// text with "user:" and "assistant:" role markers, tool calls, and thinking +// blocks. 
// Compile-time assertion that *cursorProvider satisfies Provider.
var _ Provider = (*cursorProvider)(nil)

// cursorProviderFactory builds cursorProvider values for one agent
// definition.
type cursorProviderFactory struct {
	def AgentDef
}

// newCursorProviderFactory returns a ProviderFactory that stores the result
// of cloneAgentDef(def) rather than def itself.
func newCursorProviderFactory(def AgentDef) ProviderFactory {
	return cursorProviderFactory{def: cloneAgentDef(def)}
}

// Definition returns the factory's agent definition passed through
// cloneAgentDef.
func (f cursorProviderFactory) Definition() AgentDef {
	return cloneAgentDef(f.def)
}

// Capabilities returns the capability set from cursorProviderCapabilities.
func (f cursorProviderFactory) Capabilities() Capabilities {
	return cursorProviderCapabilities()
}

// NewProvider builds a cursorProvider from cfg.Clone(): the cloned config is
// stored on the embedded ProviderBase and its Roots seed the source set.
func (f cursorProviderFactory) NewProvider(cfg ProviderConfig) Provider {
	cfg = cfg.Clone()
	return &cursorProvider{
		ProviderBase: ProviderBase{
			Def:    cloneAgentDef(f.def),
			Caps:   cursorProviderCapabilities(),
			Config: cfg,
		},
		sources: newCursorSourceSet(cfg.Roots),
	}
}

// cursorProvider is the Provider implementation for Cursor transcripts. All
// source-level operations are delegated to its cursorSourceSet.
type cursorProvider struct {
	ProviderBase
	sources cursorSourceSet
}

// Discover delegates to the source set's Discover.
func (p *cursorProvider) Discover(ctx context.Context) ([]SourceRef, error) {
	return p.sources.Discover(ctx)
}

// WatchPlan delegates to the source set's WatchPlan.
func (p *cursorProvider) WatchPlan(ctx context.Context) (WatchPlan, error) {
	return p.sources.WatchPlan(ctx)
}

// SourcesForChangedPath delegates to the source set's SourcesForChangedPath.
func (p *cursorProvider) SourcesForChangedPath(
	ctx context.Context,
	req ChangedPathRequest,
) ([]SourceRef, error) {
	return p.sources.SourcesForChangedPath(ctx, req)
}

// FindSource first passes req through ProviderFindRequestWithRawSessionID
// (presumably filling RawSessionID from FullSessionID for this provider —
// confirm against that helper) and then delegates to the source set.
func (p *cursorProvider) FindSource(
	ctx context.Context,
	req FindSourceRequest,
) (SourceRef, bool, error) {
	req = ProviderFindRequestWithRawSessionID(p.Def, req)
	return p.sources.FindSource(ctx, req)
}

// Fingerprint delegates to the source set's Fingerprint.
func (p *cursorProvider) Fingerprint(
	ctx context.Context,
	source SourceRef,
) (SourceFingerprint, error) {
	return p.sources.Fingerprint(ctx, source)
}

// Parse parses the transcript behind req.Source into a single-result
// ParseOutcome.
//
// It returns ctx.Err() when the context is already done, and an error when no
// path can be derived from the source or when parseSession fails. When
// parseSession yields no session, the outcome has no results and carries
// SkipNoSession. ResultSetComplete is true on every non-error outcome.
func (p *cursorProvider) Parse(
	ctx context.Context,
	req ParseRequest,
) (ParseOutcome, error) {
	if err := ctx.Err(); err != nil {
		return ParseOutcome{}, err
	}
	path, ok := p.sources.pathFromSource(req.Source)
	if !ok {
		return ParseOutcome{}, fmt.Errorf("cursor source path unavailable")
	}
	// The request's machine is listed before the provider's configured one
	// (presumably the first non-empty value wins — confirm against helper).
	machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine)
	sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine)
	if err != nil {
		return ParseOutcome{}, err
	}
	if sess == nil {
		return ParseOutcome{
			ResultSetComplete: true,
			SkipReason:        SkipNoSession,
		}, nil
	}
	// A hash supplied with the request's fingerprint replaces whatever hash
	// parseSession recorded on the session file.
	if req.Fingerprint.Hash != "" {
		sess.File.Hash = req.Fingerprint.Hash
	}
	return ParseOutcome{
		Results: []ParseResultOutcome{{
			Result: ParseResult{
				Session:  *sess,
				Messages: msgs,
			},
			DataVersion: DataVersionCurrent,
		}},
		ResultSetComplete: true,
	}, nil
}

// cursorSource is the value stored in SourceRef.Opaque for Cursor sources:
// the root the transcript was resolved under and the transcript's path.
type cursorSource struct {
	Root string
	Path string
}

// cursorSourceSet resolves Cursor transcript sources under a list of roots.
type cursorSourceSet struct {
	roots []string
}

// newCursorSourceSet returns a source set over cleanJSONLRoots(roots).
func newCursorSourceSet(roots []string) cursorSourceSet {
	return cursorSourceSet{roots: cleanJSONLRoots(roots)}
}

// Discover walks every root and returns a SourceRef for each transcript path
// that sourceRef accepts. The context is checked once per root; collected
// sources go through addJSONLSource (with a shared seen set) and are passed
// to sortJSONLSources before being returned.
func (s cursorSourceSet) Discover(ctx context.Context) ([]SourceRef, error) {
	var sources []SourceRef
	seen := make(map[string]struct{})
	for _, root := range s.roots {
		if err := ctx.Err(); err != nil {
			return nil, err
		}
		for _, path := range s.discoverTranscriptPaths(root) {
			source, ok := s.sourceRef(root, path)
			if !ok {
				continue
			}
			addJSONLSource(source, &sources, seen)
		}
	}
	sortJSONLSources(sources)
	return sources, nil
}
+// Cursor uses two layouts: flat (agent-transcripts/.{txt,jsonl}) and +// nested (agent-transcripts//.{txt,jsonl}); when both .jsonl and +// .txt exist for the same stem, .jsonl is preferred. +func (s cursorSourceSet) discoverTranscriptPaths(projectsDir string) []string { + if projectsDir == "" { + return nil + } + + // Canonicalize root once for containment checks. + resolvedRoot, err := filepath.EvalSymlinks(projectsDir) + if err != nil { + return nil + } + + entries, err := os.ReadDir(projectsDir) + if err != nil { + return nil + } + + var paths []string + for _, entry := range entries { + if !entry.IsDir() { + continue + } + // Reject symlinked project directory entries. + if entry.Type()&os.ModeSymlink != 0 { + continue + } + + transcriptsDir := filepath.Join( + projectsDir, entry.Name(), "agent-transcripts", + ) + + // Verify the transcripts directory resolves within + // the canonical root. + resolvedDir, err := filepath.EvalSymlinks(transcriptsDir) + if err != nil { + continue + } + if !isContainedIn(resolvedDir, resolvedRoot) { + continue + } + + transcripts, err := os.ReadDir(transcriptsDir) + if err != nil { + continue + } + + // Collect valid transcripts, deduping by basename + // stem. When both .jsonl and .txt exist for the + // same session, prefer .jsonl. + // + // Cursor uses two layouts: + // flat: agent-transcripts/.{txt,jsonl} + // nested: agent-transcripts//.{txt,jsonl} + seen := make(map[string]string) // stem -> path + for _, sf := range transcripts { + if !sf.IsDir() { + // Flat layout: file directly in + // agent-transcripts/. + name := sf.Name() + if !IsCursorTranscriptExt(name) { + continue + } + fullPath := filepath.Join(transcriptsDir, name) + if !IsRegularFile(fullPath) { + continue + } + cursorAddSeen(seen, name, fullPath) + continue + } + + // Nested layout: agent-transcripts// + // containing .{txt,jsonl}. 
// cursorAddSeen records fullPath in seen under the stem of name (name with
// its final extension removed). The first path recorded for a stem is kept,
// with one exception: when the recorded path ends in ".txt" and name ends in
// ".jsonl", the new path replaces it, so .jsonl wins over .txt.
func cursorAddSeen(seen map[string]string, name, fullPath string) {
	key := strings.TrimSuffix(name, filepath.Ext(name))
	existing, taken := seen[key]
	switch {
	case !taken:
		// First sighting of this stem.
		seen[key] = fullPath
	case strings.HasSuffix(existing, ".txt") && strings.HasSuffix(name, ".jsonl"):
		// Upgrade a plain-text transcript to its JSONL counterpart.
		seen[key] = fullPath
	}
}
s.sourceForPathInRoot(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s cursorSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + if source, ok := s.sourceForPath(path); ok { + return source, true, nil + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := cursorFindSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// cursorFindSourceFile finds a Cursor transcript file by session UUID across a +// projects root, preferring .jsonl over .txt. Returns "" if no matching file +// resolves within the canonical root. +func cursorFindSourceFile(projectsDir, sessionID string) string { + if projectsDir == "" || !IsValidSessionID(sessionID) { + return "" + } + + entries, err := os.ReadDir(projectsDir) + if err != nil { + return "" + } + + resolvedRoot, err := filepath.EvalSymlinks(projectsDir) + if err != nil { + return "" + } + + for _, ext := range []string{".jsonl", ".txt"} { + target := sessionID + ext + for _, entry := range entries { + if !entry.IsDir() { + continue + } + // Nested layout first (matches discovery + // precedence), then flat layout. 
+ candidates := []string{ + filepath.Join( + projectsDir, entry.Name(), + "agent-transcripts", sessionID, target, + ), + filepath.Join( + projectsDir, entry.Name(), + "agent-transcripts", target, + ), + } + for _, candidate := range candidates { + if !IsRegularFile(candidate) { + continue + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil { + continue + } + rel, err := filepath.Rel(resolvedRoot, resolved) + sep := string(filepath.Separator) + if err != nil || rel == ".." || + strings.HasPrefix(rel, ".."+sep) { + continue + } + return candidate + } + } + } + return "" +} + +func (s cursorSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("cursor source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash := "" + if info.Size() <= maxCursorTranscriptSize { + hash, err = hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Hash: hash, + }, nil +} + +func (s cursorSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case cursorSource: + return src.Path, src.Path != "" + case *cursorSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if ref, ok := s.sourceForPath(candidate); ok { + src := ref.Opaque.(cursorSource) + return src.Path, true + } + } + return "", false +} + +func 
(s cursorSourceSet) sourceForPath(path string) (SourceRef, bool) { + for _, root := range s.roots { + if source, ok := s.sourceForPathInRoot(root, path); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (s cursorSourceSet) sourceForPathInRoot( + root string, + path string, +) (SourceRef, bool) { + rawID, ok := cursorRawSessionIDFromPath(root, path) + if !ok { + return SourceRef{}, false + } + projectDir, ok := cursorProjectDirFromPath(root, path) + if !ok { + return SourceRef{}, false + } + selected := cursorFindSourceFileInProject(root, projectDir, rawID) + if selected == "" { + return SourceRef{}, false + } + return s.sourceRef(root, selected) +} + +func (s cursorSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !IsRegularFile(path) { + return SourceRef{}, false + } + rawID, ok := cursorRawSessionIDFromPath(root, path) + if !ok { + return SourceRef{}, false + } + projectDir, ok := cursorProjectDirFromPath(root, path) + if !ok { + return SourceRef{}, false + } + selected := cursorFindSourceFileInProject(root, projectDir, rawID) + if selected == "" || !samePath(selected, path) { + return SourceRef{}, false + } + project := DecodeCursorProjectDir(projectDir) + if project == "" { + project = "unknown" + } + return SourceRef{ + Provider: AgentCursor, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: cursorSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s cursorSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + +func cursorFindSourceFileInProject(root, projectDir, rawID string) string { + if root == "" || projectDir == "" || !IsValidSessionID(rawID) { + return "" + } + resolvedRoot, err := filepath.EvalSymlinks(root) + if err != nil { + return "" + } + transcriptsDir := filepath.Join(root, projectDir, 
"agent-transcripts") + for _, ext := range []string{".jsonl", ".txt"} { + target := rawID + ext + candidates := []string{ + filepath.Join(transcriptsDir, rawID, target), + filepath.Join(transcriptsDir, target), + } + for _, candidate := range candidates { + if !IsRegularFile(candidate) { + continue + } + resolved, err := filepath.EvalSymlinks(candidate) + if err != nil || !isContainedIn(resolved, resolvedRoot) { + continue + } + return candidate + } + } + return "" +} + +func cursorRawSessionIDFromPath(root, path string) (string, bool) { + rel, ok := cursorRelPath(root, path) + if !ok { + return "", false + } + parts := strings.Split(rel, string(filepath.Separator)) + switch len(parts) { + case 3: + return strings.TrimSuffix(parts[2], filepath.Ext(parts[2])), true + case 4: + return parts[2], true + default: + return "", false + } +} + +func cursorProjectDirFromPath(root, path string) (string, bool) { + rel, ok := cursorRelPath(root, path) + if !ok { + return "", false + } + return ParseCursorTranscriptRelPath(rel) +} + +func cursorRelPath(root, path string) (string, bool) { + rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path)) + if err != nil { + return "", false + } + if _, ok := ParseCursorTranscriptRelPath(rel); !ok { + return "", false + } + return rel, true +} + +func cursorProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git 
// TestCursorProviderFactoryReplacesLegacyAdapter checks that a provider
// factory is registered for AgentCursor and that NewProvider returns a
// non-nil provider for it.
func TestCursorProviderFactoryReplacesLegacyAdapter(t *testing.T) {
	factory, ok := ProviderFactoryByType(AgentCursor)
	require.True(t, ok)
	require.NotNil(t, factory)

	provider, ok := NewProvider(AgentCursor, ProviderConfig{
		Roots:   []string{t.TempDir()},
		Machine: "devbox",
	})
	require.True(t, ok)
	require.NotNil(t, provider)
}

// TestCursorProviderSourceMethods exercises WatchPlan, Discover, FindSource,
// Fingerprint and SourcesForChangedPath against one project that contains
// flat and nested transcripts in both .txt and .jsonl form, plus two files
// that must be ignored (a deeper subagents file and a nested file whose stem
// does not match its directory).
func TestCursorProviderSourceMethods(t *testing.T) {
	root := t.TempDir()
	projectDir := "Users-fiona-Documents-demo"
	transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts")
	flatTxt := cursorProviderWriteTranscript(t, transcriptsDir, "flat.txt", "old")
	flatJSONL := cursorProviderWriteJSONLTranscript(t, transcriptsDir, "flat.jsonl", "new")
	nestedTxt := cursorProviderWriteTranscript(t, transcriptsDir, filepath.Join("nested", "nested.txt"), "old")
	nestedJSONL := cursorProviderWriteJSONLTranscript(
		t, transcriptsDir, filepath.Join("nested", "nested.jsonl"), "new",
	)
	cursorProviderWriteJSONLTranscript(
		t, transcriptsDir, filepath.Join("nested", "subagents", "child.jsonl"), "child",
	)
	cursorProviderWriteJSONLTranscript(t, transcriptsDir, filepath.Join("mismatch", "other.jsonl"), "other")

	provider, ok := NewProvider(AgentCursor, ProviderConfig{
		Roots:   []string{root},
		Machine: "devbox",
	})
	require.True(t, ok)

	plan, err := provider.WatchPlan(context.Background())
	require.NoError(t, err)
	require.Len(t, plan.Roots, 1)
	assert.Equal(t, root, plan.Roots[0].Path)
	assert.True(t, plan.Roots[0].Recursive)
	assert.Equal(t, []string{"*.jsonl", "*.txt"}, plan.Roots[0].IncludeGlobs)

	// Only the .jsonl variant of each session is expected to be discovered.
	discovered, err := provider.Discover(context.Background())
	require.NoError(t, err)
	require.Len(t, discovered, 2)
	assert.ElementsMatch(t, []string{flatJSONL, nestedJSONL}, []string{
		discovered[0].DisplayPath,
		discovered[1].DisplayPath,
	})
	for _, source := range discovered {
		assert.Equal(t, AgentCursor, source.Provider)
		assert.Equal(t, DecodeCursorProjectDir(projectDir), source.ProjectHint)
	}

	found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{
		FullSessionID: "remote~cursor:flat",
	})
	require.NoError(t, err)
	require.True(t, ok)
	assert.Equal(t, flatJSONL, found.DisplayPath)

	// Looking up the .txt path resolves to the .jsonl sibling.
	found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{
		StoredFilePath: flatTxt,
	})
	require.NoError(t, err)
	require.True(t, ok)
	assert.Equal(t, flatJSONL, found.DisplayPath)

	found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{
		RawSessionID: "nested",
	})
	require.NoError(t, err)
	require.True(t, ok)
	assert.Equal(t, nestedJSONL, found.DisplayPath)

	fingerprint, err := provider.Fingerprint(context.Background(), found)
	require.NoError(t, err)
	assert.Equal(t, nestedJSONL, fingerprint.Key)
	assert.Positive(t, fingerprint.Size)
	assert.Positive(t, fingerprint.MTimeNS)
	assert.NotEmpty(t, fingerprint.Hash)

	for _, tc := range []struct {
		name string
		path string
		want string
	}{
		{name: "flat txt promotes to jsonl", path: flatTxt, want: flatJSONL},
		{name: "flat jsonl", path: flatJSONL, want: flatJSONL},
		{name: "nested txt promotes to jsonl", path: nestedTxt, want: nestedJSONL},
		{name: "nested jsonl", path: nestedJSONL, want: nestedJSONL},
	} {
		t.Run(tc.name, func(t *testing.T) {
			changed, err := provider.SourcesForChangedPath(
				context.Background(),
				ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root},
			)
			require.NoError(t, err)
			require.Len(t, changed, 1)
			assert.Equal(t, tc.want, changed[0].DisplayPath)
		})
	}

	// A file below nested/subagents/ does not classify to any source.
	ignored, err := provider.SourcesForChangedPath(
		context.Background(),
		ChangedPathRequest{
			Path:      filepath.Join(transcriptsDir, "nested", "subagents", "child.jsonl"),
			EventKind: "write",
			WatchRoot: root,
		},
	)
	require.NoError(t, err)
	assert.Empty(t, ignored)

	// A watch root that is not configured on the provider yields nothing.
	wrongRoot, err := provider.SourcesForChangedPath(
		context.Background(),
		ChangedPathRequest{
			Path:      flatJSONL,
			EventKind: "write",
			WatchRoot: filepath.Join(root, "..", "other-root"),
		},
	)
	require.NoError(t, err)
	assert.Empty(t, wrongRoot)
}

// TestCursorProviderResolvesDuplicateStemsWithinProject places transcripts
// with the same stem in two projects and checks that discovery returns one
// source per project and that a .txt path in the second project resolves to
// that project's .jsonl file, not the first project's.
func TestCursorProviderResolvesDuplicateStemsWithinProject(t *testing.T) {
	root := t.TempDir()
	firstProject := "Users-fiona-Documents-first"
	secondProject := "Users-fiona-Documents-second"
	firstDir := filepath.Join(root, firstProject, "agent-transcripts")
	secondDir := filepath.Join(root, secondProject, "agent-transcripts")
	firstJSONL := cursorProviderWriteJSONLTranscript(t, firstDir, "shared.jsonl", "first")
	secondTxt := cursorProviderWriteTranscript(t, secondDir, "shared.txt", "second old")
	secondJSONL := cursorProviderWriteJSONLTranscript(t, secondDir, "shared.jsonl", "second new")

	provider, ok := NewProvider(AgentCursor, ProviderConfig{
		Roots:   []string{root},
		Machine: "devbox",
	})
	require.True(t, ok)

	discovered, err := provider.Discover(context.Background())
	require.NoError(t, err)
	assert.ElementsMatch(t, []string{firstJSONL, secondJSONL}, sourceDisplayPaths(discovered))

	found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{
		StoredFilePath: secondTxt,
	})
	require.NoError(t, err)
	require.True(t, ok)
	assert.Equal(t, secondJSONL, found.DisplayPath)
	assert.Equal(t, DecodeCursorProjectDir(secondProject), found.ProjectHint)

	changed, err := provider.SourcesForChangedPath(
		context.Background(),
		ChangedPathRequest{Path: secondTxt, EventKind: "write", WatchRoot: root},
	)
	require.NoError(t, err)
	require.Len(t, changed, 1)
	assert.Equal(t, secondJSONL, changed[0].DisplayPath)
	assert.Equal(t, DecodeCursorProjectDir(secondProject), changed[0].ProjectHint)
}

// TestCursorProviderParse discovers a single JSONL transcript, fingerprints
// it, parses it with that fingerprint, and checks the resulting session's
// identity, project, machine, file metadata and message count.
func TestCursorProviderParse(t *testing.T) {
	root := t.TempDir()
	projectDir := "Users-fiona-Documents-demo"
	transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts")
	sourcePath := cursorProviderWriteJSONLTranscript(
		t, transcriptsDir, "parse.jsonl", "parse question",
	)
	provider, ok := NewProvider(AgentCursor, ProviderConfig{
		Roots:   []string{root},
		Machine: "devbox",
	})
	require.True(t, ok)
	sources, err := provider.Discover(context.Background())
	require.NoError(t, err)
	require.Len(t, sources, 1)
	fingerprint, err := provider.Fingerprint(context.Background(), sources[0])
	require.NoError(t, err)

	outcome, err := provider.Parse(context.Background(), ParseRequest{
		Source:      sources[0],
		Fingerprint: fingerprint,
	})
	require.NoError(t, err)
	require.True(t, outcome.ResultSetComplete)
	require.False(t, outcome.ForceReplace)
	require.Len(t, outcome.Results, 1)
	result := outcome.Results[0]
	assert.Equal(t, DataVersionCurrent, result.DataVersion)
	assert.Equal(t, "cursor:parse", result.Result.Session.ID)
	assert.Equal(t, AgentCursor, result.Result.Session.Agent)
	assert.Equal(t, DecodeCursorProjectDir(projectDir), result.Result.Session.Project)
	assert.Equal(t, "devbox", result.Result.Session.Machine)
	assert.Equal(t, sourcePath, result.Result.Session.File.Path)
	assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash)
	assert.Equal(t, "parse question", result.Result.Session.FirstMessage)
	assert.Len(t, result.Result.Messages, 2)
}

// TestCursorProviderFingerprintSkipsOversizedTranscriptHash creates a
// transcript one byte larger than maxCursorTranscriptSize and checks that
// Fingerprint reports its size with an empty hash, while Parse rejects it
// with a "file too large" error.
func TestCursorProviderFingerprintSkipsOversizedTranscriptHash(t *testing.T) {
	root := t.TempDir()
	projectDir := "Users-fiona-Documents-demo"
	transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts")
	sourcePath := filepath.Join(transcriptsDir, "oversized.jsonl")
	require.NoError(t, os.MkdirAll(transcriptsDir, 0o755))
	file, err := os.Create(sourcePath)
	require.NoError(t, err)
	// Truncate grows the empty file to the target size without writing data.
	require.NoError(t, file.Truncate(maxCursorTranscriptSize+1))
	require.NoError(t, file.Close())

	provider, ok := NewProvider(AgentCursor, ProviderConfig{
		Roots:   []string{root},
		Machine: "devbox",
	})
	require.True(t, ok)
	sources, err := provider.Discover(context.Background())
	require.NoError(t, err)
	require.Len(t, sources, 1)

	fingerprint, err := provider.Fingerprint(context.Background(), sources[0])
	require.NoError(t, err)
	assert.Equal(t, sourcePath, fingerprint.Key)
	assert.Equal(t, int64(maxCursorTranscriptSize+1), fingerprint.Size)
	assert.Positive(t, fingerprint.MTimeNS)
	assert.Empty(t, fingerprint.Hash)

	_, err = provider.Parse(context.Background(), ParseRequest{
		Source:      sources[0],
		Fingerprint: fingerprint,
	})
	require.Error(t, err)
	assert.Contains(t, err.Error(), "file too large")
}

// cursorProviderWriteTranscript writes a plain-text transcript at dir/name
// (creating parent directories) containing a "user:" turn with firstMessage
// followed by an "assistant:" turn, and returns the file's path.
func cursorProviderWriteTranscript(
	t *testing.T,
	dir string,
	name string,
	firstMessage string,
) string {
	t.Helper()
	path := filepath.Join(dir, name)
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	require.NoError(t, os.WriteFile(
		path,
		[]byte("user:\n"+firstMessage+"\nassistant:\nDone.\n"),
		0o644,
	))
	return path
}

// cursorProviderWriteJSONLTranscript writes a two-line JSONL transcript at
// dir/name (creating parent directories): a user message with firstMessage
// and an assistant reply. It returns the file's path.
//
// NOTE(review): firstMessage is spliced into the JSON text without escaping,
// so it must not contain quotes or backslashes.
func cursorProviderWriteJSONLTranscript(
	t *testing.T,
	dir string,
	name string,
	firstMessage string,
) string {
	t.Helper()
	path := filepath.Join(dir, name)
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	require.NoError(t, os.WriteFile(
		path,
		[]byte(`{"role":"user","message":{"content":"`+firstMessage+`"}}`+"\n"+
			`{"role":"assistant","message":{"content":"Done."}}`+"\n"),
		0o644,
	))
	return path
}
DiscoverClaudeProjects finds all project directories under the -// Claude projects dir and returns their JSONL session files. -func DiscoverClaudeProjects(projectsDir string) []DiscoveredFile { +// ClaudeProjectSessionFiles finds all project directories under the +// Claude projects dir and returns their JSONL session files. It is the +// provider-owned enumeration body shared by the Claude provider source +// set (full-sync discovery) and the engine's duplicate-candidate +// expansion. The name carries no legacy entrypoint verb so the +// provider can call it without shimming a Discover* free function. +func ClaudeProjectSessionFiles(projectsDir string) []DiscoveredFile { if strings.HasPrefix(projectsDir, "s3://") { return discoverClaudeS3(projectsDir) } @@ -569,9 +573,12 @@ func DiscoverCodexSessions(sessionsDir string) []DiscoveredFile { return files } -// FindClaudeSourceFile finds the original JSONL file for a Claude -// session ID by searching all project directories. -func FindClaudeSourceFile( +// claudeFindSourceFile finds the original JSONL file for a Claude +// session ID by searching all project directories. It is the +// provider-owned lookup body used by the Claude provider source set's +// FindSource. The name carries no legacy entrypoint verb so the +// provider can call it without shimming a Find* free function. +func claudeFindSourceFile( projectsDir, sessionID string, ) string { if !IsValidSessionID(sessionID) { @@ -600,7 +607,7 @@ func FindClaudeSourceFile( // //subagents/**/agent-.jsonl if strings.HasPrefix(sessionID, "agent-") { for _, entry := range entries { - if !entry.IsDir() { + if !isDirOrSymlink(entry, projectsDir) { continue } projDir := filepath.Join( @@ -987,219 +994,6 @@ func confirmGeminiSessionID( return GeminiSessionID(data) == sessionID } -// DiscoverCursorSessions finds all agent transcript files under -// the Cursor projects dir (//agent-transcripts/.txt). 
-// All discovered paths are validated to resolve within the -// canonical projectsDir, preventing symlink escapes. -// cursorAddSeen inserts a transcript path into the seen map, -// preferring .jsonl over .txt when both exist for the same stem. -func cursorAddSeen( - seen map[string]string, name, fullPath string, -) { - stem := strings.TrimSuffix(name, filepath.Ext(name)) - if prev, ok := seen[stem]; ok { - if strings.HasSuffix(prev, ".txt") && - strings.HasSuffix(name, ".jsonl") { - seen[stem] = fullPath - } - return - } - seen[stem] = fullPath -} - -func DiscoverCursorSessions( - projectsDir string, -) []DiscoveredFile { - if projectsDir == "" { - return nil - } - - // Canonicalize root once for containment checks. - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return nil - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Reject symlinked project directory entries. - if entry.Type()&os.ModeSymlink != 0 { - continue - } - - transcriptsDir := filepath.Join( - projectsDir, entry.Name(), "agent-transcripts", - ) - - // Verify the transcripts directory resolves within - // the canonical root. - resolvedDir, err := filepath.EvalSymlinks( - transcriptsDir, - ) - if err != nil { - continue - } - if !isContainedIn(resolvedDir, resolvedRoot) { - continue - } - - transcripts, err := os.ReadDir(transcriptsDir) - if err != nil { - continue - } - - project := DecodeCursorProjectDir(entry.Name()) - if project == "" { - project = "unknown" - } - - // Collect valid transcripts, deduping by basename - // stem. When both .jsonl and .txt exist for the - // same session, prefer .jsonl. 
- // - // Cursor uses two layouts: - // flat: agent-transcripts/.{txt,jsonl} - // nested: agent-transcripts//.{txt,jsonl} - seen := make(map[string]string) // stem -> path - for _, sf := range transcripts { - if !sf.IsDir() { - // Flat layout: file directly in - // agent-transcripts/. - name := sf.Name() - if !IsCursorTranscriptExt(name) { - continue - } - fullPath := filepath.Join( - transcriptsDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - continue - } - - // Nested layout: agent-transcripts// - // containing .{txt,jsonl}. - subDir := filepath.Join( - transcriptsDir, sf.Name(), - ) - subEntries, err := os.ReadDir(subDir) - if err != nil { - continue - } - dirName := sf.Name() - for _, sub := range subEntries { - if sub.IsDir() { - continue - } - name := sub.Name() - if !IsCursorTranscriptExt(name) { - continue - } - // Only accept files whose stem matches - // the parent directory name, e.g. - // /.jsonl. - stem := strings.TrimSuffix( - name, filepath.Ext(name), - ) - if stem != dirName { - continue - } - fullPath := filepath.Join( - subDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - } - } - for _, path := range seen { - files = append(files, DiscoveredFile{ - Path: path, - Project: project, - Agent: AgentCursor, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCursorSourceFile finds a Cursor transcript file by -// session UUID. Prefers .jsonl over .txt. 
-func FindCursorSourceFile( - projectsDir, sessionID string, -) string { - if projectsDir == "" || !IsValidSessionID(sessionID) { - return "" - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return "" - } - - for _, ext := range []string{".jsonl", ".txt"} { - target := sessionID + ext - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Nested layout first (matches discovery - // precedence), then flat layout. - candidates := []string{ - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", sessionID, target, - ), - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", target, - ), - } - for _, candidate := range candidates { - if !IsRegularFile(candidate) { - continue - } - resolved, err := filepath.EvalSymlinks( - candidate, - ) - if err != nil { - continue - } - rel, err := filepath.Rel( - resolvedRoot, resolved, - ) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - continue - } - return candidate - } - } - } - return "" -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { @@ -1784,81 +1578,3 @@ func extractIflowBaseSessionID(sessionID string) string { // If we didn't find 5 hyphens, this is not a fork ID return sessionID } - -// DiscoverVibeSessions finds all Vibe session files under the given root directory. 
-// Vibe stores sessions in: ~/.vibe/logs/session/session_YYYYMMDD_HHMMSS_uuid/ -// Each session directory contains messages.jsonl -func DiscoverVibeSessions(root string) []DiscoveredFile { - var results []DiscoveredFile - - entries, err := os.ReadDir(root) - if err != nil { - return results - } - - for _, entry := range entries { - if !isDirOrSymlink(entry, root) { - continue - } - - // Vibe session directories match pattern: session_YYYYMMDD_HHMMSS_uuid - // The uuid part can contain hyphens - if !strings.HasPrefix(entry.Name(), "session_") || !strings.Contains(entry.Name(), "_") { - continue - } - - sessionDir := filepath.Join(root, entry.Name()) - messagesPath := filepath.Join(sessionDir, "messages.jsonl") - - if info, err := os.Stat(messagesPath); err == nil && !info.IsDir() { - results = append(results, DiscoveredFile{ - Path: messagesPath, - Agent: AgentVibe, - Project: entry.Name(), - }) - } - } - - return results -} - -// FindVibeSourceFile locates a specific Vibe session file by ID. The ID is the -// session_id recorded in meta.json (a uuid), which usually differs from the -// session directory name. Sessions without meta.json fall back to the directory -// name, so a direct path is tried first before scanning meta.json files. -func FindVibeSourceFile(root, sessionID string) string { - // Fast path: sessionID is the directory name (no-meta fallback). - if messagesPath := filepath.Join(root, sessionID, "messages.jsonl"); isVibeMessagesFile(messagesPath) { - return messagesPath - } - - // Otherwise sessionID is a meta.json session_id; scan session - // directories and match on their recorded session_id. 
- entries, err := os.ReadDir(root) - if err != nil { - return "" - } - for _, entry := range entries { - if !isDirOrSymlink(entry, root) || !strings.HasPrefix(entry.Name(), "session_") { - continue - } - messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") - if !isVibeMessagesFile(messagesPath) { - continue - } - metaPath := filepath.Join(root, entry.Name(), "meta.json") - if meta, err := parseVibeMetadata(metaPath); err == nil && meta.SessionID == sessionID { - return messagesPath - } - } - return "" -} - -// isVibeMessagesFile reports whether path is an existing regular file. -func isVibeMessagesFile(path string) bool { - info, err := os.Stat(path) - if err != nil || info == nil { - return false - } - return !info.IsDir() -} diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 72f4c98ba..b44bb3c4b 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -141,7 +141,7 @@ func TestDiscoverClaudeProjects(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverClaudeProjects(dir) + files := ClaudeProjectSessionFiles(dir) assertDiscoveredFiles(t, files, tt.wantFiles, AgentClaude) @@ -156,7 +156,7 @@ func TestDiscoverClaudeProjects(t *testing.T) { t.Run("Nonexistent", func(t *testing.T) { dir := filepath.Join(t.TempDir(), "does-not-exist") - files := DiscoverClaudeProjects(dir) + files := ClaudeProjectSessionFiles(dir) assert.Nil(t, files, "expected nil") }) } @@ -307,7 +307,7 @@ func TestFindClaudeSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindClaudeSourceFile(dir, tt.targetID) + got := claudeFindSourceFile(dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -321,8 +321,8 @@ func TestFindClaudeSourceFile(t *testing.T) { dir := t.TempDir() tests := []string{"", "../etc/passwd", "a/b", "a b"} for _, id := range tests { - got := 
FindClaudeSourceFile(dir, id) - assert.Emptyf(t, got, "FindClaudeSourceFile(%q)", id) + got := claudeFindSourceFile(dir, id) + assert.Emptyf(t, got, "claudeFindSourceFile(%q)", id) } }) } @@ -1048,7 +1048,7 @@ func TestFindClaudeSourceFile_Symlink(t *testing.T) { t.Skipf("symlink not supported: %v", err) } - got := FindClaudeSourceFile(searchDir, "sess-abc") + got := claudeFindSourceFile(searchDir, "sess-abc") require.NotEmpty(t, got, "expected to find session via symlink") assert.Equal(t, linkDir, filepath.Dir(got), "expected path through symlink") @@ -1152,11 +1152,9 @@ func TestDiscoverCursorSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1225,11 +1223,9 @@ func TestDiscoverCursorSessions_NestedLayout(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1243,10 +1239,11 @@ func TestDiscoverCursorSessions_DedupPrefersJsonl(t *testing.T) { filepath.Join(transcripts, "sess.txt"): "user:\nhi", filepath.Join(transcripts, "sess.jsonl"): `{"role":"user"}`, }) - files := DiscoverCursorSessions(dir) - require.Len(t, files, 1, "files count") - assert.True(t, strings.HasSuffix(files[0].Path, ".jsonl"), - "expected .jsonl path, got %q", files[0].Path) + set := newCursorSourceSet([]string{dir}) + paths := 
set.discoverTranscriptPaths(dir) + require.Len(t, paths, 1, "paths count") + assert.True(t, strings.HasSuffix(paths[0], ".jsonl"), + "expected .jsonl path, got %q", paths[0]) } func TestParseCursorTranscriptRelPath(t *testing.T) { @@ -1321,7 +1318,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess1.txt"): "data", }) - got := FindCursorSourceFile(dir, "sess1") + got := cursorFindSourceFile(dir, "sess1") assert.NotEmpty(t, got, "expected to find .txt file") }) @@ -1330,7 +1327,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess2.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess2") + got := cursorFindSourceFile(dir, "sess2") assert.NotEmpty(t, got, "expected to find .jsonl file") }) @@ -1343,7 +1340,7 @@ func TestFindCursorSourceFile(t *testing.T) { jsonlPath := filepath.Join( dir, cursorTranscripts, "sess3.jsonl", ) - got := FindCursorSourceFile(dir, "sess3") + got := cursorFindSourceFile(dir, "sess3") assert.Equal(t, jsonlPath, got, "(.jsonl preferred)") }) @@ -1352,7 +1349,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess4", "sess4.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess4") + got := cursorFindSourceFile(dir, "sess4") require.NotEmpty(t, got, "expected to find nested .jsonl file") assert.True(t, strings.HasSuffix(got, filepath.Join("sess4", "sess4.jsonl")), "unexpected path %q", got) @@ -1364,14 +1361,14 @@ func TestFindCursorSourceFile(t *testing.T) { filepath.Join(cursorTranscripts, "sess5", "sess5.txt"): "old", filepath.Join(cursorTranscripts, "sess5", "sess5.jsonl"): "new", }) - got := FindCursorSourceFile(dir, "sess5") + got := cursorFindSourceFile(dir, "sess5") assert.True(t, strings.HasSuffix(got, "sess5.jsonl"), "expected .jsonl path, got %q", got) }) t.Run("NotFound", func(t *testing.T) { 
dir := t.TempDir() - got := FindCursorSourceFile(dir, "nonexistent") + got := cursorFindSourceFile(dir, "nonexistent") assert.Empty(t, got, "expected empty") }) } @@ -1427,7 +1424,7 @@ func TestIsPiSessionFile(t *testing.T) { func TestDiscoverVibeSessionsIntegration(t *testing.T) { // Test discovery with testdata - files := DiscoverVibeSessions("testdata/vibe") + files := discoverVibeTestSessions(t, "testdata/vibe") // Should find all session directories with messages.jsonl require.NotEmpty(t, files) @@ -1445,7 +1442,7 @@ func TestDiscoverVibeSessionsIntegration(t *testing.T) { func TestFindVibeSourceFileIntegration(t *testing.T) { // Test with actual testdata sessionID := "session_basic" - result := FindVibeSourceFile("testdata/vibe", sessionID) + result := findVibeTestSourceFile(t, "testdata/vibe", sessionID) expected := filepath.Join("testdata", "vibe", sessionID, "messages.jsonl") assert.Equal(t, expected, result) diff --git a/internal/parser/fork_test.go b/internal/parser/fork_test.go index 68e7f618e..6d2724730 100644 --- a/internal/parser/fork_test.go +++ b/internal/parser/fork_test.go @@ -14,7 +14,7 @@ import ( func parseTestContent(t *testing.T, name, content string, expectedLen int) []ParseResult { t.Helper() path := createTestFile(t, name, content) - results, err := ParseClaudeSession(path, "proj", "local") + results, err := parseClaudeSession(path, "proj", "local") require.NoError(t, err, "ParseClaudeSession") require.Len(t, results, expectedLen) return results diff --git a/internal/parser/hermes.go b/internal/parser/hermes.go index 7c2d42989..819a25dd5 100644 --- a/internal/parser/hermes.go +++ b/internal/parser/hermes.go @@ -54,17 +54,19 @@ type hermesStateMessage struct { codexMessageItems string } -// ParseHermesArchive parses a Hermes root directory. If a state.db is -// present, it uses that database for session metadata and usage while -// selecting the richest available message stream. 
Without state.db it -// falls back to the transcript-file parser. -func ParseHermesArchive(root, project, machine string) ([]ParseResult, error) { +// parseArchive parses a Hermes root directory. If a state.db is present, it +// uses that database for session metadata and usage while selecting the richest +// available message stream. Without state.db it falls back to the +// transcript-file parser. It owns the archive on-disk shape (state.db plus the +// sessions transcript directory) for the Hermes provider; the package-level +// entrypoint was folded onto the provider. +func (p *hermesProvider) parseArchive(root, project, machine string) ([]ParseResult, error) { stateDB, sessionsDir, ok := hermesStatePaths(root) if !ok { - return parseHermesTranscriptArchive(root, project, machine) + return p.parseTranscriptArchive(root, project, machine) } - results, err := parseHermesStateDB( + results, err := p.parseStateDB( stateDB, sessionsDir, project, machine, ) if err == nil { @@ -74,12 +76,12 @@ func ParseHermesArchive(root, project, machine string) ([]ParseResult, error) { "hermes: state db parse failed for %s: %v; falling back to transcripts", stateDB, err, ) - return parseHermesTranscriptArchive( + return p.parseTranscriptArchive( sessionsDir, project, machine, ) } -func parseHermesTranscriptArchive( +func (p *hermesProvider) parseTranscriptArchive( root, project, machine string, ) ([]ParseResult, error) { var results []ParseResult @@ -88,7 +90,7 @@ func parseHermesTranscriptArchive( if project != "" { fileProject = project } - sess, msgs, err := ParseHermesSession( + sess, msgs, err := p.parseSession( file.Path, fileProject, machine, ) if err != nil { @@ -103,7 +105,9 @@ func parseHermesTranscriptArchive( return results, nil } -// ParseHermesSession parses a Hermes Agent JSONL session file. +// parseSession parses a Hermes Agent session file. 
It owns the on-disk shape +// (flat JSONL transcripts plus session_*.json snapshots) for the Hermes +// provider; the package-level entrypoint was folded onto the provider. // // Hermes stores sessions as flat JSONL files in ~/.hermes/sessions/ // with filenames like 20260403_153620_5a3e2ff1.jsonl. @@ -114,7 +118,7 @@ func parseHermesTranscriptArchive( // - Assistant messages: {"role":"assistant", "content":"...", "reasoning":"...", // "finish_reason":"tool_calls|stop", "tool_calls":[...], "timestamp":"..."} // - Tool results: {"role":"tool", "content":"...", "tool_call_id":"...", "timestamp":"..."} -func ParseHermesSession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { +func (p *hermesProvider) parseSession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { if strings.HasSuffix(path, ".json") { return parseHermesJSONSession(path, project, machine) } @@ -546,7 +550,7 @@ func hermesStatePaths(root string) (stateDB, sessionsDir string, ok bool) { return "", "", false } -func parseHermesStateDB( +func (p *hermesProvider) parseStateDB( stateDB, sessionsDir, project, machine string, ) ([]ParseResult, error) { conn, err := sql.Open("sqlite3", "file:"+stateDB+"?mode=ro") @@ -580,7 +584,7 @@ func parseHermesStateDB( if _, ok := seen[rawID]; ok { continue } - sess, msgs, err := ParseHermesSession( + sess, msgs, err := p.parseSession( file.Path, file.Project, machine, ) if err != nil { @@ -1043,10 +1047,11 @@ func HermesSessionID(name string) string { return name } -// DiscoverHermesSessions finds Hermes session sources. When a sibling -// state.db exists, it prefers that archive root; otherwise it returns -// transcript files from the sessions directory. -func DiscoverHermesSessions(sessionsDir string) []DiscoveredFile { +// discoverHermesSessions finds Hermes session sources under root. When a +// sibling state.db exists, it prefers that archive root; otherwise it returns +// transcript files from the sessions directory. 
It is the provider-owned +// discovery body folded off the package-level entrypoint. +func discoverHermesSessions(sessionsDir string) []DiscoveredFile { if sessionsDir == "" { return nil } @@ -1112,8 +1117,10 @@ func discoverHermesTranscriptFiles(sessionsDir string) []DiscoveredFile { return files } -// FindHermesSourceFile finds a Hermes session file by session ID. -func FindHermesSourceFile(sessionsDir, sessionID string) string { +// findHermesSourceFile finds a Hermes transcript file by session ID under +// sessionsDir. It is the provider-owned find-source body folded off the +// package-level entrypoint. +func findHermesSourceFile(sessionsDir, sessionID string) string { if !IsValidSessionID(sessionID) { return "" } diff --git a/internal/parser/hermes_provider.go b/internal/parser/hermes_provider.go new file mode 100644 index 000000000..e486193d4 --- /dev/null +++ b/internal/parser/hermes_provider.go @@ -0,0 +1,699 @@ +package parser + +import ( + "context" + "crypto/sha256" + "database/sql" + "fmt" + "hash" + "io" + "log" + "os" + "path/filepath" + "sort" + "strings" +) + +var _ Provider = (*hermesProvider)(nil) + +type hermesProviderFactory struct { + def AgentDef +} + +func newHermesProviderFactory(def AgentDef) ProviderFactory { + return hermesProviderFactory{def: cloneAgentDef(def)} +} + +func (f hermesProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f hermesProviderFactory) Capabilities() Capabilities { + return hermesProviderCapabilities() +} + +func (f hermesProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &hermesProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: hermesProviderCapabilities(), + Config: cfg, + }, + sources: newHermesSourceSet(cfg.Roots), + } +} + +type hermesProvider struct { + ProviderBase + sources hermesSourceSet +} + +func (p *hermesProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + 
+func (p *hermesProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *hermesProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *hermesProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *hermesProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *hermesProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("hermes source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + if filepath.Base(path) == "state.db" { + results, err := p.parseArchive(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + // Mirror the legacy engine's stampHermesArchiveResults: every archive + // session's stored file identity is the state.db path with the + // aggregate (state.db plus transcripts) size and mtime, so a + // transcript-only change still refreshes the archive's freshness. 
+ size, mtime := hermesArchiveEffectiveFileInfo(path) + out := make([]ParseResultOutcome, 0, len(results)) + for i := range results { + results[i].Session.File.Path = path + results[i].Session.File.Size = size + results[i].Session.File.Mtime = mtime + out = append(out, ParseResultOutcome{ + Result: results[i], + DataVersion: DataVersionCurrent, + }) + } + return ParseOutcome{ + Results: out, + ResultSetComplete: true, + ForceReplace: true, + }, nil + } + + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type hermesSource struct { + Root string + Path string +} + +type hermesSourceSet struct { + roots []string +} + +func newHermesSourceSet(roots []string) hermesSourceSet { + return hermesSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s hermesSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, file := range discoverHermesSessions(root) { + source, ok := s.sourceRef(root, file.Path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s hermesSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, hermesWatchRoots(root)...) 
+ } + return WatchPlan{Roots: roots}, nil +} + +func (s hermesSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + allowMissing := jsonlMissingPathFallbackAllowed(req) + if req.WatchRoot != "" { + watchRoot := filepath.Clean(req.WatchRoot) + for _, root := range s.roots { + if !hermesWatchRootMatches(root, watchRoot) { + continue + } + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req.Path, allowMissing) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s hermesSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceForPath(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + if stateDB, _, ok := hermesStatePaths(root); ok && + IsValidSessionID(req.RawSessionID) { + found, err := hermesStateDBHasSession(stateDB, req.RawSessionID) + switch { + case err != nil: + // Mirror parseArchive: an unreadable or schema-incompatible + // state.db falls back to transcripts rather than aborting the + // lookup, so a valid transcript session next to a bad state.db + // stays resolvable for resync. 
+ log.Printf( + "hermes: state db lookup failed for %s: %v; "+ + "falling back to transcripts", stateDB, err, + ) + case !found: + continue + default: + if source, ok := s.sourceRef(root, stateDB); ok { + return source, true, nil + } + } + } + transcriptRoot := hermesTranscriptRoot(root) + path := findHermesSourceFile(transcriptRoot, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func hermesStateDBHasSession(stateDB string, rawID string) (bool, error) { + conn, err := sql.Open("sqlite3", "file:"+stateDB+"?mode=ro") + if err != nil { + return false, fmt.Errorf("open hermes state db: %w", err) + } + defer conn.Close() + + var found int + err = conn.QueryRow( + "SELECT 1 FROM sessions WHERE id = ? LIMIT 1", + rawID, + ).Scan(&found) + if err == nil { + return true, nil + } + if err == sql.ErrNoRows { + return false, nil + } + return false, fmt.Errorf("query hermes session %s: %w", rawID, err) +} + +func (s hermesSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("hermes source path unavailable") + } + if filepath.Base(path) == "state.db" { + return hermesArchiveFingerprint(source, path) + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Hash: hash, + }, nil +} + +func (s hermesSourceSet) 
pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case hermesSource: + return src.Path, src.Path != "" + case *hermesSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + for _, root := range s.roots { + if ref, ok := s.sourceForPath(root, candidate); ok { + src := ref.Opaque.(hermesSource) + return src.Path, true + } + } + } + return "", false +} + +func (s hermesSourceSet) sourceForPath(root, path string) (SourceRef, bool) { + return s.sourceForChangedPath(root, path, false) +} + +func (s hermesSourceSet) sourceForChangedPath( + root, + path string, + allowMissing bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if stateDB, sessionsDir, ok := hermesStatePaths(root); ok { + if samePath(path, stateDB) || hermesPathInTranscriptDir(sessionsDir, path) { + return hermesArchiveSourceRef(root, stateDB) + } + return SourceRef{}, false + } + if allowMissing { + if stateDB, sessionsDir, ok := hermesArchivePathsForEvent(root, path); ok && + (samePath(path, stateDB) || hermesPathInTranscriptDir(sessionsDir, path)) { + return hermesArchiveSourceRef(root, stateDB) + } + transcriptRoot := hermesTranscriptRoot(root) + if hermesPathInTranscriptDir(transcriptRoot, path) { + return hermesTranscriptSourceRef(root, path) + } + } + return s.sourceRef(root, path) +} + +func (s hermesSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if stateDB, _, ok := hermesStatePaths(root); ok && samePath(path, stateDB) { + return hermesArchiveSourceRef(root, stateDB) + } + transcriptRoot := hermesTranscriptRoot(root) + if !hermesPathInTranscriptDir(transcriptRoot, path) || !IsRegularFile(path) { + return SourceRef{}, false + } + return hermesTranscriptSourceRef(root, path) +} + +func hermesArchiveSourceRef(root, stateDB 
string) (SourceRef, bool) { + root = filepath.Clean(root) + stateDB = filepath.Clean(stateDB) + return SourceRef{ + Provider: AgentHermes, + Key: stateDB, + DisplayPath: stateDB, + FingerprintKey: stateDB, + Opaque: hermesSource{ + Root: root, + Path: stateDB, + }, + }, true +} + +func hermesTranscriptSourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + return SourceRef{ + Provider: AgentHermes, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: hermesSource{ + Root: root, + Path: path, + }, + }, true +} + +func hermesWatchRoots(root string) []WatchRoot { + root = filepath.Clean(root) + if stateDB, sessionsDir, ok := hermesArchiveRootPaths(root); ok { + watchRoots := []WatchRoot{{ + Path: filepath.Dir(stateDB), + Recursive: false, + IncludeGlobs: []string{"state.db"}, + DebounceKey: string(AgentHermes) + ":archive:" + root, + }} + watchRoots = append(watchRoots, WatchRoot{ + Path: sessionsDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "session_*.json"}, + DebounceKey: string(AgentHermes) + ":sessions:" + root, + }) + return watchRoots + } + return []WatchRoot{{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"state.db", "*.jsonl", "session_*.json"}, + DebounceKey: string(AgentHermes) + ":sessions:" + root, + }} +} + +func ResolveHermesWatchRoots(root string) []string { + root = filepath.Clean(root) + if _, sessionsDir, ok := hermesArchiveRootPaths(root); ok { + return []string{sessionsDir} + } + return []string{root} +} + +func ResolveHermesShallowWatchRoots(root string) []string { + root = filepath.Clean(root) + if stateDB, _, ok := hermesArchiveRootPaths(root); ok { + return []string{filepath.Dir(stateDB)} + } + return nil +} + +func hermesWatchRootMatches(root, watchRoot string) bool { + root = filepath.Clean(root) + watchRoot = filepath.Clean(watchRoot) + if samePath(root, watchRoot) { + return true + } + if stateDB, sessionsDir, ok := hermesArchiveRootPaths(root); 
ok { + return samePath(watchRoot, filepath.Dir(stateDB)) || + samePath(watchRoot, sessionsDir) + } + switch filepath.Base(root) { + case "state.db": + return samePath(watchRoot, filepath.Dir(root)) || + samePath(watchRoot, filepath.Join(filepath.Dir(root), "sessions")) + case "sessions": + return samePath(watchRoot, filepath.Dir(root)) + default: + return samePath(watchRoot, filepath.Join(root, "sessions")) + } +} + +func hermesArchivePathsForEvent(root, path string) (stateDB, sessionsDir string, ok bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + switch { + case filepath.Base(root) == "state.db": + stateDB = root + sessionsDir = filepath.Join(filepath.Dir(root), "sessions") + case filepath.Base(root) == "sessions": + stateDB = filepath.Join(filepath.Dir(root), "state.db") + sessionsDir = root + case samePath(path, filepath.Join(root, "state.db")) || + IsRegularFile(filepath.Join(root, "state.db")): + stateDB = filepath.Join(root, "state.db") + sessionsDir = filepath.Join(root, "sessions") + default: + return "", "", false + } + return stateDB, sessionsDir, true +} + +func hermesArchiveRootPaths(root string) (stateDB, sessionsDir string, ok bool) { + root = filepath.Clean(root) + if stateDB, sessionsDir, ok := hermesStatePaths(root); ok { + return stateDB, sessionsDir, true + } + switch filepath.Base(root) { + case "state.db": + return root, filepath.Join(filepath.Dir(root), "sessions"), true + case "sessions": + return filepath.Join(filepath.Dir(root), "state.db"), root, true + default: + stateDB = filepath.Join(root, "state.db") + sessionsDir = filepath.Join(root, "sessions") + if IsRegularFile(stateDB) { + return stateDB, sessionsDir, true + } + if info, err := os.Stat(sessionsDir); err == nil && info.IsDir() { + return stateDB, sessionsDir, true + } + return "", "", false + } +} + +func hermesTranscriptRoot(root string) string { + root = filepath.Clean(root) + if _, sessionsDir, ok := hermesStatePaths(root); ok { + return sessionsDir + } + 
childSessions := filepath.Join(root, "sessions") + if info, err := os.Stat(childSessions); err == nil && info.IsDir() { + return childSessions + } + return root +} + +func hermesPathInTranscriptDir(dir, path string) bool { + dir = filepath.Clean(dir) + path = filepath.Clean(path) + if !samePath(filepath.Dir(path), dir) { + return false + } + name := filepath.Base(path) + if strings.HasSuffix(name, ".jsonl") { + return true + } + return strings.HasSuffix(name, ".json") && strings.HasPrefix(name, "session_") +} + +func hermesArchiveFingerprint(source SourceRef, stateDB string) (SourceFingerprint, error) { + stateInfo, err := os.Stat(stateDB) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", stateDB, err) + } + if stateInfo.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", stateDB) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString( + source.FingerprintKey, + source.Key, + stateDB, + ), + Size: stateInfo.Size(), + MTimeNS: stateInfo.ModTime().UnixNano(), + } + h := sha256.New() + if err := addHermesFingerprintPart(h, "state", stateDB, stateInfo); err != nil { + return SourceFingerprint{}, err + } + _, sessionsDir, _ := hermesStatePaths(stateDB) + for _, file := range discoverHermesTranscriptFiles(sessionsDir) { + info, err := os.Stat(file.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", file.Path, err) + } + fingerprint.Size += info.Size() + if mtime := info.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addHermesFingerprintPart(h, "transcript", file.Path, info); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +// hermesArchiveEffectiveFileInfo returns the aggregate size and mtime of a +// Hermes archive: the state.db plus every transcript file in its sessions +// directory. 
It reproduces the legacy engine's hermesArchiveEffectiveInfo so a +// transcript-only change shifts the stored archive freshness even though the +// state.db itself is unchanged. The transcript set matches the legacy +// hermesArchiveTranscriptFiles: every .jsonl and session_*.json file directly +// under the sessions directory, without the .jsonl/.json dedup used elsewhere. +func hermesArchiveEffectiveFileInfo(stateDB string) (int64, int64) { + info, err := os.Stat(stateDB) + if err != nil { + return 0, 0 + } + size := info.Size() + mtime := info.ModTime().UnixNano() + _, sessionsDir, ok := hermesStatePaths(stateDB) + if !ok { + return size, mtime + } + for _, path := range hermesArchiveTranscriptFiles(sessionsDir) { + fileInfo, err := os.Stat(path) + if err != nil || fileInfo == nil || fileInfo.IsDir() { + continue + } + size += fileInfo.Size() + if fileMtime := fileInfo.ModTime().UnixNano(); fileMtime > mtime { + mtime = fileMtime + } + } + return size, mtime +} + +// hermesArchiveTranscriptFiles lists every .jsonl and session_*.json file +// directly under sessionsDir, sorted by path. It mirrors the legacy engine +// helper of the same name so the provider's effective-info aggregation matches +// historical behavior exactly. 
// hermesArchiveTranscriptFiles returns the paths of the transcript files that
// sit directly inside sessionsDir, sorted lexically. An entry qualifies when
// it is not a directory and its name either ends in ".jsonl" or has the form
// "session_*.json". Subdirectories are not descended into.
//
// An empty sessionsDir, or a directory that cannot be read, yields nil. A
// readable directory with no qualifying entries yields an empty slice.
func hermesArchiveTranscriptFiles(sessionsDir string) []string {
	if sessionsDir == "" {
		return nil
	}
	entries, err := os.ReadDir(sessionsDir)
	if err != nil {
		return nil
	}

	matches := make([]string, 0, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		switch {
		case strings.HasSuffix(name, ".jsonl"):
			// JSONL transcript.
		case strings.HasPrefix(name, "session_") && strings.HasSuffix(name, ".json"):
			// session_*.json transcript.
		default:
			continue
		}
		matches = append(matches, filepath.Join(sessionsDir, name))
	}
	sort.Strings(matches)
	return matches
}
--- /dev/null +++ b/internal/parser/hermes_provider_test.go @@ -0,0 +1,502 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHermesProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentHermes) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestHermesProviderTranscriptSourceMethods(t *testing.T) { + root := t.TempDir() + jsonlPath := filepath.Join(root, "child.jsonl") + jsonPath := filepath.Join(root, "session_jsononly.json") + writeSourceFile(t, jsonlPath, hermesProviderJSONLFixture("jsonl question")) + writeSourceFile(t, jsonPath, hermesProviderJSONFixture("json question")) + writeSourceFile(t, filepath.Join(root, "scratch.json"), "{}\n") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db", "*.jsonl", "session_*.json"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{jsonlPath, jsonPath}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~hermes:child", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonlPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + 
RawSessionID: "jsononly", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, jsonPath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, jsonPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(jsonlPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: jsonlPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, jsonlPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "scratch.json"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) +} + +func TestHermesProviderStateDBSourceMethods(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + transcriptPath := filepath.Join(sessionsDir, "session_child.json") + writeSourceFile(t, transcriptPath, hermesProviderJSONFixture("transcript question")) + stateDB := filepath.Join(root, "state.db") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, 
[]string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, stateDB, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~hermes:child", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, stateDB, found.DisplayPath) + + stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + transcriptInfo, err := os.Stat(transcriptPath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, stateDB, fingerprint.Key) + assert.Equal(t, stateInfo.Size()+transcriptInfo.Size(), fingerprint.Size) + assert.Equal( + t, + max(stateInfo.ModTime().UnixNano(), transcriptInfo.ModTime().UnixNano()), + fingerprint.MTimeNS, + ) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + }{ + {name: "state db", path: stateDB}, + {name: "archive transcript", path: transcriptPath}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + } + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + + require.NoError(t, os.Remove(transcriptPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + 
ChangedPathRequest{Path: transcriptPath, EventKind: "remove", WatchRoot: sessionsDir}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + + require.NoError(t, os.Remove(stateDB)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) +} + +func TestHermesProviderArchiveWatchRoots(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + stateDB := filepath.Join(root, "state.db") + + for _, tc := range []struct { + name string + configRoot string + }{ + {name: "archive parent", configRoot: root}, + {name: "sessions directory", configRoot: sessionsDir}, + {name: "state db file", configRoot: stateDB}, + } { + t.Run(tc.name, func(t *testing.T) { + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{tc.configRoot}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + } +} + +func TestHermesProviderArchiveWatchRootsBeforeArchiveComplete(t *testing.T) { + t.Run("state db exists 
before sessions directory", func(t *testing.T) { + root := t.TempDir() + createHermesStateDB(t, root) + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + + t.Run("direct state db root before file exists", func(t *testing.T) { + root := t.TempDir() + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{stateDB}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + createHermesStateDB(t, root) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: 
root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) + + t.Run("sessions directory root before state db exists", func(t *testing.T) { + root := t.TempDir() + stateDB := filepath.Join(root, "state.db") + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{sessionsDir}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"state.db"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, sessionsDir, plan.Roots[1].Path) + assert.True(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"*.jsonl", "session_*.json"}, plan.Roots[1].IncludeGlobs) + + createHermesStateDB(t, root) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: stateDB, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, stateDB, changed[0].DisplayPath) + }) +} + +func TestHermesProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "child.jsonl") + writeSourceFile(t, sourcePath, hermesProviderJSONLFixture("parse question")) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + 
require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "hermes:child", result.Result.Session.ID) + assert.Equal(t, AgentHermes, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, "abc123", result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestHermesProviderParseStateDB(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + createHermesStateDB(t, root) + transcriptPath := filepath.Join(sessionsDir, "session_child.json") + writeSourceFile( + t, + transcriptPath, + hermesProviderJSONFixture("archive transcript"), + ) + stateDB := filepath.Join(root, "state.db") + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: stateDB, Hash: "archive-hash"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.True(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "hermes:child", result.Result.Session.ID) + assert.Equal(t, "hermes:parent", result.Result.Session.ParentSessionID) + assert.Equal(t, RelContinuation, result.Result.Session.RelationshipType) + assert.Equal(t, "Child Session", result.Result.Session.SessionName) + assert.Equal(t, "hermes-state-db", result.Result.Session.SourceVersion) + assert.Equal(t, "devbox", 
result.Result.Session.Machine) + require.Len(t, result.Result.UsageEvents, 1) + assert.Len(t, result.Result.Messages, 2) + + // The provider reproduces the legacy engine's stampHermesArchiveResults: + // every archive session's stored file identity is the state.db path with + // the aggregate (state.db plus transcripts) size and mtime, so a + // transcript-only change still refreshes the archive's freshness. + stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + transcriptInfo, err := os.Stat(transcriptPath) + require.NoError(t, err) + assert.Equal(t, stateDB, result.Result.Session.File.Path) + assert.Equal( + t, + stateInfo.Size()+transcriptInfo.Size(), + result.Result.Session.File.Size, + ) + assert.Equal( + t, + max(stateInfo.ModTime().UnixNano(), transcriptInfo.ModTime().UnixNano()), + result.Result.Session.File.Mtime, + ) +} + +func TestHermesProviderFindSourceDoesNotReturnStateDBForMissingRawID(t *testing.T) { + root := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(root, "sessions"), 0o755)) + createHermesStateDB(t, root) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "missing-valid-id", + }) + + require.NoError(t, err) + assert.False(t, ok) + assert.Empty(t, source) +} + +func TestHermesProviderFindSourceFallsBackToTranscriptWhenStateDBUnreadable(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + + // A present-but-unreadable state.db: hermesStateDBHasSession opens it + // lazily, then errors on the first query because the bytes are not a + // SQLite database. parseArchive logs and falls back to transcripts in this + // case, so FindSource must do the same rather than aborting the lookup. 
// hermesProviderJSONLFixture returns a three-line JSONL transcript: a
// session_meta record, a user message whose content is firstMessage, and an
// assistant reply of "Done.". Every line, including the last, ends in a
// newline.
//
// firstMessage is spliced into the JSON verbatim with no escaping, so callers
// must pass text that is already safe inside a JSON string literal.
func hermesProviderJSONLFixture(firstMessage string) string {
	const (
		metaLine      = `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`
		userPrefix    = `{"role":"user","content":"`
		userSuffix    = `","timestamp":"2026-05-14T10:01:00.000000"}`
		assistantLine = `{"role":"assistant","content":"Done.","timestamp":"2026-05-14T10:02:00.000000"}`
	)
	userLine := userPrefix + firstMessage + userSuffix
	return metaLine + "\n" + userLine + "\n" + assistantLine + "\n"
}
+func newHermesTestProvider(t *testing.T, roots ...string) *hermesProvider { + t.Helper() + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + hp, ok := provider.(*hermesProvider) + require.True(t, ok) + return hp +} + +// parseHermesTestSession parses a Hermes transcript at path through the +// provider-owned parse method, replacing the removed package-level +// ParseHermesSession entrypoint. +func parseHermesTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newHermesTestProvider(t).parseSession(path, project, machine) +} + +// parseHermesTestArchive parses a Hermes archive root through the provider-owned +// archive method, replacing the removed package-level ParseHermesArchive +// entrypoint. +func parseHermesTestArchive( + t *testing.T, root, project, machine string, +) ([]ParseResult, error) { + t.Helper() + return newHermesTestProvider(t).parseArchive(root, project, machine) +} + +// discoverHermesTestSessions discovers Hermes sources under root through the +// provider source set, replacing the removed package-level +// DiscoverHermesSessions entrypoint. +func discoverHermesTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + return discoverHermesSessions(root) +} + +// findHermesTestSourceFile resolves a Hermes session ID to a transcript path +// through the provider source set, replacing the removed package-level +// FindHermesSourceFile entrypoint. 
+func findHermesTestSourceFile(t *testing.T, sessionsDir, sessionID string) string { + t.Helper() + return findHermesSourceFile(sessionsDir, sessionID) +} + func runHermesJSONLTest( t *testing.T, filename, content string, ) (*ParsedSession, []ParsedMessage) { @@ -21,8 +72,8 @@ func runHermesJSONLTest( filename = "20260403_153620_5a3e2ff1.jsonl" } path := createTestFile(t, filename, content) - sess, msgs, err := ParseHermesSession( - path, "", "local", + sess, msgs, err := parseHermesTestSession( + t, path, "", "local", ) require.NoError(t, err) return sess, msgs @@ -36,8 +87,8 @@ func runHermesJSONTest( filename = "session_20260403_153620_5a3e2ff1.json" } path := createTestFile(t, filename, content) - sess, msgs, err := ParseHermesSession( - path, "", "local", + sess, msgs, err := parseHermesTestSession( + t, path, "", "local", ) require.NoError(t, err) return sess, msgs @@ -47,7 +98,10 @@ func createHermesStateDB(t *testing.T, root string) { t.Helper() db, err := sql.Open("sqlite3", filepath.Join(root, "state.db")) require.NoError(t, err) - t.Cleanup(func() { _ = db.Close() }) + // Close the setup handle when this helper returns rather than at test + // cleanup. Tests delete state.db mid-run to exercise deletion handling, and + // Windows refuses to remove a file still held open by this process. 
+ defer func() { _ = db.Close() }() _, err = db.Exec(` CREATE TABLE sessions ( id TEXT PRIMARY KEY, @@ -135,7 +189,7 @@ func TestParseHermesArchive_StateDBMetadataUsageAndTranscriptChoice( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -183,7 +237,7 @@ func TestParseHermesArchive_FallsBackToTranscriptsWhenStateDBUnreadable( 0o644, )) - results, err := ParseHermesArchive(root, "override-project", "local") + results, err := parseHermesTestArchive(t, root, "override-project", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -211,7 +265,7 @@ func TestParseHermesArchive_UsesStateMessagesWhenJSONLIsLowerQuality( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 1) @@ -240,7 +294,7 @@ func TestParseHermesArchiveIncludesTranscriptsMissingFromStateDB( 0o644, )) - results, err := ParseHermesArchive(root, "", "local") + results, err := parseHermesTestArchive(t, root, "", "local") require.NoError(t, err) require.Len(t, results, 2) @@ -408,7 +462,7 @@ func TestDiscoverHermesSessionsFindsTranscriptOnlyRoot( path := filepath.Join(sessionsDir, "session_child.json") require.NoError(t, os.WriteFile(path, []byte(`{"messages":[]}`), 0o644)) - files := DiscoverHermesSessions(root) + files := discoverHermesTestSessions(t, root) require.Len(t, files, 1) assert.Equal(t, path, files[0].Path) } @@ -530,8 +584,8 @@ func TestParseHermesSession_JSONL_ExplicitProject(t *testing.T) { path := createTestFile( t, "20260403_153620_abc.jsonl", content, ) - sess, _, err := ParseHermesSession( - path, "my-project", "local", + sess, _, err := parseHermesTestSession( + t, path, "my-project", "local", ) require.NoError(t, err) require.NotNil(t, sess) @@ -638,8 +692,8 @@ func TestParseHermesSession_JSONL_FirstMessageTruncation(t 
*testing.T) { func TestParseHermesSession_JSONL_Errors(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParseHermesSession( - "/nonexistent/file.jsonl", "", "local", + _, _, err := parseHermesTestSession( + t, "/nonexistent/file.jsonl", "", "local", ) assert.Error(t, err) }) @@ -767,8 +821,8 @@ func TestParseHermesSession_JSON_MessageTimestampsExtendBounds( func TestParseHermesSession_JSON_Errors(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParseHermesSession( - "/nonexistent/file.json", "", "local", + _, _, err := parseHermesTestSession( + t, "/nonexistent/file.json", "", "local", ) assert.Error(t, err) }) @@ -777,7 +831,7 @@ func TestParseHermesSession_JSON_Errors(t *testing.T) { path := createTestFile( t, "session_bad.json", `"just a string"`, ) - _, _, err := ParseHermesSession(path, "", "local") + _, _, err := parseHermesTestSession(t, path, "", "local") assert.Error(t, err) assert.Contains(t, err.Error(), "invalid JSON") }) @@ -1025,7 +1079,7 @@ func TestDiscoverHermesSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverHermesSessions(dir) + files := discoverHermesTestSessions(t, dir) assertDiscoveredFiles( t, files, tt.wantFiles, AgentHermes, ) @@ -1033,13 +1087,13 @@ func TestDiscoverHermesSessions(t *testing.T) { } t.Run("empty string dir", func(t *testing.T) { - files := DiscoverHermesSessions("") + files := discoverHermesTestSessions(t, "") assert.Nil(t, files) }) t.Run("nonexistent dir", func(t *testing.T) { - files := DiscoverHermesSessions( - filepath.Join(t.TempDir(), "nope"), + files := discoverHermesTestSessions( + t, filepath.Join(t.TempDir(), "nope"), ) assert.Nil(t, files) }) @@ -1088,7 +1142,7 @@ func TestFindHermesSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindHermesSourceFile(dir, tt.sessionID) + got := findHermesTestSourceFile(t, dir, tt.sessionID) want := "" if 
tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -1105,7 +1159,7 @@ func TestFindHermesSourceFile(t *testing.T) { "20260403_aaa.jsonl": "{}", }) for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindHermesSourceFile(dir, id) + got := findHermesTestSourceFile(t, dir, id) if got != "" { t.Errorf( "FindHermesSourceFile(%q) = %q, want empty", @@ -1172,8 +1226,13 @@ func TestHermesRegistryEntry(t *testing.T) { assert.Equal(t, "hermes:", found.IDPrefix) assert.True(t, found.FileBased) assert.Contains(t, found.DefaultDirs, ".hermes/sessions") - assert.NotNil(t, found.DiscoverFunc) - assert.NotNil(t, found.FindSourceFunc) + // Hermes is provider-authoritative: discovery and source lookup live on the + // hermesProvider, not on legacy AgentDef hooks. The watch-root resolvers + // stay because they are provider-owned and consumed by watcher setup. + assert.Nil(t, found.DiscoverFunc) + assert.Nil(t, found.FindSourceFunc) + assert.NotNil(t, found.WatchRootsFunc) + assert.NotNil(t, found.ShallowWatchRootsFunc) } // --- File info --- @@ -1190,7 +1249,7 @@ func TestParseHermesSession_FileInfo(t *testing.T) { info, err := os.Stat(path) require.NoError(t, err) - sess, _, err := ParseHermesSession(path, "", "local") + sess, _, err := parseHermesTestSession(t, path, "", "local") require.NoError(t, err) require.NotNil(t, sess) diff --git a/internal/parser/openhands.go b/internal/parser/openhands.go index 0945e015b..c2644ad6c 100644 --- a/internal/parser/openhands.go +++ b/internal/parser/openhands.go @@ -7,7 +7,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" @@ -20,82 +19,6 @@ const ( openHandsObservationEvent = "ObservationEvent" ) -// DiscoverOpenHandsSessions finds OpenHands CLI conversation -// directories under ~/.openhands/conversations. 
-func DiscoverOpenHandsSessions( - conversationsDir string, -) []DiscoveredFile { - entries, err := os.ReadDir(conversationsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !entry.IsDir() || !IsValidSessionID(entry.Name()) { - continue - } - sessionDir := filepath.Join( - conversationsDir, entry.Name(), - ) - if !isOpenHandsSessionDir(sessionDir) { - continue - } - files = append(files, DiscoveredFile{ - Path: sessionDir, - Agent: AgentOpenHands, - }) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindOpenHandsSourceFile locates an OpenHands conversation -// directory by its raw session ID. -func FindOpenHandsSourceFile( - conversationsDir, rawID string, -) string { - if conversationsDir == "" || !IsValidSessionID(rawID) { - return "" - } - - candidates := []string{rawID} - stripped := strings.ReplaceAll(rawID, "-", "") - if stripped != rawID { - candidates = append(candidates, stripped) - } - - for _, cand := range candidates { - sessionDir := filepath.Join(conversationsDir, cand) - if isOpenHandsSessionDir(sessionDir) { - return sessionDir - } - } - - entries, err := os.ReadDir(conversationsDir) - if err != nil { - return "" - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - sessionDir := filepath.Join( - conversationsDir, entry.Name(), - ) - if !isOpenHandsSessionDir(sessionDir) { - continue - } - if normalizeOpenHandsSessionID(entry.Name()) == normalizeOpenHandsSessionID(rawID) { - return sessionDir - } - } - return "" -} - // OpenHandsSnapshot computes synthetic file metadata for an // OpenHands conversation directory by hashing the relevant // metadata of base_state.json, TASKS.json, and events/*.json. @@ -184,9 +107,9 @@ func OpenHandsSnapshot(path string) (FileInfo, error) { }, nil } -// ParseOpenHandsSession parses a single OpenHands CLI -// conversation directory into a session and messages. 
-func ParseOpenHandsSession( +// parseSession parses a single OpenHands CLI conversation +// directory into a session and messages. +func (p *openHandsProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { sessionDir, err := normalizeOpenHandsSessionPath(path) diff --git a/internal/parser/openhands_provider.go b/internal/parser/openhands_provider.go new file mode 100644 index 000000000..42cc2dcdb --- /dev/null +++ b/internal/parser/openhands_provider.go @@ -0,0 +1,414 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*openHandsProvider)(nil) + +type openHandsProviderFactory struct { + def AgentDef +} + +func newOpenHandsProviderFactory(def AgentDef) ProviderFactory { + return openHandsProviderFactory{def: cloneAgentDef(def)} +} + +func (f openHandsProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f openHandsProviderFactory) Capabilities() Capabilities { + return openHandsProviderCapabilities() +} + +func (f openHandsProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &openHandsProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: openHandsProviderCapabilities(), + Config: cfg, + }, + sources: newOpenHandsSourceSet(cfg.Roots), + } +} + +type openHandsProvider struct { + ProviderBase + sources openHandsSourceSet +} + +func (p *openHandsProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *openHandsProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *openHandsProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *openHandsProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = 
ProviderFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *openHandsProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *openHandsProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("openhands source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type openHandsSource struct { + Root string + Path string +} + +type openHandsSourceSet struct { + roots []string +} + +func newOpenHandsSourceSet(roots []string) openHandsSourceSet { + return openHandsSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s openHandsSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + entries, err := os.ReadDir(root) + if err != nil { + continue + } + for _, entry := range entries { + if !entry.IsDir() || !IsValidSessionID(entry.Name()) { + continue + } + sessionDir := filepath.Join(root, entry.Name()) + source, ok := s.sourceRef(root, sessionDir) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + 
return sources, nil +} + +func (s openHandsSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: false, + DebounceKey: string(AgentOpenHands) + ":dir:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s openHandsSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !s.hasRoot(root) { + return nil, nil + } + source, ok := s.sourceForPathInRoot(root, req.Path) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil + } + for _, root := range s.roots { + source, ok := s.sourceForPathInRoot(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s openHandsSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + if source, ok := s.sourceForPath(path); ok { + return source, true, nil + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.sessionDirForID(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// sessionDirForID locates an OpenHands conversation directory under +// root by its raw session ID. It first tries the raw ID and its +// dash-stripped form as literal directory names, then falls back to +// matching any session directory whose normalized ID equals the +// normalized raw ID. 
+func (s openHandsSourceSet) sessionDirForID(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + candidates := []string{rawID} + stripped := strings.ReplaceAll(rawID, "-", "") + if stripped != rawID { + candidates = append(candidates, stripped) + } + for _, cand := range candidates { + sessionDir := filepath.Join(root, cand) + if isOpenHandsSessionDir(sessionDir) { + return sessionDir + } + } + + entries, err := os.ReadDir(root) + if err != nil { + return "" + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + sessionDir := filepath.Join(root, entry.Name()) + if !isOpenHandsSessionDir(sessionDir) { + continue + } + if normalizeOpenHandsSessionID(entry.Name()) == + normalizeOpenHandsSessionID(rawID) { + return sessionDir + } + } + return "" +} + +func (s openHandsSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("openhands source path unavailable") + } + snapshot, err := OpenHandsSnapshot(path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: snapshot.Size, + MTimeNS: snapshot.Mtime, + Hash: snapshot.Hash, + }, nil +} + +func (s openHandsSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case openHandsSource: + return src.Path, src.Path != "" + case *openHandsSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if ref, ok := s.sourceForPath(candidate); ok { + src := ref.Opaque.(openHandsSource) + return src.Path, true + } + } + return "", false +} + +func (s openHandsSourceSet) sourceForPath(path 
string) (SourceRef, bool) { + for _, root := range s.roots { + if source, ok := s.sourceForPathInRoot(root, path); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (s openHandsSourceSet) sourceForPathInRoot( + root string, + path string, +) (SourceRef, bool) { + sessionDir, ok := openHandsSessionDirForPath(root, path) + if !ok { + return SourceRef{}, false + } + return s.sourceRef(root, sessionDir) +} + +func (s openHandsSourceSet) sourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !isOpenHandsSessionDir(path) { + return SourceRef{}, false + } + rel, err := filepath.Rel(root, path) + if err != nil || rel == "." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) || + strings.Contains(rel, string(filepath.Separator)) { + return SourceRef{}, false + } + if !IsValidSessionID(rel) { + return SourceRef{}, false + } + return SourceRef{ + Provider: AgentOpenHands, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: openHandsSource{ + Root: root, + Path: path, + }, + }, true +} + +func (s openHandsSourceSet) hasRoot(root string) bool { + for _, configured := range s.roots { + if samePath(root, configured) { + return true + } + } + return false +} + +func openHandsSessionDirForPath(root, path string) (string, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, err := filepath.Rel(root, path) + if err != nil || rel == "." || rel == ".." 
|| + strings.HasPrefix(rel, ".."+string(filepath.Separator)) { + return "", false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) == 0 || !IsValidSessionID(parts[0]) { + return "", false + } + switch len(parts) { + case 1: + case 2: + if parts[1] != "base_state.json" && parts[1] != "TASKS.json" { + return "", false + } + case 3: + if parts[1] != "events" || filepath.Ext(parts[2]) != ".json" { + return "", false + } + default: + return "", false + } + return filepath.Join(root, parts[0]), true +} + +func openHandsProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/openhands_provider_test.go b/internal/parser/openhands_provider_test.go new file mode 100644 index 000000000..5e07f6ba1 --- /dev/null +++ b/internal/parser/openhands_provider_test.go @@ -0,0 +1,209 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOpenHandsProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentOpenHands) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + 
require.NotNil(t, provider) +} + +func TestOpenHandsProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "086c7ecf-6cb7-46b6-9fbc-b900358d1247" + dirName := "086c7ecf6cb746b69fbcb900358d1247" + sessionDir := openHandsProviderWriteSession( + t, root, dirName, sessionID, "provider question", + ) + openHandsProviderWriteInvalidSession(t, root, "missing-events") + writeSourceFile(t, filepath.Join(root, "notes.txt"), "{}\n") + + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.NotEmpty(t, plan.Roots[0].DebounceKey) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentOpenHands, discovered[0].Provider) + assert.Equal(t, sessionDir, discovered[0].Key) + assert.Equal(t, sessionDir, discovered[0].DisplayPath) + assert.Equal(t, sessionDir, discovered[0].FingerprintKey) + assert.Empty(t, discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~openhands:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: dirName, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + snapshot, err := OpenHandsSnapshot(sessionDir) + require.NoError(t, err) + fingerprint, err := 
provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sessionDir, fingerprint.Key) + assert.Equal(t, snapshot.Size, fingerprint.Size) + assert.Equal(t, snapshot.Mtime, fingerprint.MTimeNS) + assert.Equal(t, snapshot.Hash, fingerprint.Hash) + + for _, changedPath := range []string{ + sessionDir, + filepath.Join(sessionDir, "base_state.json"), + filepath.Join(sessionDir, "TASKS.json"), + filepath.Join(sessionDir, "events", "event-00000-user.json"), + } { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: changedPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1, changedPath) + assert.Equal(t, sessionDir, changed[0].DisplayPath) + } + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(sessionDir, "events", "notes.txt"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: sessionDir, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestOpenHandsProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "086c7ecf-6cb7-46b6-9fbc-b900358d1247" + sessionDir := openHandsProviderWriteSession( + t, root, "086c7ecf6cb746b69fbcb900358d1247", sessionID, "parse question", + ) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], 
+ Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "openhands:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentOpenHands, result.Result.Session.Agent) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sessionDir, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 1) +} + +func openHandsProviderWriteSession( + t *testing.T, + root string, + dirName string, + sessionID string, + firstMessage string, +) string { + t.Helper() + sessionDir := filepath.Join(root, dirName) + eventsDir := filepath.Join(sessionDir, "events") + require.NoError(t, os.MkdirAll(eventsDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(sessionDir, "base_state.json"), + []byte(`{"id":"`+sessionID+`","agent":{"llm":{"model":"test-model"}}}`), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(sessionDir, "TASKS.json"), + []byte(`[]`), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(eventsDir, "event-00000-user.json"), + []byte(`{ + "id":"e0", + "timestamp":"2026-04-02T15:25:40.706887", + "source":"user", + "llm_message":{"role":"user","content":[{"type":"text","text":"`+firstMessage+`"}]}, + "kind":"MessageEvent" + }`), + 0o644, + )) + return sessionDir +} + +func openHandsProviderWriteInvalidSession( + t *testing.T, + root string, + dirName string, +) { + t.Helper() + require.NoError(t, os.MkdirAll(filepath.Join(root, dirName), 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(root, dirName, "base_state.json"), + []byte(`{}`), + 0o644, + )) +} diff --git a/internal/parser/openhands_test.go 
b/internal/parser/openhands_test.go index 4d8b4bc6f..3e6109d69 100644 --- a/internal/parser/openhands_test.go +++ b/internal/parser/openhands_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" "path/filepath" @@ -25,19 +26,30 @@ func TestDiscoverAndFindOpenHandsSessions(t *testing.T) { 0o644, )) - files := DiscoverOpenHandsSessions(root) - require.Len(t, files, 1) - assert.Equal(t, sessionDir, files[0].Path) - assert.Equal(t, AgentOpenHands, files[0].Agent) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + }) + require.True(t, ok) - assert.Equal( - t, sessionDir, - FindOpenHandsSourceFile(root, sessionID), - ) - assert.Equal( - t, sessionDir, - FindOpenHandsSourceFile(root, dirName), - ) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, sessionDir, sources[0].DisplayPath) + assert.Equal(t, AgentOpenHands, sources[0].Provider) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: dirName, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionDir, found.DisplayPath) } func TestParseOpenHandsSession(t *testing.T) { @@ -116,10 +128,27 @@ func TestParseOpenHandsSession(t *testing.T) { )) } - sess, msgs, err := ParseOpenHandsSession( - sessionDir, "local", - ) + provider, ok := NewProvider(AgentOpenHands, ProviderConfig{ + Roots: []string{root}, + Machine: "local", + }) + require.True(t, ok) + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sessionDir, + }) require.NoError(t, err) + require.True(t, found) + fingerprint, err := provider.Fingerprint(context.Background(), source) + require.NoError(t, 
err) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + + sess := &outcome.Results[0].Result.Session + msgs := outcome.Results[0].Result.Messages require.NotNil(t, sess) require.Len(t, msgs, 4) diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index 653532c2f..adff7c24a 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -790,7 +790,7 @@ func TestClaudeSessionTimestampSemantics(t *testing.T) { buf := captureLog(t) path := createTestFile(t, "ts-long-invalid.jsonl", content) - _, err := ParseClaudeSession( + _, err := parseClaudeSession( path, "proj", "local", ) require.NoError(t, err, "ParseClaudeSession") @@ -1303,7 +1303,7 @@ func TestClaudeUserMessageCount(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { path := createTestFile(t, "test.jsonl", tt.content) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, "test-proj", "local", ) require.NoError(t, err, "ParseClaudeSession") @@ -1324,7 +1324,7 @@ func TestParseClaudeToolResults(t *testing.T) { content := strings.Join(lines, "\n") + "\n" path := createTestFile(t, "tool-results.jsonl", content) - results, err := ParseClaudeSession(path, "test-project", "local") + results, err := parseClaudeSession(path, "test-project", "local") require.NoError(t, err, "ParseClaudeSession") require.NotEmpty(t, results, "ParseClaudeSession returned no results") msgs := results[0].Messages diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 4f205e0ee..7a54cf967 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -279,6 +279,11 @@ type IncrementalRequest struct { Offset int64 StartOrdinal int Machine string + // LastEntryUUID is the UUID of the last entry stored for this + // session, used by DAG-aware parsers (Claude) to detect when an 
+ // appended tail forks away from the stored tip and must trigger a + // full reparse instead of a naive append. + LastEntryUUID string } // IncrementalOutcome is the append-only parse output. @@ -349,12 +354,20 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { switch def.Type { case AgentAmp: return newAmpProviderFactory(def) + case AgentClaude: + return newClaudeProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentCowork: + return newCoworkProviderFactory(def) case AgentCortex: return newCortexProviderFactory(def) + case AgentCursor: + return newCursorProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) + case AgentHermes: + return newHermesProviderFactory(def) case AgentIflow: return newIflowProviderFactory(def) case AgentGptme: @@ -369,6 +382,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIcodemateProviderFactory(def) case AgentOpenCode: return newOpenCodeProviderFactory(def) + case AgentOpenHands: + return newOpenHandsProviderFactory(def) case AgentOpenClaw: return newOpenClawProviderFactory(def) case AgentOMP, AgentPi: @@ -377,6 +392,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newQwenPawProviderFactory(def) case AgentQClaw: return newQClawProviderFactory(def) + case AgentVibe: + return newVibeProviderFactory(def) case AgentWorkBuddy: return newWorkBuddyProviderFactory(def) case AgentQwen: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 093f9ac0c..6a91112ef 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -17,13 +17,13 @@ const ( ) var providerMigrationModes = map[AgentType]ProviderMigrationMode{ - AgentClaude: ProviderMigrationLegacyOnly, - AgentCowork: ProviderMigrationLegacyOnly, + AgentClaude: ProviderMigrationProviderAuthoritative, + AgentCowork: ProviderMigrationProviderAuthoritative, AgentCodex: 
ProviderMigrationLegacyOnly, AgentCopilot: ProviderMigrationLegacyOnly, AgentGemini: ProviderMigrationLegacyOnly, - AgentOpenHands: ProviderMigrationLegacyOnly, - AgentCursor: ProviderMigrationLegacyOnly, + AgentOpenHands: ProviderMigrationProviderAuthoritative, + AgentCursor: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, AgentOpenCode: ProviderMigrationProviderAuthoritative, AgentKilo: ProviderMigrationProviderAuthoritative, @@ -45,7 +45,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentKiro: ProviderMigrationLegacyOnly, AgentKiroIDE: ProviderMigrationLegacyOnly, AgentCortex: ProviderMigrationProviderAuthoritative, - AgentHermes: ProviderMigrationLegacyOnly, + AgentHermes: ProviderMigrationProviderAuthoritative, AgentWorkBuddy: ProviderMigrationProviderAuthoritative, AgentForge: ProviderMigrationLegacyOnly, AgentPiebald: ProviderMigrationLegacyOnly, @@ -53,7 +53,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentPositron: ProviderMigrationLegacyOnly, AgentAntigravity: ProviderMigrationLegacyOnly, AgentAntigravityCLI: ProviderMigrationLegacyOnly, - AgentVibe: ProviderMigrationLegacyOnly, + AgentVibe: ProviderMigrationProviderAuthoritative, AgentZed: ProviderMigrationLegacyOnly, AgentQwenPaw: ProviderMigrationProviderAuthoritative, AgentGptme: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 204c96081..8e4aa92c4 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -29,6 +29,11 @@ var legacyEntrypointVerb = regexp.MustCompile(`^(Discover|Find|Parse|Process|Cla var providerNeutralEntrypoints = map[string]bool{ "ParseVirtualSourcePath": true, "ParseVirtualSourcePathForBase": true, + // ParseCursorTranscriptRelPath is a pure rel-path shape validator with no + // filesystem or provider state. 
It is shared by the engine's path + // classification/enrichment and the Cursor provider's source set, so it + // stays a free helper rather than moving onto the provider. + "ParseCursorTranscriptRelPath": true, } // pendingShimProviderFiles are provider files whose behavior has not yet been @@ -44,20 +49,14 @@ var providerNeutralEntrypoints = map[string]bool{ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, - "claude_provider.go": true, "codex_provider.go": true, "copilot_provider.go": true, - "cowork_provider.go": true, - "cursor_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, - "hermes_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, - "openhands_provider.go": true, "positron_provider.go": true, "shelley_provider.go": true, - "vibe_provider.go": true, "visualstudio_copilot_provider.go": true, "vscode_copilot_provider.go": true, "zed_provider.go": true, diff --git a/internal/parser/s3_discovery_test.go b/internal/parser/s3_discovery_test.go new file mode 100644 index 000000000..f1f5322f3 --- /dev/null +++ b/internal/parser/s3_discovery_test.go @@ -0,0 +1,109 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestClaudeSourceSetDiscoversS3Sessions verifies the Claude source set +// enumerates s3:// roots through its provider Discover path and carries the +// durable object metadata (including folded tool-result sidecar size/mtime/ +// fingerprint) in the S3DiscoveredSource opaque, rather than dropping the remote +// object at the local IsRegularFile gate. 
+func TestClaudeSourceSetDiscoversS3Sessions(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + root := "s3://bucket/laptop/raw/claude" + sessionURI := root + "/proj/session.jsonl" + sessionMtime := time.Unix(100, 0) + sidecarMtime := time.Unix(200, 0) + listS3Objects = func(got string) ([]S3Object, error) { + require.Equal(t, root, got) + return []S3Object{ + { + URI: sessionURI, + Size: 11, + LastModified: sessionMtime, + Fingerprint: "s3-meta:session", + }, + { + URI: root + "/proj/session/tool-results/out.txt", + Size: 22, + LastModified: sidecarMtime, + Fingerprint: "s3-meta:sidecar", + }, + }, nil + } + + sources, err := newClaudeSourceSet([]string{root}).Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + src := sources[0] + assert.Equal(t, AgentClaude, src.Provider) + assert.Equal(t, sessionURI, src.DisplayPath) + assert.Equal(t, sessionURI, src.FingerprintKey) + assert.Equal(t, "proj", src.ProjectHint) + + s3, ok := src.Opaque.(S3DiscoveredSource) + require.True(t, ok, "s3 source carries S3DiscoveredSource opaque") + assert.Equal(t, sessionURI, s3.URI) + assert.Equal(t, "laptop", s3.Machine) + assert.Equal(t, "proj", s3.Project) + // Session plus its tool-result sidecar fold into one freshness identity. + assert.Equal(t, int64(33), s3.Size) + assert.Equal(t, sidecarMtime.UnixNano(), s3.MtimeNS) + assert.Contains(t, s3.Fingerprint, "session") + assert.Contains(t, s3.Fingerprint, "sidecar") +} + +// TestClaudeSourceSetMixedLocalAndS3Roots verifies a config that mixes a local +// projects root and an s3:// root discovers sources from both, with only the +// remote object carrying the S3DiscoveredSource opaque. 
+func TestClaudeSourceSetMixedLocalAndS3Roots(t *testing.T) { + oldList := listS3Objects + t.Cleanup(func() { listS3Objects = oldList }) + + localRoot := t.TempDir() + localProj := filepath.Join(localRoot, "localproj") + require.NoError(t, os.MkdirAll(localProj, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(localProj, "11111111-1111-4111-8111-111111111111.jsonl"), + []byte("{}\n"), 0o644, + )) + + s3Root := "s3://bucket/laptop/raw/claude" + s3URI := s3Root + "/remoteproj/22222222-2222-4222-8222-222222222222.jsonl" + listS3Objects = func(string) ([]S3Object, error) { + return []S3Object{{ + URI: s3URI, + Size: 11, + LastModified: time.Unix(100, 0), + Fingerprint: "s3-meta:remote", + }}, nil + } + + sources, err := newClaudeSourceSet([]string{localRoot, s3Root}). + Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + + var s3Count, localCount int + for _, src := range sources { + if _, ok := src.Opaque.(S3DiscoveredSource); ok { + s3Count++ + assert.Equal(t, s3URI, src.DisplayPath) + } else { + localCount++ + } + } + assert.Equal(t, 1, s3Count, "exactly one remote source") + assert.Equal(t, 1, localCount, "exactly one local source") +} diff --git a/internal/parser/test_helpers_test.go b/internal/parser/test_helpers_test.go index 587114297..9c2778d66 100644 --- a/internal/parser/test_helpers_test.go +++ b/internal/parser/test_helpers_test.go @@ -145,10 +145,28 @@ func parseClaudeTestFile( ) (ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, name, content) - results, err := ParseClaudeSession( + results, err := parseClaudeSession( path, project, "local", ) - require.NoError(t, err, "ParseClaudeSession") - require.NotEmpty(t, results, "ParseClaudeSession returned no results") + require.NoError(t, err, "parseClaudeSession") + require.NotEmpty(t, results, "parseClaudeSession returned no results") return results[0].Session, results[0].Messages } + +// parseClaudeSession parses a standalone Claude 
transcript through the Claude +// provider's upload entry point, honoring the explicit project. It is the +// test harness replacement for the former ParseClaudeSession free function, +// exercising the same provider-owned parse body that production uploads use. +func parseClaudeSession( + path, project, machine string, +) ([]ParseResult, error) { + provider, ok := NewProvider(AgentClaude, ProviderConfig{Machine: machine}) + if !ok { + return nil, fmt.Errorf("claude provider unavailable") + } + uploader, ok := provider.(ClaudeUploadParser) + if !ok { + return nil, fmt.Errorf("claude provider does not support upload parsing") + } + return uploader.ParseUploadedTranscript(path, project, machine) +} diff --git a/internal/parser/types.go b/internal/parser/types.go index 17d1a96c5..2821dc0d3 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -106,20 +106,16 @@ var Registry = []AgentDef{ DefaultDirs: []string{".claude/projects"}, IDPrefix: "", FileBased: true, - DiscoverFunc: DiscoverClaudeProjects, - FindSourceFunc: FindClaudeSourceFile, }, { - Type: AgentCowork, - DisplayName: "Claude Cowork", - EnvVar: "COWORK_DIR", - ConfigKey: "cowork_dirs", - DefaultDirs: coworkDefaultDirs(), - IDPrefix: "cowork:", - FileBased: true, - ShallowWatch: true, - DiscoverFunc: DiscoverCoworkSessions, - FindSourceFunc: FindCoworkSourceFile, + Type: AgentCowork, + DisplayName: "Claude Cowork", + EnvVar: "COWORK_DIR", + ConfigKey: "cowork_dirs", + DefaultDirs: coworkDefaultDirs(), + IDPrefix: "cowork:", + FileBased: true, + ShallowWatch: true, }, { Type: AgentCodex, @@ -206,27 +202,23 @@ var Registry = []AgentDef{ WatchRootsFunc: ResolveKiloWatchRoots, }, { - Type: AgentOpenHands, - DisplayName: "OpenHands CLI", - EnvVar: "OPENHANDS_CONVERSATIONS_DIR", - ConfigKey: "openhands_dirs", - DefaultDirs: []string{".openhands/conversations"}, - IDPrefix: "openhands:", - FileBased: true, - ShallowWatch: true, - DiscoverFunc: DiscoverOpenHandsSessions, - FindSourceFunc: 
FindOpenHandsSourceFile, + Type: AgentOpenHands, + DisplayName: "OpenHands CLI", + EnvVar: "OPENHANDS_CONVERSATIONS_DIR", + ConfigKey: "openhands_dirs", + DefaultDirs: []string{".openhands/conversations"}, + IDPrefix: "openhands:", + FileBased: true, + ShallowWatch: true, }, { - Type: AgentCursor, - DisplayName: "Cursor", - EnvVar: "CURSOR_PROJECTS_DIR", - ConfigKey: "cursor_project_dirs", - DefaultDirs: []string{".cursor/projects"}, - IDPrefix: "cursor:", - FileBased: true, - DiscoverFunc: DiscoverCursorSessions, - FindSourceFunc: FindCursorSourceFile, + Type: AgentCursor, + DisplayName: "Cursor", + EnvVar: "CURSOR_PROJECTS_DIR", + ConfigKey: "cursor_project_dirs", + DefaultDirs: []string{".cursor/projects"}, + IDPrefix: "cursor:", + FileBased: true, }, { Type: AgentAmp, @@ -434,15 +426,15 @@ var Registry = []AgentDef{ FileBased: true, }, { - Type: AgentHermes, - DisplayName: "Hermes Agent", - EnvVar: "HERMES_SESSIONS_DIR", - ConfigKey: "hermes_sessions_dirs", - DefaultDirs: []string{".hermes/sessions"}, - IDPrefix: "hermes:", - FileBased: true, - DiscoverFunc: DiscoverHermesSessions, - FindSourceFunc: FindHermesSourceFile, + Type: AgentHermes, + DisplayName: "Hermes Agent", + EnvVar: "HERMES_SESSIONS_DIR", + ConfigKey: "hermes_sessions_dirs", + DefaultDirs: []string{".hermes/sessions"}, + IDPrefix: "hermes:", + FileBased: true, + WatchRootsFunc: ResolveHermesWatchRoots, + ShallowWatchRootsFunc: ResolveHermesShallowWatchRoots, }, { Type: AgentWorkBuddy, @@ -578,15 +570,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindShelleySourceFile, }, { - Type: AgentVibe, - DisplayName: "Mistral Vibe", - EnvVar: "VIBE_SESSIONS_DIR", - ConfigKey: "vibe_session_dirs", - DefaultDirs: []string{".vibe/logs/session"}, - IDPrefix: "vibe:", - FileBased: true, - DiscoverFunc: DiscoverVibeSessions, - FindSourceFunc: FindVibeSourceFile, + Type: AgentVibe, + DisplayName: "Mistral Vibe", + EnvVar: "VIBE_SESSIONS_DIR", + ConfigKey: "vibe_session_dirs", + DefaultDirs: 
[]string{".vibe/logs/session"}, + IDPrefix: "vibe:", + FileBased: true, }, { // Aider has no central session store. It writes one Markdown diff --git a/internal/parser/types_test.go b/internal/parser/types_test.go index ea96bda83..e25b95cf7 100644 --- a/internal/parser/types_test.go +++ b/internal/parser/types_test.go @@ -523,8 +523,10 @@ func TestCoworkRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentCowork) require.True(t, ok, "AgentCowork missing from Registry") require.True(t, def.FileBased, "Cowork FileBased") - require.NotNil(t, def.DiscoverFunc, "Cowork DiscoverFunc") - require.NotNil(t, def.FindSourceFunc, "Cowork FindSourceFunc") + // Cowork is a migrated, provider-authoritative agent: source discovery + // and lookup live on the concrete provider, not on legacy AgentDef hooks. + require.Nil(t, def.DiscoverFunc, "Cowork DiscoverFunc") + require.Nil(t, def.FindSourceFunc, "Cowork FindSourceFunc") assert.Equal(t, "COWORK_DIR", def.EnvVar) assert.Equal(t, "cowork_dirs", def.ConfigKey) assert.Equal(t, "cowork:", def.IDPrefix) diff --git a/internal/parser/vibe.go b/internal/parser/vibe.go index 49aad1018..4f16f325a 100644 --- a/internal/parser/vibe.go +++ b/internal/parser/vibe.go @@ -67,8 +67,10 @@ type VibeStats struct { LastTurnTotalTokens int `json:"last_turn_total_tokens"` } -// ParseVibeSession parses a Mistral Vibe messages.jsonl file -func ParseVibeSession(path string, fileInfo FileInfo) (ParseResult, error) { +// parseVibeResultFile parses a Mistral Vibe messages.jsonl file into a ParseResult. +// It owns the on-disk shape (messages.jsonl plus the sibling meta.json) for the +// Vibe provider; the package-level entrypoint was folded onto the provider. 
+func parseVibeResultFile(path string, fileInfo FileInfo) (ParseResult, error) { result := ParseResult{ Session: ParsedSession{ Agent: AgentVibe, @@ -386,11 +388,11 @@ func vibeToolArguments(args json.RawMessage) string { return string(args) } -// ParseVibeSessionWrapper wraps ParseVibeSession and returns the session, -// messages, and usage events in the shape the sync engine consumes: -// (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error). It stats the -// file to build FileInfo and optionally overrides the project and machine. -func ParseVibeSessionWrapper(path, project, machine string) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { +// parseVibeSession parses a Vibe session at path and returns the session, messages, +// and usage events in the shape the provider consumes: (*ParsedSession, +// []ParsedMessage, []ParsedUsageEvent, error). It stats the file to build +// FileInfo and optionally overrides the project and machine. +func parseVibeSession(path, project, machine string) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) if err != nil { return nil, nil, nil, fmt.Errorf("stat %s: %w", path, err) @@ -402,7 +404,7 @@ func ParseVibeSessionWrapper(path, project, machine string) (*ParsedSession, []P Mtime: info.ModTime().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeResultFile(path, fileInfo) if err != nil { return nil, nil, nil, err } diff --git a/internal/parser/vibe_provider.go b/internal/parser/vibe_provider.go new file mode 100644 index 000000000..29ad56f2c --- /dev/null +++ b/internal/parser/vibe_provider.go @@ -0,0 +1,304 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// Vibe stores each session in {root}/session_{date}_{time}_{id}/, with a +// messages.jsonl transcript and a sibling meta.json. 
It is a single-file +// provider: one transcript parses into one session, with a composite fingerprint +// folding in meta.json and a fallback-ID exclusion when meta.json later supplies +// a different session_id. All behavior is wired into the shared single-file base +// via options. +func newVibeProviderFactory(def AgentDef) ProviderFactory { + return NewSingleFileProviderFactory( + def, + vibeProviderCapabilities(), + func(cfg ProviderConfig) singleFileSourceSet { + return NewSingleFileSourceSet( + AgentVibe, + cfg.Roots, + WithFileDiscovery(vibeDiscoverFiles), + WithFileWatchRoots(vibeWatchRoots), + WithFileChangedPathClassifier(vibeClassifyPath), + WithFileLookup(vibeFindFile), + WithFileFingerprint(vibeFingerprintSource), + WithFileParse(vibeParseFile), + ) + }, + ) +} + +func vibeDiscoverFiles(root string) []singleFileMatch { + var out []singleFileMatch + for _, path := range discoverVibeSessionPaths(root) { + if match, ok := vibeStrictMatch(root, path); ok { + out = append(out, match) + } + } + return out +} + +// discoverVibeSessionPaths finds all Vibe messages.jsonl paths under root. +// Symlinked session directories are followed (matching the watcher), but only +// session_-prefixed directories that hold a regular messages.jsonl qualify. 
+func discoverVibeSessionPaths(root string) []string { + entries, err := os.ReadDir(root) + if err != nil { + return nil + } + var paths []string + for _, entry := range entries { + if !isDirOrSymlink(entry, root) { + continue + } + if !isVibeSessionDirName(entry.Name()) { + continue + } + messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") + if isVibeMessagesFile(messagesPath) { + paths = append(paths, messagesPath) + } + } + return paths +} + +func vibeWatchRoots(roots []string) []WatchRoot { + out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: true, + IncludeGlobs: []string{"messages.jsonl", "meta.json"}, + DebounceKey: string(AgentVibe) + ":sessions:" + root, + }) + } + return out +} + +// vibeClassifyPath maps a messages.jsonl or meta.json event path to its session +// transcript. Under allowMissing a transcript that does not (yet) exist still +// classifies via the session directory name, so a metadata-only event or a +// deletion still resolves. 
+func vibeClassifyPath( + root, path string, allowMissing bool, +) (singleFileMatch, bool) { + rel, ok := vibeRelPath(root, path) + if !ok { + return singleFileMatch{}, false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) != 2 || !isVibeSessionDirName(parts[0]) { + return singleFileMatch{}, false + } + messagesPath := filepath.Join(filepath.Clean(root), parts[0], "messages.jsonl") + switch parts[1] { + case "messages.jsonl": + if allowMissing { + return vibeMatchFromSessionDir(parts[0], messagesPath) + } + return vibeStrictMatch(root, messagesPath) + case "meta.json": + if allowMissing && !isVibeMessagesFile(messagesPath) { + return vibeMatchFromSessionDir(parts[0], messagesPath) + } + return vibeStrictMatch(root, messagesPath) + default: + return singleFileMatch{}, false + } +} + +// vibeStrictMatch requires the messages.jsonl to exist as a regular file under a +// session directory before classifying it. +func vibeStrictMatch(root, path string) (singleFileMatch, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !isVibeMessagesFile(path) { + return singleFileMatch{}, false + } + rel, ok := vibeRelPath(root, path) + if !ok { + return singleFileMatch{}, false + } + parts := strings.Split(rel, string(filepath.Separator)) + if len(parts) != 2 || !isVibeSessionDirName(parts[0]) || + parts[1] != "messages.jsonl" { + return singleFileMatch{}, false + } + return vibeMatchFromSessionDir(parts[0], path) +} + +func vibeMatchFromSessionDir(sessionDir, path string) (singleFileMatch, bool) { + if !isVibeSessionDirName(sessionDir) { + return singleFileMatch{}, false + } + return singleFileMatch{Path: path, ProjectHint: sessionDir}, true +} + +func vibeFindFile(root, rawID string) (singleFileMatch, bool) { + path := findVibeSourceFile(root, rawID) + if path == "" { + return singleFileMatch{}, false + } + return vibeStrictMatch(root, path) +} + +// findVibeSourceFile locates a Vibe session by ID under root. 
The ID is the +// session_id from meta.json (a uuid), which usually differs from the session +// directory name, so a direct directory-name path is tried before scanning +// meta.json files. +func findVibeSourceFile(root, sessionID string) string { + if messagesPath := filepath.Join( + root, sessionID, "messages.jsonl", + ); isVibeMessagesFile(messagesPath) { + return messagesPath + } + entries, err := os.ReadDir(root) + if err != nil { + return "" + } + for _, entry := range entries { + if !isDirOrSymlink(entry, root) || + !strings.HasPrefix(entry.Name(), "session_") { + continue + } + messagesPath := filepath.Join(root, entry.Name(), "messages.jsonl") + if !isVibeMessagesFile(messagesPath) { + continue + } + metaPath := filepath.Join(root, entry.Name(), "meta.json") + if meta, err := parseVibeMetadata(metaPath); err == nil && + meta.SessionID == sessionID { + return messagesPath + } + } + return "" +} + +// isVibeMessagesFile reports whether path exists (per os.Stat) and is not a directory. +func isVibeMessagesFile(path string) bool { + info, err := os.Stat(path) + if err != nil || info == nil { + return false + } + return !info.IsDir() +} + +func vibeFingerprintSource(src singleFileSource) (SourceFingerprint, error) { + info, err := os.Stat(src.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Path, + ) + } + size := info.Size() + mtime := info.ModTime().UnixNano() + metaPath := vibeMetaPath(src.Path) + if metaInfo, err := os.Stat(metaPath); err == nil { + size += metaInfo.Size() + if metaMTime := metaInfo.ModTime().UnixNano(); metaMTime > mtime { + mtime = metaMTime + } + } + hash, err := hashJSONLSourceFile(src.Path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: size, + MTimeNS: mtime, + Hash: hash, + }, nil +} + +func vibeParseFile( + src singleFileSource, req ParseRequest, +) 
([]ParseResult, []string, error) { + sess, msgs, usageEvents, err := parseVibeSession(src.Path, "", req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + excluded := vibeProviderExcludedSessionIDs(src.Path, sess.ID) + return []ParseResult{{ + Session: *sess, + Messages: msgs, + UsageEvents: usageEvents, + }}, excluded, nil +} + +func vibeRelPath(root, path string) (string, bool) { + rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path)) + if err != nil || rel == "." || rel == "" { + return "", false + } + if strings.HasPrefix(rel, ".."+string(filepath.Separator)) || rel == ".." { + return "", false + } + for part := range strings.SplitSeq(rel, string(filepath.Separator)) { + if part == "" || part == "." || part == ".." 
{ + return "", false + } + } + return rel, true +} + +func isVibeSessionDirName(name string) bool { + return strings.HasPrefix(name, "session_") && strings.Contains(name, "_") +} + +func vibeMetaPath(messagesPath string) string { + return filepath.Join(filepath.Dir(messagesPath), "meta.json") +} + +func vibeProviderExcludedSessionIDs(path, currentID string) []string { + fallbackID := string(AgentVibe) + ":" + filepath.Base(filepath.Dir(path)) + if currentID == "" || currentID == fallbackID { + return nil + } + return []string{fallbackID} +} + +func vibeProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilitySupported, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Relationships: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/vibe_provider_test.go b/internal/parser/vibe_provider_test.go new file mode 100644 index 000000000..8dbae35f2 --- /dev/null +++ b/internal/parser/vibe_provider_test.go @@ -0,0 +1,297 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestVibeProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentVibe) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentVibe, 
ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestVibeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260613_123456_abc123def" + messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("provider question")) + writeSourceFile(t, metaPath, vibeProviderMetaFixture("uuid-1234", "Provider title")) + writeSourceFile(t, filepath.Join(root, "scratch", "messages.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "session_missing_messages", "meta.json"), "{}\n") + nestedPath := filepath.Join(root, "nested", "session_20260613_123456_nested", "messages.jsonl") + writeSourceFile(t, nestedPath, vibeProviderMessagesFixture("nested")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"messages.jsonl", "meta.json"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + source := discovered[0] + assert.Equal(t, AgentVibe, source.Provider) + assert.Equal(t, messagesPath, source.DisplayPath) + assert.Equal(t, messagesPath, source.FingerprintKey) + assert.Equal(t, sessionDir, source.ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "remote~vibe:uuid-1234", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: 
sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: messagesPath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, messagesPath, found.DisplayPath) + + messageInfo, err := os.Stat(messagesPath) + require.NoError(t, err) + metaInfo, err := os.Stat(metaPath) + require.NoError(t, err) + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, messagesPath, fingerprint.Key) + assert.Equal(t, messageInfo.Size()+metaInfo.Size(), fingerprint.Size) + assert.Equal( + t, + max(messageInfo.ModTime().UnixNano(), metaInfo.ModTime().UnixNano()), + fingerprint.MTimeNS, + ) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "messages", path: messagesPath, want: messagesPath}, + {name: "meta sidecar", path: metaPath, want: messagesPath}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, changed[0].DisplayPath) + }) + } + + require.NoError(t, os.Remove(metaPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, messagesPath, changed[0].DisplayPath) + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "scratch", "messages.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + nested, err := provider.SourcesForChangedPath( + 
context.Background(), + ChangedPathRequest{Path: nestedPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, nested) + + require.NoError(t, os.Remove(messagesPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: messagesPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, messagesPath, changed[0].DisplayPath) + assert.Equal(t, sessionDir, changed[0].ProjectHint) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: messagesPath, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestVibeProviderDiscoversSymlinkedSessionDirectory(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + sessionDir := "session_20260613_123456_symlinked" + targetDir := filepath.Join(targetRoot, sessionDir) + sourceDir := filepath.Join(root, sessionDir) + sourcePath := filepath.Join(sourceDir, "messages.jsonl") + writeSourceFile( + t, + filepath.Join(targetDir, "messages.jsonl"), + vibeProviderMessagesFixture("from symlink"), + ) + if err := os.Symlink(targetDir, sourceDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionDir, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestVibeProviderParse(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260613_123456_abc123def" 
+ messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("parse question")) + writeSourceFile(t, metaPath, vibeProviderMetaFixture("uuid-1234", "Provider title")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "vibe:uuid-1234", result.Result.Session.ID) + assert.Equal(t, AgentVibe, result.Result.Session.Agent) + assert.Equal(t, "vibe", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, messagesPath, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "Provider title", result.Result.Session.SessionName) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Contains(t, outcome.ExcludedSessionIDs, "vibe:"+sessionDir) + assert.Len(t, result.Result.Messages, 2) +} + +// TestVibeProviderParseEmitsUsageEvents locks in the usage-event and +// excluded-ID behavior the deleted shadow-baseline test asserted: when +// meta.json carries a model and token stats, Parse must surface a single 
+// session-level usage event and exclude the directory-name fallback ID. +func TestVibeProviderParseEmitsUsageEvents(t *testing.T) { + root := t.TempDir() + sessionDir := "session_20260616_083518_abc123" + sessionID := "uuid-1234" + messagesPath := filepath.Join(root, sessionDir, "messages.jsonl") + metaPath := filepath.Join(root, sessionDir, "meta.json") + writeSourceFile(t, messagesPath, vibeProviderMessagesFixture("provider question")) + writeSourceFile(t, metaPath, vibeProviderMetaWithStatsFixture(sessionID, "Provider title")) + + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, "vibe:"+sessionID, result.Result.Session.ID) + assert.Equal(t, []string{"vibe:" + sessionDir}, outcome.ExcludedSessionIDs) + + require.Len(t, result.Result.UsageEvents, 1) + usageEvent := result.Result.UsageEvents[0] + assert.Equal(t, "vibe:"+sessionID, usageEvent.SessionID) + assert.Equal(t, "mistral-medium-3.5", usageEvent.Model) + assert.Equal(t, 100, usageEvent.InputTokens) + assert.Equal(t, 40, usageEvent.OutputTokens) +} + +func vibeProviderMessagesFixture(firstMessage string) string { + return `{"role":"user","content":"` + firstMessage + `"}` + "\n" + + `{"role":"assistant","content":"Done."}` + "\n" +} + +func vibeProviderMetaFixture(sessionID, title string) string { + return `{"session_id":"` + sessionID + `","title":"` + title + `"}` +} + +func vibeProviderMetaWithStatsFixture(sessionID, title string) string { + return `{"session_id":"` + sessionID + 
`","title":"` + title + `",` + + `"config":{"active_model":"mistral-medium-3.5"},` + + `"stats":{"session_prompt_tokens":100,"session_completion_tokens":40}}` +} diff --git a/internal/parser/vibe_test.go b/internal/parser/vibe_test.go index 7d7af4630..89b3b6085 100644 --- a/internal/parser/vibe_test.go +++ b/internal/parser/vibe_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "encoding/json" "os" "path/filepath" @@ -11,6 +12,55 @@ import ( "github.com/stretchr/testify/require" ) +// newVibeTestProvider builds a Vibe provider for the given roots so package +// tests can exercise discovery through the Provider interface. +func newVibeTestProvider(t *testing.T, roots ...string) Provider { + t.Helper() + provider, ok := NewProvider(AgentVibe, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + return provider +} + +// parseVibeTestSession parses a Vibe messages.jsonl file at path into a +// ParseResult through the folded free function, replacing the removed +// package-level ParseVibeSession entrypoint. +func parseVibeTestSession(t *testing.T, path string, fileInfo FileInfo) (ParseResult, error) { + t.Helper() + return parseVibeResultFile(path, fileInfo) +} + +// discoverVibeTestSessions discovers Vibe sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path + project) the +// tests assert against. 
+func discoverVibeTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newVibeTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: AgentVibe, + }) + } + return files +} + +// findVibeTestSourceFile resolves a Vibe session ID to a messages.jsonl path, +// replacing the removed FindVibeSourceFile. +func findVibeTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return findVibeSourceFile(root, sessionID) +} + func TestDiscoverVibeSessions(t *testing.T) { tmpDir := t.TempDir() @@ -29,7 +79,7 @@ func TestDiscoverVibeSessions(t *testing.T) { require.NoError(t, os.MkdirAll(otherDir, 0755)) // Run discovery - discovered := DiscoverVibeSessions(tmpDir) + discovered := discoverVibeTestSessions(t, tmpDir) // Verify results require.Len(t, discovered, 1) @@ -53,7 +103,7 @@ func TestDiscoverVibeSessionsMultiple(t *testing.T) { require.NoError(t, os.MkdirAll(invalidDir, 0755)) // Run discovery - discovered := DiscoverVibeSessions(tmpDir) + discovered := discoverVibeTestSessions(t, tmpDir) // Verify results - should find only 3 valid sessions require.Len(t, discovered, 3) @@ -69,7 +119,7 @@ func TestDiscoverVibeSessionsEmptyDir(t *testing.T) { tmpDir := t.TempDir() // Run discovery on empty directory - files := DiscoverVibeSessions(tmpDir) + files := discoverVibeTestSessions(t, tmpDir) // Should return empty slice assert.Len(t, files, 0) @@ -77,7 +127,7 @@ func TestDiscoverVibeSessionsEmptyDir(t *testing.T) { func TestDiscoverVibeSessionsNonExistentDir(t *testing.T) { // Run discovery on non-existent directory - files := DiscoverVibeSessions("/nonexistent/path") + files := discoverVibeTestSessions(t, "/nonexistent/path") // Should return 
empty slice without error assert.Len(t, files, 0) @@ -92,7 +142,7 @@ func TestFindVibeSourceFile(t *testing.T) { // When the ID matches the directory name (no meta.json), the file is // resolved directly. - result := FindVibeSourceFile(root, sessionID) + result := findVibeTestSourceFile(t, root, sessionID) expected := filepath.Join(root, sessionID, "messages.jsonl") assert.Equal(t, expected, result) } @@ -104,7 +154,7 @@ func TestFindVibeSourceFileWithSpecialChars(t *testing.T) { filepath.Join(sessionID, "messages.jsonl"): "test", }) - result := FindVibeSourceFile(root, sessionID) + result := findVibeTestSourceFile(t, root, sessionID) expected := filepath.Join(root, sessionID, "messages.jsonl") assert.Equal(t, expected, result) } @@ -119,12 +169,12 @@ func TestFindVibeSourceFileByMetaSessionID(t *testing.T) { // The canonical ID is the meta.json session_id, which differs from the // directory name; the lookup must scan meta.json to resolve it. - result := FindVibeSourceFile(root, "uuid-1234") + result := findVibeTestSourceFile(t, root, "uuid-1234") expected := filepath.Join(root, dirName, "messages.jsonl") assert.Equal(t, expected, result) // An unknown ID resolves to nothing. 
- assert.Empty(t, FindVibeSourceFile(root, "does-not-exist")) + assert.Empty(t, findVibeTestSourceFile(t, root, "does-not-exist")) } func TestParseVibeSession(t *testing.T) { @@ -134,7 +184,7 @@ func TestParseVibeSession(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Verify session metadata @@ -192,7 +242,7 @@ func TestParseVibeSessionWithTools(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Verify messages @@ -254,7 +304,7 @@ func TestParseVibeSessionEmpty(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Empty file should have no messages @@ -281,7 +331,7 @@ func TestParseVibeSessionMalformedLines(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed 2 valid messages and counted 1 malformed line @@ -307,7 +357,7 @@ func TestParseVibeSessionWithoutMeta(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages but no metadata from meta.json. 
The ID @@ -358,7 +408,7 @@ func TestParseVibeSessionEmptyStats(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages and metadata but no usage events due to empty stats @@ -406,7 +456,7 @@ func TestParseVibeSessionModelFromMessages(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) // Should have parsed messages and metadata @@ -460,7 +510,7 @@ func TestParseVibeSessionModelFromConfig(t *testing.T) { path := filepath.Join(tmpDir, "session_test", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) require.Len(t, result.UsageEvents, 1) @@ -488,7 +538,7 @@ func TestParseVibeSessionInjectedUserExcluded(t *testing.T) { path := filepath.Join(tmpDir, "session_test", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) require.Len(t, result.Messages, 3) @@ -507,7 +557,7 @@ func TestParseVibeSessionToolResultNotCountedAsUser(t *testing.T) { path := "testdata/vibe/session_with_tools/messages.jsonl" fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) assert.Equal(t, 1, result.Session.UserMessageCount) @@ -533,7 +583,7 @@ func TestParseVibeSessionMalformedMetaRecoversID(t *testing.T) { path := filepath.Join(tmpDir, "session_dir", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - result, err := 
ParseVibeSession(path, fileInfo) + result, err := parseVibeTestSession(t, path, fileInfo) require.NoError(t, err) assert.Equal(t, "vibe:uuid-canonical-1", result.Session.ID) @@ -560,7 +610,7 @@ func TestParseVibeSessionCorruptMetaReturnsError(t *testing.T) { path := filepath.Join(tmpDir, "session_dir", "messages.jsonl") fileInfo := FileInfo{Path: path, Mtime: time.Now().UnixNano()} - _, err := ParseVibeSession(path, fileInfo) + _, err := parseVibeTestSession(t, path, fileInfo) require.Error(t, err) assert.Contains(t, err.Error(), "meta.json") } @@ -575,8 +625,10 @@ func TestVibeAgentByType(t *testing.T) { assert.Equal(t, "vibe_session_dirs", def.ConfigKey) assert.Equal(t, "vibe:", def.IDPrefix) assert.True(t, def.FileBased) - assert.NotNil(t, def.DiscoverFunc) - assert.NotNil(t, def.FindSourceFunc) + // Vibe is provider-authoritative: discovery and source lookup live on the + // vibeProvider, not on legacy AgentDef hooks. + assert.Nil(t, def.DiscoverFunc) + assert.Nil(t, def.FindSourceFunc) } func TestVibeAgentByPrefix(t *testing.T) { @@ -642,7 +694,7 @@ func TestParseRealVibeSession(t *testing.T) { Mtime: time.Now().UnixNano(), } - result, err := ParseVibeSession(messagesPath, fileInfo) + result, err := parseVibeTestSession(t, messagesPath, fileInfo) require.NoError(t, err) // Verify basic session metadata diff --git a/internal/server/huma_routes_sessions.go b/internal/server/huma_routes_sessions.go index 313087079..b1cfcae92 100644 --- a/internal/server/huma_routes_sessions.go +++ b/internal/server/huma_routes_sessions.go @@ -884,7 +884,21 @@ func (s *Server) humaUploadSession( return nil, apiError(http.StatusInternalServerError, "failed to save upload") } defer func() { _ = os.RemoveAll(upload.tempDir) }() - results, err := parser.ParseClaudeSession(upload.tempPath, project, machine) + provider, ok := parser.NewProvider( + parser.AgentClaude, parser.ProviderConfig{Machine: machine}, + ) + if !ok { + return nil, apiError(http.StatusInternalServerError, + "claude 
provider unavailable") + } + uploader, ok := provider.(parser.ClaudeUploadParser) + if !ok { + return nil, apiError(http.StatusInternalServerError, + "claude provider does not support uploads") + } + results, err := uploader.ParseUploadedTranscript( + upload.tempPath, project, machine, + ) if err != nil { return nil, apiError(http.StatusBadRequest, fmt.Sprintf("parsing session: %v", err)) @@ -892,7 +906,6 @@ func (s *Server) humaUploadSession( if len(results) == 0 { return nil, apiError(http.StatusBadRequest, "no sessions parsed from upload") } - parser.InferRelationshipTypes(results) for i := range results { results[i].Session.File.Path = upload.finalPath } diff --git a/internal/ssh/resolve.go b/internal/ssh/resolve.go index 9d072e7af..7b168120e 100644 --- a/internal/ssh/resolve.go +++ b/internal/ssh/resolve.go @@ -67,14 +67,15 @@ func buildAiderResolveSnippet(envVar string) string { // "agentType:path\n" per agent target, plus "@file:path\n" lines for sibling // metadata files such as Codex's session_index.jsonl. // -// Only includes agents where FileBased is true and DiscoverFunc -// is non-nil. For each agent with an EnvVar, the script checks -// the env var first and falls back to the default dir. Dirs (and +// Only includes file-based agents that have on-disk sources to +// resolve: either a legacy DiscoverFunc or a provider facade that has +// left legacy-only mode. For each agent with an EnvVar, the script +// checks the env var first and falls back to the default dir. Dirs (and // files) that don't exist on the remote are skipped. 
func buildResolveScript() string {
 	var b strings.Builder
 	for _, def := range parser.Registry {
-		if !def.FileBased || def.DiscoverFunc == nil {
+		if !resolveAgentHasOnDiskSource(def) {
 			continue
 		}
 		if def.Type == parser.AgentAider {
@@ -153,6 +154,28 @@ func remoteDefaultRootTail(rel string) string {
 	return ""
 }
 
+// resolveAgentHasOnDiskSource reports whether a file-based agent has
+// on-disk sources the resolve script should probe: a legacy DiscoverFunc,
+// or a shadow-compare/provider-authoritative mode with a provider factory.
+// Provider-migrated agents drop their DiscoverFunc but still have a
+// configurable directory, so they must stay in the remote resolve set.
+func resolveAgentHasOnDiskSource(def parser.AgentDef) bool {
+	if !def.FileBased {
+		return false
+	}
+	if def.DiscoverFunc != nil {
+		return true
+	}
+	switch parser.ProviderMigrationModes()[def.Type] {
+	case parser.ProviderMigrationShadowCompare,
+		parser.ProviderMigrationProviderAuthoritative:
+		_, ok := parser.ProviderFactoryByType(def.Type)
+		return ok
+	default:
+		return false
+	}
+}
+
 // parseResolvedDirs parses script output into a map of agent type to transfer
 // target paths plus a deduplicated list of extra files (records tagged with
 // resolveFilePrefix).
Generated resolver output is NUL-delimited so remote diff --git a/internal/sync/classify_openhands_test.go b/internal/sync/classify_openhands_test.go index a23e4e563..580319fd0 100644 --- a/internal/sync/classify_openhands_test.go +++ b/internal/sync/classify_openhands_test.go @@ -34,11 +34,15 @@ func TestClassifyOnePath_OpenHands(t *testing.T) { )) eng := &Engine{ + db: openTestDB(t), agentDirs: map[parser.AgentType][]string{ parser.AgentOpenHands: {root}, }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentOpenHands: parser.ProviderMigrationProviderAuthoritative, + }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -80,12 +84,15 @@ func TestClassifyOnePath_OpenHands(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { - assert.Equal(t, parser.AgentOpenHands, got.Agent) - assert.Equal(t, tt.retPath, got.Path) + files := eng.classifyPaths([]string{tt.path}) + if !tt.want { + assert.Empty(t, files) + return } + require.Len(t, files, 1) + got := files[0] + assert.Equal(t, parser.AgentOpenHands, got.Agent) + assert.Equal(t, tt.retPath, got.Path) }) } } diff --git a/internal/sync/classify_vibe_test.go b/internal/sync/classify_vibe_test.go deleted file mode 100644 index 5b5cb7602..000000000 --- a/internal/sync/classify_vibe_test.go +++ /dev/null @@ -1,92 +0,0 @@ -package sync - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.kenn.io/agentsview/internal/parser" -) - -func TestClassifyOnePath_Vibe(t *testing.T) { - dir := t.TempDir() - sessionDir := "session_20260616_083518_0107f266" - - // Vibe layout: /session__/messages.jsonl. 
- msgPath := filepath.Join(dir, sessionDir, "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(msgPath), 0o755)) - require.NoError(t, os.WriteFile(msgPath, []byte("{}\n"), 0o644)) - - // A real meta.json sits beside messages.jsonl. Changes to it should - // route back to the sibling messages.jsonl, since title/model/usage - // stats are sourced from meta.json. - metaPath := filepath.Join(dir, sessionDir, "meta.json") - require.NoError(t, os.WriteFile(metaPath, []byte("{}\n"), 0o644)) - - deletedMetaDir := "session_20260616_083519_deleted" - deletedMetaMsgPath := filepath.Join(dir, deletedMetaDir, "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(deletedMetaMsgPath), 0o755)) - require.NoError(t, os.WriteFile(deletedMetaMsgPath, []byte("{}\n"), 0o644)) - deletedMetaPath := filepath.Join(dir, deletedMetaDir, "meta.json") - - // A non-session directory must not classify. - otherPath := filepath.Join(dir, "scratch", "messages.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(otherPath), 0o755)) - require.NoError(t, os.WriteFile(otherPath, []byte("{}\n"), 0o644)) - - eng := &Engine{ - agentDirs: map[parser.AgentType][]string{ - parser.AgentVibe: {dir}, - }, - } - geminiMap := make(map[string]map[string]string) - - tests := []struct { - name string - path string - want bool - wantPath string - wantProject string - }{ - { - name: "messages.jsonl under session dir classifies", - path: msgPath, - want: true, - wantPath: msgPath, - wantProject: sessionDir, - }, - { - name: "messages.jsonl outside session dir ignored", - path: otherPath, - want: false, - }, - { - name: "meta.json routes to sibling messages.jsonl", - path: metaPath, - want: true, - wantPath: msgPath, - wantProject: sessionDir, - }, - { - name: "deleted meta.json routes to sibling messages.jsonl", - path: deletedMetaPath, - want: true, - wantPath: deletedMetaMsgPath, - wantProject: deletedMetaDir, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, 
ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { - assert.Equal(t, parser.AgentVibe, got.Agent) - assert.Equal(t, tt.wantPath, got.Path) - assert.Equal(t, tt.wantProject, got.Project) - } - }) - } -} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 83e8e3253..e39a3b86d 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -859,7 +859,7 @@ func (e *Engine) expandClaudeDuplicateCandidates( out := files for _, claudeDir := range e.agentDirs[parser.AgentClaude] { - for _, candidate := range parser.DiscoverClaudeProjects(claudeDir) { + for _, candidate := range parser.ClaudeProjectSessionFiles(claudeDir) { sessionID := claudeSessionIDFromPath(candidate.Path) if _, ok := sessionIDs[sessionID]; !ok { continue @@ -907,20 +907,6 @@ func isUnder(dir, path string) (string, bool) { return rel, true } -// findContainingDir returns the first dir from dirs that is a -// parent of path, or "" if none match. -func findContainingDir(dirs []string, path string) string { - for _, d := range dirs { - if d == "" { - continue - } - if _, ok := isUnder(d, path); ok { - return d - } - } - return "" -} - // classifyContainerPath runs the container- and SQLite-style classifiers that // resolve a path whether or not it currently exists on disk (Kiro, Zed, // Shelley, and Vibe). 
Split out of classifyOnePath to keep that function @@ -937,9 +923,6 @@ func (e *Engine) classifyContainerPath( if df, ok := e.classifyShelleySQLitePath(path); ok { return df, true } - if df, ok := e.classifyVibePath(path); ok { - return df, true - } return parser.DiscoveredFile{}, false } @@ -973,66 +956,12 @@ func (e *Engine) classifyOnePath( return df, true } - // Claude: //.jsonl - // or: ///subagents/**/agent-.jsonl - for _, claudeDir := range e.agentDirs[parser.AgentClaude] { - if claudeDir == "" { - continue - } - if rel, ok := isUnder(claudeDir, path); ok { - if !strings.HasSuffix(path, ".jsonl") { - continue - } - parts := strings.Split(rel, sep) - - // Standard session: project/session.jsonl - if len(parts) == 2 { - stem := strings.TrimSuffix( - filepath.Base(path), ".jsonl", - ) - if strings.HasPrefix(stem, "agent-") { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentClaude, - }, true - } - - // Subagent: project/session/subagents/**/agent-*.jsonl - if len(parts) >= 4 && parts[2] == "subagents" { - stem := strings.TrimSuffix( - parts[len(parts)-1], ".jsonl", - ) - if !strings.HasPrefix(stem, "agent-") { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentClaude, - }, true - } - } - } - - // Cowork: ///local_/.claude/ - // projects//.jsonl (transcript), or the sibling - // local_.json metadata file (resolves to its transcript). 
- for _, coworkDir := range e.agentDirs[parser.AgentCowork] { - if coworkDir == "" { - continue - } - if transcript, ok := parser.ClassifyCoworkPath( - coworkDir, path, - ); ok { - return parser.DiscoveredFile{ - Path: transcript, - Agent: parser.AgentCowork, - }, true - } - } + // Claude change-path classification is provider-authoritative; the + // Claude provider's SourcesForChangedPath reproduces the + // //.jsonl and + // ///subagents/**/agent-.jsonl + // shapes, so the legacy block was removed when Claude was folded + // onto its provider. // Codex: either ////.jsonl // or /.jsonl for archived sessions. @@ -1139,62 +1068,6 @@ func (e *Engine) classifyOnePath( } } - // OpenHands CLI: - // //base_state.json - // //TASKS.json - // //events/*.json - for _, openHandsDir := range e.agentDirs[parser.AgentOpenHands] { - if openHandsDir == "" { - continue - } - if rel, ok := isUnder(openHandsDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) < 2 || !parser.IsValidSessionID(parts[0]) { - continue - } - switch { - case len(parts) == 2 && - (parts[1] == "base_state.json" || - parts[1] == "TASKS.json"): - case len(parts) == 3 && - parts[1] == "events" && - strings.HasSuffix(parts[2], ".json"): - default: - continue - } - return parser.DiscoveredFile{ - Path: filepath.Join( - openHandsDir, parts[0], - ), - Agent: parser.AgentOpenHands, - }, true - } - } - - // Cursor: - // //agent-transcripts/.{txt,jsonl} - // //agent-transcripts//.{txt,jsonl} - for _, cursorDir := range e.agentDirs[parser.AgentCursor] { - if cursorDir == "" { - continue - } - if rel, ok := isUnder(cursorDir, path); ok { - projectDir, ok := parser.ParseCursorTranscriptRelPath(rel) - if !ok { - continue - } - project := parser.DecodeCursorProjectDir(projectDir) - if project == "" { - project = "unknown" - } - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentCursor, - }, true - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: 
/globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -1411,55 +1284,6 @@ func (e *Engine) classifyAiderPath( return parser.DiscoveredFile{}, false } -// classifyVibePath handles Vibe's session directory layout: -// -// /session__/messages.jsonl -// /session__/meta.json -// -// meta.json changes route back to messages.jsonl because title, model, -// timestamps, and usage stats are sourced from the sidecar metadata file. -func (e *Engine) classifyVibePath( - path string, -) (parser.DiscoveredFile, bool) { - sep := string(filepath.Separator) - for _, vibeDir := range e.agentDirs[parser.AgentVibe] { - if vibeDir == "" { - continue - } - rel, ok := isUnder(vibeDir, path) - if !ok { - continue - } - parts := strings.Split(rel, sep) - if len(parts) != 2 || !strings.HasPrefix(parts[0], "session_") { - continue - } - switch parts[1] { - case "messages.jsonl": - if _, err := os.Stat(path); err != nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentVibe, - }, true - case "meta.json": - messagesPath := filepath.Join( - vibeDir, parts[0], "messages.jsonl", - ) - if _, err := os.Stat(messagesPath); err == nil { - return parser.DiscoveredFile{ - Path: messagesPath, - Project: parts[0], - Agent: parser.AgentVibe, - }, true - } - } - } - return parser.DiscoveredFile{}, false -} - // classifyAntigravitySidecarPath maps Antigravity sidecar events -- // IDE annotations/.pbtxt plus IDE and CLI brain//* artifacts // -- to every session source file that renders them. A CLI storage @@ -4039,13 +3863,6 @@ func (e *Engine) processFile( // Capture mtime once from the initial stat so all // downstream cache operations use a consistent value. 
mtime := info.ModTime().UnixNano() - if file.Agent == parser.AgentOpenHands { - snapshot, err := parser.OpenHandsSnapshot(file.Path) - if err != nil { - return processResult{err: err} - } - mtime = snapshot.Mtime - } if file.Agent == parser.AgentCowork { mtime = parser.CoworkSessionMtime(file.Path, mtime) } @@ -4092,13 +3909,10 @@ func (e *Engine) processFile( var res processResult switch file.Agent { case parser.AgentClaude: - if strings.HasPrefix(file.Path, "s3://") { - res = e.processS3Session(ctx, file, info) - } else { - res = e.processClaude(ctx, file, info) - } - case parser.AgentCowork: - res = e.processCowork(file, info) + // Non-S3 Claude is provider-authoritative and handled earlier by + // processProviderFile; only s3:// Claude sources fall through to the + // legacy dispatch, via the S3 sync path. + res = e.processS3Session(ctx, file, info) case parser.AgentCodex: if strings.HasPrefix(file.Path, "s3://") { res = e.processS3Session(ctx, file, info) @@ -4111,10 +3925,6 @@ func (e *Engine) processFile( res = e.processReasonix(file, info) case parser.AgentGemini: res = e.processGemini(file, info) - case parser.AgentOpenHands: - res = e.processOpenHands(file, info) - case parser.AgentCursor: - res = e.processCursor(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -4123,10 +3933,6 @@ func (e *Engine) processFile( res = e.processKiro(file, info) case parser.AgentKiroIDE: res = e.processKiroIDE(file, info) - case parser.AgentHermes: - res = e.processHermes(file, info) - case parser.AgentVibe: - res = e.processVibe(file, info) case parser.AgentPositron: res = e.processPositron(file, info) case parser.AgentZed: @@ -4192,6 +3998,12 @@ func (e *Engine) processProviderFile( if mode != parser.ProviderMigrationProviderAuthoritative { return processResult{}, false } + // S3 sources are not provider-owned: the provider source sets read local + // files, so s3:// paths use the legacy S3 sync path 
(processS3Session), + // which handles object fetch, fingerprinting, and per-agent skip logic. + if strings.HasPrefix(file.Path, "s3://") { + return processResult{}, false + } if file.ProviderSource != nil && !file.ProviderProcess { return processResult{}, false } @@ -4221,6 +4033,36 @@ func (e *Engine) processProviderFile( }, true } + // SyncSingleSession resolves a single session by ID and carries the + // caller-preferred project (typically the DB-preserved value, so a + // user override is not reverted) on file.Project without an explicit + // ProviderSource. Provider FindSource re-derives ProjectHint from the + // path, so honor the caller's project as the hint in that case. Full + // discovery and changed-path classification always supply + // file.ProviderSource, whose ProjectHint stays authoritative. + if file.ProviderSource == nil && file.Project != "" { + source.ProjectHint = file.Project + } + + // DB-freshness skip for single-session JSONL providers (Claude): + // when the stored session's size, mtime, and data version already + // match the source and its project does not need reparse, skip the + // parse entirely. This reproduces the legacy process arm's + // shouldSkipFile gate so an unchanged session is not re-parsed on + // every full sync. + if mtime, fresh := e.providerSingleSessionFresh(ctx, provider, source, file); fresh { + return processResult{ + skip: true, + mtime: mtime, + }, true + } + if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + return processResult{ + skip: true, + mtime: freshMtime, + }, true + } + fingerprint, err := provider.Fingerprint(ctx, source) if err != nil { return processResult{err: err}, true @@ -4241,6 +4083,41 @@ func (e *Engine) processProviderFile( } } + // Append-only incremental parse for already-synced JSONL files. 
+ // When the incremental path declines but signals forceReplace, + // carry the flag onto the full parse so the write path replaces + // stored messages instead of appending on top of stale rows. + incRes, incOK := e.tryProviderIncrementalAppend( + ctx, provider, source, file, fingerprint, + ) + if incOK { + incRes.mtime = fingerprint.MTimeNS + incRes.cacheSkip = cacheSkip + incRes.cacheKey = cacheKey + return incRes, true + } + incForceReplace := incRes.forceReplace + + // DB-stored-file-info skip: a session whose persisted file_size/file_mtime + // already match the source fingerprint (and whose data_version is current) + // is unchanged and need not be reparsed. This reproduces the legacy + // shouldSkipByPath behavior the per-agent process methods provided before the + // migration, so a repeat full/periodic sync of an untouched + // provider-authoritative session (OpenHands, Cursor, Hermes, Vibe, ...) + // skips instead of rewriting. It only skips on an exact size+mtime match, so + // a provider whose fingerprint mtime differs from the stored value simply + // reparses, matching the prior behavior. Claude and Cowork have their own + // earlier freshness checks; this is the generic fallback for the rest. 
+	if !e.forceParse && !file.ForceParse &&
+		e.providerSourceUnchangedInDB(source, fingerprint) {
+		return processResult{
+			skip:      true,
+			mtime:     fingerprint.MTimeNS,
+			cacheSkip: cacheSkip,
+			cacheKey:  cacheKey,
+		}, true
+	}
+
 	outcome, err := provider.Parse(ctx, parser.ParseRequest{
 		Source:      source,
 		Fingerprint: fingerprint,
@@ -4288,9 +4165,15 @@ func (e *Engine) processProviderFile(
 		cacheSkip:             cacheSkip,
 		cacheKey:              cacheKey,
 		noCacheSkip:           !cleanCache,
-		forceReplace:          outcome.ForceReplace,
+		forceReplace:          outcome.ForceReplace || incForceReplace,
 		suppressPresenceSweep: !outcome.ResultSetComplete,
 	}
+	// Incremental-append providers (Claude) need the stored file
+	// identity so a later sync can detect an atomic file replacement
+	// (new inode/device) and fall back to a full parse instead of
+	// appending on top of stale state. Match the legacy process arm,
+	// which stamped inode/device from the source file stat.
+	e.stampProviderFileIdentity(provider, source, res.results)
 	for _, result := range outcome.Results {
 		if result.DataVersion == parser.DataVersionNeedsRetry {
 			if res.retrySessionIDs == nil {
@@ -4308,9 +4191,126 @@ func (e *Engine) processProviderFile(
 			})
 		}
 	}
+	e.applyProviderFilePathPolicies(provider, file.Agent, &res)
 	return res, true
 }
 
+// applyProviderFilePathPolicies reproduces the DB-aware, file-path-scoped
+// session bookkeeping that a provider cannot do on its own (it has no database
+// handle). It runs only for single-session-per-file providers whose canonical
+// ID can change while the source path is unchanged (e.g. Vibe, whose ID flips
+// between the meta.json session_id and the directory-name fallback as meta.json
+// appears or is removed). Multi-session sources are skipped, where several
+// distinct sessions legitimately share one path; for stable-ID providers it is
+// a no-op because the stored ID always matches the freshly parsed one.
+//
+// Two policies are applied per result, keyed by the (path-rewritten) file_path:
+//
+//  1. Resurrection guard: if the user removed the session occupying this path —
+//     a trashed row at the same path, or an alternate identity for the path
+//     (the provider's excluded fallback ID, or a stale stored ID) that is now
+//     trashed or permanently excluded — the freshly parsed row must not be
+//     written: it is dropped and, unless already trashed, its ID is excluded.
+//  2. Stale-row cleanup: any other live stored ID at the same path that the
+//     current parse no longer emits is added to the exclusion list so the
+//     superseded row is deleted.
+func (e *Engine) applyProviderFilePathPolicies(
+	provider parser.Provider,
+	agent parser.AgentType,
+	res *processResult,
+) {
+	if provider.Capabilities().Source.MultiSessionSource == parser.CapabilitySupported {
+		return
+	}
+	if len(res.results) == 0 {
+		return
+	}
+
+	excluded := make(map[string]struct{}, len(res.excludedSessionIDs))
+	for _, id := range e.applyIDPrefixToSessionIDs(res.excludedSessionIDs) {
+		excluded[id] = struct{}{}
+	}
+	addExclusion := func(id string) {
+		if id == "" {
+			return
+		}
+		if _, ok := excluded[id]; ok {
+			return
+		}
+		excluded[id] = struct{}{}
+		res.excludedSessionIDs = append(res.excludedSessionIDs, id)
+	}
+
+	kept := res.results[:0] // filter in place, reusing the backing array of res.results
+	for _, result := range res.results {
+		path := result.Session.File.Path
+		if path == "" {
+			kept = append(kept, result)
+			continue
+		}
+		lookupPath := path
+		if e.pathRewriter != nil {
+			lookupPath = e.pathRewriter(path)
+		}
+		currentID := result.Session.ID
+		currentPrefixedID := e.idPrefix + result.Session.ID
+
+		existingIDs, err := e.db.ListSessionIDsByFilePath(lookupPath, string(agent))
+		if err != nil { // lookup failed: log it and keep the result untouched
+			log.Printf("list session IDs by file path: %v", err)
+			kept = append(kept, result)
+			continue
+		}
+
+		// Resurrection guard. The path's identity is removed when a trashed row
+		// shares it, or when any alternate identity for the path (the
+		// provider's excluded fallback IDs or a stale stored ID) is trashed or
+		// permanently excluded. In that case the new row must not be written.
+		suppress := e.db.HasTrashedSessionByFilePath(lookupPath, string(agent))
+		if !suppress {
+			for id := range excluded {
+				if id == currentID || id == currentPrefixedID {
+					continue
+				}
+				if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) {
+					suppress = true
+					break
+				}
+			}
+		}
+		if !suppress {
+			for _, id := range existingIDs {
+				if id == currentID || id == currentPrefixedID {
+					continue
+				}
+				if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) {
+					suppress = true
+					break
+				}
+			}
+		}
+		if suppress {
+			// Keep a trashed current ID trashed rather than converting it to a
+			// parser deletion; the upsert's trash guard already hides it.
+			if (currentPrefixedID == "" || !e.db.IsSessionTrashed(currentPrefixedID)) &&
+				!e.db.IsSessionTrashed(currentID) {
+				addExclusion(currentID)
+			}
+			continue
+		}
+
+		// Stale-row cleanup for live siblings the current parse supersedes.
+		for _, id := range existingIDs {
+			if id == currentID || id == currentPrefixedID {
+				continue
+			}
+			addExclusion(id)
+		}
+		kept = append(kept, result)
+	}
+	res.results = kept
+}
+
 func providerOutcomeAllowsCleanSkipCache(outcome parser.ParseOutcome) bool {
 	if !outcome.ResultSetComplete {
 		return false
@@ -4679,13 +4679,10 @@ func (f fakeSnapshotInfo) ModTime() time.Time {
 func (f fakeSnapshotInfo) IsDir() bool { return false }
 func (f fakeSnapshotInfo) Sys() any { return nil }
 
-func (e *Engine) processClaude(
-	ctx context.Context,
-	file parser.DiscoveredFile, info os.FileInfo,
-) processResult {
-	return e.processClaudeWithStoredSkip(ctx, file, info, true)
-}
-
+// processClaudeWithStoredSkip parses a Claude Code JSONL session from a local
+// file. Non-S3 Claude sources are provider-authoritative and never reach here;
+// this remains the parse path for s3:// Claude sources, which the S3 sync path
+// fetches to a local file and feeds in with allowStoredSkip=false.
func (e *Engine) processClaudeWithStoredSkip( ctx context.Context, file parser.DiscoveredFile, info os.FileInfo, @@ -4764,48 +4761,243 @@ func (e *Engine) processClaudeWithStoredSkip( } } -// processCowork parses a Claude Desktop "cowork" (local agent mode) -// session. The transcript is a standard Claude Code JSONL file nested -// inside the cowork session directory, so the work is delegated to the -// Claude parser and rewritten into the cowork namespace by -// parser.ParseCoworkSession. Cowork session IDs are "cowork:"-prefixed, so -// the skip check keys off file_path rather than the bare filename stem. -func (e *Engine) processCowork( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - - // The session title lives in the sibling metadata file, so a rename - // changes only that file. Skip on the composite (transcript+metadata) - // mtime so renames are re-parsed instead of skipped as unchanged. - compositeMtime := parser.CoworkSessionMtime( - file.Path, info.ModTime().UnixNano(), - ) - fi := fakeSnapshotInfo{fSize: info.Size(), fMtime: compositeMtime} - if e.shouldSkipByPath(file.Path, fi) { - return processResult{skip: true} +// providerSingleSessionFresh reports whether a single-session JSONL +// provider's source (Claude) maps to a stored session that is already +// up to date: the source size and mtime match what is stored, the row +// is at the current parser data version, and its project does not need +// reparse. It reproduces the legacy Claude process arm's shouldSkipFile +// gate so an unchanged session is skipped instead of re-parsed every +// full sync. Providers without incremental append, multi-session +// sources, or sources that are not a single physical file are never +// considered fresh here and always fall through to the full parse. 
+func (e *Engine) providerSingleSessionFresh( + ctx context.Context, + provider parser.Provider, + source parser.SourceRef, + file parser.DiscoveredFile, +) (int64, bool) { + // Match the legacy shouldSkipFile gate, which keyed off the + // engine-wide forceParse (parse-diff) flag only. A per-file + // ForceParse (set by SyncSingleSession to bypass the error skip + // cache) must not defeat the DB-freshness skip: an unchanged session + // is still skipped so a single-session resync does not, for example, + // reapply a worktree project mapping to a file that has not changed. + if e.forceParse { + return 0, false + } + // Claude is the single-physical-file provider that takes the + // append-only incremental path. Its source stem is the session ID, + // so DB freshness can be checked by that ID even though a DAG fork + // can later split the file into several sessions. + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return 0, false + } + path := providerDiscoveredPath(source) + if path == "" { + return 0, false + } + sessionID := claudeSessionIDFromPath(path) + if sessionID == "" { + return 0, false + } + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + info, err := os.Stat(lookupPath) + if err != nil { + info, err = os.Stat(path) + if err != nil { + return 0, false + } } + if !e.shouldSkipFile(sessionID, info) { + return 0, false + } + sess, _ := e.db.GetSession(ctx, e.idPrefix+sessionID) + return info.ModTime().UnixNano(), sess != nil && + sess.Project != "" && + !parser.NeedsProjectReparse(sess.Project) +} - results, excludedIDs, err := parser.ParseCoworkSession( - file.Path, e.machine, - ) +func (e *Engine) providerCoworkSourceFresh( + source parser.SourceRef, + file parser.DiscoveredFile, +) (int64, bool) { + if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + return 0, false + } + path := providerDiscoveredPath(source) + if path == "" { + return 0, false + 
} + lookupPath := path + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(path) + } + info, err := os.Stat(lookupPath) if err != nil { - return processResult{err: err} + info, err = os.Stat(path) + if err != nil { + return 0, false + } + } + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if !e.shouldSkipByPath(path, effectiveInfo) { + return 0, false } + return mtime, true +} +// providerSourceUnchangedInDB reports whether a provider source's persisted +// file metadata already matches its current fingerprint, so a reparse would be +// redundant. It compares the stored file_size/file_mtime for the discovered +// path against the fingerprint and requires a current data_version, mirroring +// the legacy shouldSkipByPath gate. It returns false on a missing stored row, an +// empty key, or a non-fingerprint identity (no size and no mtime, e.g. a +// container source), so those callers fall through to a full parse. +func (e *Engine) providerSourceUnchangedInDB( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) bool { + if fingerprint.MTimeNS == 0 && fingerprint.Size == 0 { + return false + } + lookupPath := providerDiscoveredPath(source) + if lookupPath == "" { + return false + } + if e.pathRewriter != nil { + lookupPath = e.pathRewriter(lookupPath) + } + storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) + if !ok { + return false + } + if storedSize != fingerprint.Size || storedMtime != fingerprint.MTimeNS { + return false + } + // A stale stored project (e.g. a generated roborev CI worktree name) + // must defeat the unchanged-source skip so the corrected project is + // reparsed, mirroring shouldSkipCodexFingerprint and the in-memory + // skip-cache bypass in processProviderFile. 
+ if project, ok := e.db.GetProjectByPath(lookupPath); ok && + parser.NeedsProjectReparse(project) { + return false + } + return e.db.GetDataVersionByPath(lookupPath) >= db.CurrentDataVersion() +} + +// stampProviderFileIdentity copies the source file's inode and device onto +// every parsed result for an incremental-append provider (Claude). The +// legacy process arm stamped this identity from the source stat so the +// incremental path can later detect an atomic file replacement and fall +// back to a full parse. Providers whose source is not a single physical +// file, or that do not support incremental append, are left untouched. +func (e *Engine) stampProviderFileIdentity( + provider parser.Provider, + source parser.SourceRef, + results []parser.ParseResult, +) { + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return + } + path := providerDiscoveredPath(source) + if path == "" { + return + } + info, err := os.Stat(path) + if err != nil { + return + } inode, device := getFileIdentity(info) - hash, hashErr := ComputeFileHash(file.Path) for i := range results { results[i].Session.File.Inode = inode results[i].Session.File.Device = device - if hashErr == nil { - results[i].Session.File.Hash = hash - } } +} - return processResult{ - results: results, - excludedSessionIDs: excludedIDs, +// tryProviderIncrementalAppend reproduces the legacy incremental-append +// sync path for a provider-authoritative agent that supports append-only +// incremental parsing (Claude). The provider owns the byte-offset parse +// via ParseIncremental, but the engine still owns the DB-aware +// bookkeeping (session lookup, data-version and identity guards, ordinal +// resume, cross-sync split detection, and cumulative counters), so this +// drives the shared tryIncrementalJSONL with an adapter that calls the +// provider. 
Returns (result, true) when the incremental path produced a +// terminal result, or (result, false) to fall through to the full +// provider parse (carrying any forceReplace signal). +func (e *Engine) tryProviderIncrementalAppend( + ctx context.Context, + provider parser.Provider, + source parser.SourceRef, + file parser.DiscoveredFile, + fingerprint parser.SourceFingerprint, +) (processResult, bool) { + // Match the legacy tryIncrementalJSONL gate, which suppressed append + // deltas only under the engine-wide forceParse (parse-diff) flag. A + // per-file ForceParse does not disable incremental append. + if e.forceParse { + return processResult{}, false + } + if provider.Capabilities().Source.IncrementalAppend != + parser.CapabilitySupported { + return processResult{}, false } + path := providerDiscoveredPath(source) + if path == "" { + return processResult{}, false + } + info, err := os.Stat(path) + if err != nil { + return processResult{}, false + } + + parseFn := func( + _ string, offset int64, startOrdinal int, lastEntryUUID string, + ) ([]parser.ParsedMessage, time.Time, int64, error) { + outcome, status, perr := provider.ParseIncremental( + ctx, + parser.IncrementalRequest{ + Source: source, + Fingerprint: fingerprint, + SessionID: e.idPrefix + claudeSessionIDFromPath(path), + Offset: offset, + StartOrdinal: startOrdinal, + Machine: e.machine, + LastEntryUUID: lastEntryUUID, + }, + ) + if perr != nil { + return nil, time.Time{}, 0, perr + } + switch status { + case parser.IncrementalNeedsFullParse: + if outcome.ForceReplace { + // Signal the shared helper to fall back to a + // full parse that replaces stored messages. + return nil, time.Time{}, 0, + parser.ErrClaudeIncrementalNeedsFullParse + } + // A plain full-parse fallback (e.g. DAG detected): + // return a non-fallback error so the helper runs a + // normal full parse without forceReplace. 
+ return nil, time.Time{}, 0, parser.ErrDAGDetected + case parser.IncrementalNoNewData: + return nil, time.Time{}, 0, nil + default: + return outcome.Messages, outcome.EndedAt, + outcome.ConsumedBytes, nil + } + } + + return e.tryIncrementalJSONL(file, info, file.Agent, parseFn) } // incrementalParseFunc reads new JSONL lines from a file @@ -4866,9 +5058,6 @@ func (e *Engine) tryIncrementalJSONL( } currentSize := info.Size() - if currentSize <= inc.FileSize { - return processResult{}, false - } // A prior sync that stored no message rows has no safe append // boundary. Rewritten files can grow in place and keep the same @@ -4895,9 +5084,23 @@ func (e *Engine) tryIncrementalJSONL( inc.FileInode, curInode, inc.FileDevice, curDevice, ) - return processResult{}, false + return processResult{forceReplace: true}, false } } + if currentSize < inc.FileSize { + log.Printf( + "incremental %s %s: file truncated from %d to %d, full parse", + agent, file.Path, inc.FileSize, currentSize, + ) + return processResult{forceReplace: true}, false + } + if currentSize == inc.FileSize { + log.Printf( + "incremental %s %s: file size unchanged at %d but changed since last sync, full parse", + agent, file.Path, currentSize, + ) + return processResult{forceReplace: true}, false + } // Persist the same effective file_mtime a full parse would store. 
For // Codex that folds in session_index.jsonl (parser.CodexEffectiveMtime), @@ -5968,139 +6171,6 @@ func (e *Engine) processKiroIDE( } } -func (e *Engine) processHermes( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - if filepath.Base(file.Path) == "state.db" { - results, err := parser.ParseHermesArchive( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - return processResult{results: results, forceReplace: true} - } - - sess, msgs, err := parser.ParseHermesSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - -func (e *Engine) processVibe( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Title/model/usage stats come from the sibling meta.json, so the - // skip check and stored file info must account for it too, or a - // meta.json-only update never refreshes those fields. - effectiveInfo := vibeEffectiveInfo(file.Path, info) - if e.shouldSkipByPath(file.Path, effectiveInfo) { - return processResult{skip: true} - } - - // Pass an empty project so the parser-derived project (from the - // session's working directory) is kept. file.Project holds the - // cryptic session directory name, which must not become the project. 
- sess, msgs, usageEvents, err := parser.ParseVibeSessionWrapper( - file.Path, "", e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - sess.File.Size = effectiveInfo.Size() - sess.File.Mtime = effectiveInfo.ModTime().UnixNano() - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - var excludedIDs []string - lookupPath := file.Path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(file.Path) - } - existingIDs, err := e.db.ListSessionIDsByFilePath( - lookupPath, string(parser.AgentVibe), - ) - if err != nil { - return processResult{err: err} - } - currentID := sess.ID - currentPrefixedID := e.idPrefix + sess.ID - fallbackID := "vibe:" + filepath.Base(filepath.Dir(file.Path)) - for _, id := range existingIDs { - if id != currentID && id != currentPrefixedID { - excludedIDs = append(excludedIDs, id) - } - } - - currentFallbackTrashed := sess.ID == fallbackID && e.isSessionTrashed(fallbackID) - if e.isSessionBlocked(fallbackID) || - (sess.ID == fallbackID && - e.db.HasTrashedSessionByFilePath(lookupPath, string(parser.AgentVibe))) { - if !currentFallbackTrashed && !slices.Contains(excludedIDs, sess.ID) { - excludedIDs = append(excludedIDs, sess.ID) - } - return processResult{excludedSessionIDs: excludedIDs} - } - - // Sessions parsed before meta.json existed (or was parseable) are stored - // under the directory-name fallback ID. Keep excluding that legacy row even - // if it predates file_path metadata and did not appear in the path lookup. 
- if sess.ID != fallbackID && !slices.Contains(excludedIDs, fallbackID) { - excludedIDs = append(excludedIDs, fallbackID) - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - excludedSessionIDs: excludedIDs, - } -} - -func (e *Engine) isSessionBlocked(id string) bool { - if e.idPrefix != "" && !strings.HasPrefix(id, e.idPrefix) { - prefixed := e.idPrefix + id - return e.db.IsSessionExcluded(prefixed) || e.db.IsSessionTrashed(prefixed) - } - if e.db.IsSessionExcluded(id) || e.db.IsSessionTrashed(id) { - return true - } - return false -} - -func (e *Engine) isSessionTrashed(id string) bool { - if e.idPrefix != "" && !strings.HasPrefix(id, e.idPrefix) { - return e.db.IsSessionTrashed(e.idPrefix + id) - } - return e.db.IsSessionTrashed(id) -} - // vibeEffectiveInfo returns size/mtime for a Vibe session that account // for the sibling meta.json file: size is the sum of both files, and // mtime is the larger of the two. Returns info unchanged when meta.json @@ -6332,93 +6402,6 @@ func (e *Engine) processAntigravityCLI( } } -func (e *Engine) processOpenHands( - file parser.DiscoveredFile, _ os.FileInfo, -) processResult { - snapshot, err := parser.OpenHandsSnapshot(file.Path) - if err != nil { - return processResult{err: err} - } - - fi := fakeSnapshotInfo{ - fSize: snapshot.Size, fMtime: snapshot.Mtime, - } - if e.shouldSkipByPath(file.Path, fi) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseOpenHandsSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - -func (e *Engine) processCursor( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Skip .txt if a sibling .jsonl exists — .jsonl is the - // richer format and takes precedence. 
- if stem, ok := strings.CutSuffix(file.Path, ".txt"); ok { - if parser.IsRegularFile(stem + ".jsonl") { - return processResult{skip: true} - } - } - - sessionID := parser.CursorSessionID(file.Path) - - if e.shouldSkipFile(sessionID, info) { - return processResult{skip: true} - } - - // Re-validate containment immediately before parsing to - // close the TOCTOU window between discovery and read. - // The parser opens with O_NOFOLLOW (rejecting symlinked - // final components), and this check catches parent - // directory swaps. - if root := findContainingDir( - e.agentDirs[parser.AgentCursor], file.Path, - ); root != "" { - if err := validateCursorContainment( - root, file.Path, - ); err != nil { - return processResult{ - err: fmt.Errorf( - "containment check: %w", err, - ), - } - } - } - - sess, msgs, err := parser.ParseCursorSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - // Hash is computed inside ParseCursorSession from the - // already-read data to avoid re-opening the file by path. - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -6432,31 +6415,6 @@ func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// validateCursorContainment re-resolves both root and path -// to verify the file still resides within the cursor projects -// directory. Returns an error if containment fails. 
-func validateCursorContainment( - cursorDir, path string, -) error { - resolvedRoot, err := filepath.EvalSymlinks(cursorDir) - if err != nil { - return fmt.Errorf("resolve root: %w", err) - } - resolvedPath, err := filepath.EvalSymlinks(path) - if err != nil { - return fmt.Errorf("resolve path: %w", err) - } - rel, err := filepath.Rel(resolvedRoot, resolvedPath) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - return fmt.Errorf( - "%s escapes %s", path, cursorDir, - ) - } - return nil -} - // computeFinalStreak counts trailing consecutive failures // from the end of the tool call list. func computeFinalStreak(calls []signals.ToolCallRow) int { @@ -8696,23 +8654,6 @@ func (e *Engine) SyncSingleSessionContext( } return err } - if def.Type == parser.AgentHermes { - hermesProject := "" - if sess, _ := e.db.GetSession(ctx, sessionID); sess != nil && - sess.Project != "" && !parser.NeedsProjectReparse(sess.Project) { - hermesProject = sess.Project - } - ok, err := e.syncSingleHermesArchive( - sessionID, path, hermesProject, - ) - if err != nil { - return err - } - if ok { - return nil - } - } - agent := def.Type // Clear skip cache so explicit re-sync always processes @@ -8901,52 +8842,6 @@ func (e *Engine) SyncSingleSessionContext( return nil } -func (e *Engine) syncSingleHermesArchive( - sessionID, path, project string, -) (bool, error) { - stateDB := "" - if filepath.Base(path) == "state.db" { - stateDB = path - } else if filepath.Base(filepath.Dir(path)) == "sessions" { - candidate := filepath.Join( - filepath.Dir(filepath.Dir(path)), "state.db", - ) - if parser.IsRegularFile(candidate) { - stateDB = candidate - } - } - if stateDB == "" { - return false, nil - } - - results, err := parser.ParseHermesArchive( - stateDB, project, e.machine, - ) - if err != nil { - return true, err - } - for _, pr := range results { - if pr.Session.ID != sessionID { - continue - } - if err := e.writeSessionFull(pendingWrite{ - 
sess: pr.Session, - msgs: pr.Messages, - usageEvents: pr.UsageEvents, - }); err != nil && !isIntentionalSessionSkip(err) && - !errors.Is(err, errSessionPreserved) { - return true, fmt.Errorf( - "write session %s: %w", pr.Session.ID, err, - ) - } - return true, nil - } - return true, fmt.Errorf( - "session %s not found in Hermes archive %s", - sessionID, stateDB, - ) -} - func (e *Engine) applyWorktreeMappingToSingleSession( sessionID string, ) error { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 132b61347..3bf7bfb49 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -7318,6 +7318,106 @@ func TestIncrementalSync_ClaudeFileReplaced(t *testing.T) { assert.Equal(t, newInfo.Size(), *full.FileSize, "file_size = %v, want %d (full-parse size)", *full.FileSize, newInfo.Size()) } +func TestIncrementalSync_ClaudeTruncatedFileReplacesStoredMessages(t *testing.T) { + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("stale assistant", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "truncated-replace.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + assertSessionMessageCount(t, env.db, "truncated-replace", 2) + + replacement := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("replacement", tsZero), + ) + require.Less(t, len(replacement), len(original), "replacement must truncate file") + require.NoError(t, os.WriteFile(path, []byte(replacement), 0o644), "write truncated replacement") + + env.engine.SyncPaths([]string{path}) + + assertSessionMessageCount(t, env.db, "truncated-replace", 1) + msgs := fetchMessages(t, env.db, "truncated-replace") + require.Len(t, msgs, 1) + assert.Equal(t, "replacement", msgs[0].Content) +} + +func TestIncrementalSync_ClaudeSameSizeFileReplaceUsesFullParse(t *testing.T) { + if runtime.GOOS == "windows" { + 
t.Skip("identity tracking is a no-op on Windows") + } + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("alpha", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "same-size-replace.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + + replacement := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("third", tsZero), + testjsonl.ClaudeAssistantJSON("bravo", tsZeroS5), + ) + require.Len(t, replacement, len(original), "replacement fixture must keep same byte size") + tmp := path + ".tmp" + require.NoError(t, os.WriteFile(tmp, []byte(replacement), 0o644), "write replacement") + require.NoError(t, os.Rename(tmp, path), "rename replacement") + + env.engine.SyncPaths([]string{path}) + + msgs := fetchMessages(t, env.db, "same-size-replace") + require.Len(t, msgs, 2) + assert.Equal(t, "third", msgs[0].Content) + assert.Equal(t, "bravo", msgs[1].Content) +} + +func TestIncrementalSync_ClaudeSameSizeInPlaceRewriteClearsStaleRows(t *testing.T) { + env := setupTestEnv(t) + + original := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON("first", tsZero), + testjsonl.ClaudeAssistantJSON("stale assistant", tsZeroS5), + ) + path := env.writeClaudeSession( + t, "proj", "same-size-in-place.jsonl", original, + ) + env.engine.SyncAll(context.Background(), nil) + assertSessionMessageCount(t, env.db, "same-size-in-place", 2) + + replacement := "" + for padding := range 4096 { + candidate := testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "replacement"+strings.Repeat("x", padding), + tsZero, + ), + ) + if len(candidate) == len(original) { + replacement = candidate + break + } + } + require.NotEmpty(t, replacement, "failed to build same-size replacement fixture") + require.Len(t, replacement, len(original), "replacement fixture must keep same byte size") + + require.NoError(t, os.WriteFile(path, []byte(replacement), 0o644), "write in-place replacement") + now := 
time.Now().Add(time.Second) + require.NoError(t, os.Chtimes(path, now, now), "bump replacement mtime") + + env.engine.SyncPaths([]string{path}) + + assertSessionMessageCount(t, env.db, "same-size-in-place", 1) + msgs := fetchMessages(t, env.db, "same-size-in-place") + require.Len(t, msgs, 1) + assert.Contains(t, msgs[0].Content, "replacement") +} + // TestIncrementalSync_ClaudeMidStreamSplitFallsBackToFullParse covers // the cross-sync split case: the first sync stores a partial assistant // snapshot (one of several streaming snapshots) and the next sync diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index 7b3538587..c50a01440 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1225,10 +1225,15 @@ func TestProcessAntigravityWALOnlyUpdateNotSkipped(t *testing.T) { func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { database := openTestDB(t) - e := &Engine{db: database} ctx := context.Background() root := t.TempDir() + e := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVibe: {root}, + }, + }) + sessionDir := filepath.Join(root, "session_20260616_083518_0107f266") require.NoError(t, os.MkdirAll(sessionDir, 0o755)) @@ -1246,32 +1251,19 @@ func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { 0o644, )) - file := parser.DiscoveredFile{ - Agent: parser.AgentVibe, - Path: msgPath, - } - - res := e.processFile(ctx, file) - require.NoError(t, res.err) - require.False(t, res.skip) - require.Len(t, res.results, 1) - require.Equal(t, "Original title", res.results[0].Session.SessionName) + canonicalID := "vibe:abc" - pw := pendingWrite{ - sess: res.results[0].Session, - msgs: res.results[0].Messages, - } - written, _, failed := e.writeBatch( - []pendingWrite{pw}, syncWriteDefault, false, - ) - require.Equal(t, 0, failed) - require.Equal(t, 1, written) - - res = e.processFile(ctx, file) - require.True(t, res.skip, "unchanged session should skip") + 
e.SyncPaths([]string{msgPath}) + sess, err := database.GetSession(ctx, canonicalID) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Original title", *sess.DisplayName) // meta.json-only update: messages.jsonl is untouched, but the title - // (sourced from meta.json) changes. + // (sourced from meta.json) changes. The Vibe provider's composite + // fingerprint folds the sibling meta.json mtime in, so the change busts + // the skip cache and triggers a reparse rather than a skip. info, err := os.Stat(msgPath) require.NoError(t, err) metaTime := info.ModTime().Add(5 * time.Second) @@ -1282,10 +1274,12 @@ func TestProcessVibeMetaOnlyUpdateNotSkipped(t *testing.T) { )) require.NoError(t, os.Chtimes(metaPath, metaTime, metaTime)) - res = e.processFile(ctx, file) - require.False(t, res.skip, "meta.json-only update must trigger a reparse") - require.Len(t, res.results, 1) - assert.Equal(t, "Renamed title", res.results[0].Session.SessionName) + e.SyncPaths([]string{msgPath}) + sess, err = database.GetSession(ctx, canonicalID) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Renamed title", *sess.DisplayName) } func TestProcessAntigravityBrainOnlyUpdateNotSkipped(t *testing.T) { @@ -2776,10 +2770,14 @@ func TestEngine_ClassifyOnePathClaudeStatPermissionErrorStillClassifies( _ = os.Chmod(projectDir, 0o755) }() - got, ok := engine.classifyOnePath(path, nil) - require.True(t, ok, "expected path to classify despite stat permission error") - assert.Equal(t, path, got.Path) - assert.Equal(t, parser.AgentClaude, got.Agent) + // Claude is provider-authoritative, so classification flows through + // the provider's changed-path handling rather than the legacy + // classifyOnePath Claude block. A transient stat-permission error + // must still classify the path by shape so the change is not dropped. 
+ files := engine.classifyPaths([]string{path}) + require.Len(t, files, 1, "expected path to classify despite stat permission error") + assert.Equal(t, path, files[0].Path) + assert.Equal(t, parser.AgentClaude, files[0].Agent) } func TestEngine_ClassifyPathsDedupesOpenCodeChildPaths(t *testing.T) { diff --git a/internal/sync/hermes_archive_test.go b/internal/sync/hermes_archive_test.go new file mode 100644 index 000000000..58026330c --- /dev/null +++ b/internal/sync/hermes_archive_test.go @@ -0,0 +1,318 @@ +package sync + +import ( + "context" + "database/sql" + "os" + "path/filepath" + "testing" + "time" + + _ "github.com/mattn/go-sqlite3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" +) + +// hermesArchiveAggregateFileInfo mirrors the legacy engine helper +// hermesArchiveEffectiveInfo for test assertions: the aggregate size and mtime +// of the state.db plus every transcript directly under its sessions directory. +// The Hermes provider now owns this aggregation; this helper only computes the +// expected values the engine must persist. 
+func hermesArchiveAggregateFileInfo(t *testing.T, stateDB string) (int64, int64) { + t.Helper() + info, err := os.Stat(stateDB) + require.NoError(t, err) + size := info.Size() + mtime := info.ModTime().UnixNano() + sessionsDir := filepath.Join(filepath.Dir(stateDB), "sessions") + entries, err := os.ReadDir(sessionsDir) + if err != nil { + return size, mtime + } + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + isJSONL := filepath.Ext(name) == ".jsonl" + isSessionJSON := filepath.Ext(name) == ".json" && + len(name) >= len("session_") && name[:len("session_")] == "session_" + if !isJSONL && !isSessionJSON { + continue + } + fileInfo, err := os.Stat(filepath.Join(sessionsDir, name)) + if err != nil || fileInfo.IsDir() { + continue + } + size += fileInfo.Size() + if fileMtime := fileInfo.ModTime().UnixNano(); fileMtime > mtime { + mtime = fileMtime + } + } + return size, mtime +} + +// TestHermesProviderFingerprintAggregatesDirectTranscripts confirms the +// provider-owned archive fingerprint folds the size and mtime of transcripts +// living directly under the sessions directory into the state.db's freshness +// identity, replacing the engine's removed hermesArchiveEffectiveInfo. 
+func TestHermesProviderFingerprintAggregatesDirectTranscripts(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n{}\n"), 0o644)) + + transcriptTime := time.Now().Add(2 * time.Second).Truncate(time.Second) + require.NoError(t, os.Chtimes(transcriptPath, transcriptTime, transcriptTime)) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + + provider, ok := parser.NewProvider(parser.AgentHermes, parser.ProviderConfig{ + Roots: []string{filepath.Join(root, "sessions")}, + Machine: "local", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + assert.Equal(t, wantSize, fingerprint.Size) + assert.Equal(t, wantMtime, fingerprint.MTimeNS) +} + +// TestHermesProviderFingerprintChangesWhenTranscriptRemoved confirms the +// archive fingerprint shrinks back to the state.db's own size when a direct +// transcript is removed, replacing the engine's removed effective-info logic. 
+func TestHermesProviderFingerprintChangesWhenTranscriptRemoved(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n{}\n"), 0o644)) + + provider, ok := parser.NewProvider(parser.AgentHermes, parser.ProviderConfig{ + Roots: []string{filepath.Join(root, "sessions")}, + Machine: "local", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + before, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + require.NoError(t, os.Remove(transcriptPath)) + after, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + stateInfo, err := os.Stat(stateDB) + require.NoError(t, err) + assert.NotEqual(t, before.Size, after.Size) + assert.Equal(t, stateInfo.Size(), after.Size) +} + +// TestProcessFileHermesArchiveSkipCacheUsesAggregateMtime confirms the +// provider-authoritative processFile path keys the skip cache on the aggregate +// archive mtime (state.db plus direct transcripts), so a cached entry stamped +// with that mtime short-circuits a reparse. 
+func TestProcessFileHermesArchiveSkipCacheUsesAggregateMtime(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile(transcriptPath, []byte("{}\n"), 0o644)) + transcriptTime := time.Now().Add(2 * time.Second).Truncate(time.Second) + require.NoError(t, os.Chtimes(transcriptPath, transcriptTime, transcriptTime)) + + _, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + engine.InjectSkipCache(map[string]int64{ + stateDB: wantMtime, + }) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: stateDB, + Agent: parser.AgentHermes, + }) + + require.NoError(t, res.err) + assert.True(t, res.skip) + assert.True(t, res.cacheSkip) + assert.Equal(t, wantMtime, res.mtime) +} + +// TestProcessFileHermesArchivePersistsAggregateFingerprint confirms the +// provider-authoritative processFile path stamps every archive session with the +// state.db path and the aggregate size and mtime, and that a second pass skips +// once the file info is persisted. This replaces the removed +// processHermes-based assertions. 
+func TestProcessFileHermesArchivePersistsAggregateFingerprint(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n", + ), + 0o644, + )) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + database := dbtest.OpenTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: stateDB, + Agent: parser.AgentHermes, + }) + + require.NoError(t, res.err) + require.NotEmpty(t, res.results) + for _, result := range res.results { + assert.Equal(t, stateDB, result.Session.File.Path) + assert.Equal(t, wantSize, result.Session.File.Size) + assert.Equal(t, wantMtime, result.Session.File.Mtime) + } + + pending := make([]pendingWrite, 0, len(res.results)) + for _, result := range res.results { + pending = append(pending, pendingWrite{ + sess: result.Session, + msgs: result.Messages, + usageEvents: result.UsageEvents, + }) + } + written, _, failed := engine.writeBatch(pending, syncWriteDefault, true) + require.Equal(t, 0, failed) + require.NotZero(t, written) + + storedSize, storedMtime, ok := database.GetFileInfoByPath(stateDB) + require.True(t, ok) + assert.Equal(t, wantSize, storedSize) + assert.Equal(t, wantMtime, storedMtime) +} + +// TestSyncPathsHermesArchiveTranscriptPersistsAggregateFingerprint confirms that +// syncing a transcript path inside an archive routes through the provider, which +// reparses the whole archive and persists the 
aggregate file info under the +// state.db path. This replaces the removed syncSingleHermesArchive coverage. +func TestSyncPathsHermesArchiveTranscriptPersistsAggregateFingerprint(t *testing.T) { + root := t.TempDir() + stateDB := writeHermesArchiveStateDB(t, root) + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n", + ), + 0o644, + )) + + wantSize, wantMtime := hermesArchiveAggregateFileInfo(t, stateDB) + database := dbtest.OpenTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + engine.SyncPaths([]string{transcriptPath}) + + storedSize, storedMtime, found := database.GetFileInfoByPath(stateDB) + require.True(t, found) + assert.Equal(t, wantSize, storedSize) + assert.Equal(t, wantMtime, storedMtime) +} + +func writeHermesArchiveStateDB(t *testing.T, root string) string { + t.Helper() + stateDB := filepath.Join(root, "state.db") + conn, err := sql.Open("sqlite3", stateDB) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + _, err = conn.Exec(` + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + cache_read_tokens INTEGER DEFAULT 0, + cache_write_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + billing_provider TEXT, + billing_base_url TEXT, + 
billing_mode TEXT, + estimated_cost_usd REAL, + actual_cost_usd REAL, + cost_status TEXT, + cost_source TEXT, + pricing_version TEXT, + title TEXT, + api_call_count INTEGER DEFAULT 0 + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_content TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT, + codex_message_items TEXT + ); + INSERT INTO sessions ( + id, source, model, started_at, ended_at, message_count + ) VALUES ( + 'child', 'discord', 'gpt-5.4', 1778767200.0, 1778767800.0, 1 + ); + INSERT INTO messages ( + session_id, role, content, timestamp + ) VALUES ( + 'child', 'user', 'state db message', 1778767210.0 + ); + `) + require.NoError(t, err) + return stateDB +} diff --git a/internal/sync/hermes_integration_test.go b/internal/sync/hermes_integration_test.go new file mode 100644 index 000000000..439d6e0b7 --- /dev/null +++ b/internal/sync/hermes_integration_test.go @@ -0,0 +1,152 @@ +package sync_test + +import ( + "database/sql" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/sync" +) + +func TestSyncPathsHermesStateDBEventRefreshesArchive(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + root := t.TempDir() + stateDB := writeHermesSyncStateDB(t, root) + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + + engine.SyncPaths([]string{stateDB}) + + assertSessionState(t, database, 
"hermes:child", func(sess *db.Session) { + assert.Equal(t, string(parser.AgentHermes), sess.Agent) + assert.Equal(t, "hermes-discord", sess.Project) + require.NotNil(t, sess.DisplayName) + assert.Equal(t, "Child Session", *sess.DisplayName) + }) +} + +func TestSyncPathsHermesArchiveTranscriptEventRefreshesArchive(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + root := t.TempDir() + stateDB := writeHermesSyncStateDB(t, root) + database := dbtest.OpenTestDB(t) + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentHermes: {filepath.Join(root, "sessions")}, + }, + Machine: "local", + }) + engine.SyncPaths([]string{stateDB}) + assertSessionState(t, database, "hermes:child", nil) + + transcriptPath := filepath.Join(root, "sessions", "extra.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte( + `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}`+"\n"+ + `{"role":"user","content":"new transcript","timestamp":"2026-05-14T10:01:00.000000"}`+"\n"+ + `{"role":"assistant","content":"Done.","timestamp":"2026-05-14T10:02:00.000000"}`+"\n", + ), + 0o644, + )) + + engine.SyncPaths([]string{transcriptPath}) + + assertSessionState(t, database, "hermes:extra", func(sess *db.Session) { + require.NotNil(t, sess.FirstMessage) + assert.Equal(t, "new transcript", *sess.FirstMessage) + }) +} + +func writeHermesSyncStateDB(t *testing.T, root string) string { + t.Helper() + stateDB := filepath.Join(root, "state.db") + conn, err := sql.Open("sqlite3", stateDB) + require.NoError(t, err) + t.Cleanup(func() { _ = conn.Close() }) + + _, err = conn.Exec(` + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason 
TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + cache_read_tokens INTEGER DEFAULT 0, + cache_write_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + billing_provider TEXT, + billing_base_url TEXT, + billing_mode TEXT, + estimated_cost_usd REAL, + actual_cost_usd REAL, + cost_status TEXT, + cost_source TEXT, + pricing_version TEXT, + title TEXT, + api_call_count INTEGER DEFAULT 0 + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_content TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT, + codex_message_items TEXT + ); + INSERT INTO sessions ( + id, source, model, parent_session_id, started_at, ended_at, + message_count, input_tokens, output_tokens, cache_read_tokens, + cache_write_tokens, reasoning_tokens, estimated_cost_usd, + cost_status, cost_source, title, api_call_count + ) VALUES ( + 'child', 'discord', 'gpt-5.4', 'parent', + 1778767200.0, 1778767800.0, 1, 300, 70, 20, 5, 9, + 0.123, 'estimated', 'hermes', 'Child Session', 4 + ); + INSERT INTO messages ( + session_id, role, content, timestamp + ) VALUES ( + 'child', 'user', 'state db only has one message', 1778767210.0 + ); + `) + require.NoError(t, err) + return stateDB +} diff --git a/internal/sync/openhands_retry_test.go b/internal/sync/openhands_retry_test.go index 872d0c091..353611c67 100644 --- a/internal/sync/openhands_retry_test.go +++ b/internal/sync/openhands_retry_test.go @@ -39,8 +39,15 @@ func TestProcessFileOpenHandsUsesSnapshotMtimeForRetryCache(t *testing.T) { oldDirMtime := dirInfo.ModTime() engine := &Engine{ - db: dbtest.OpenTestDB(t), - machine: "local", + db: dbtest.OpenTestDB(t), + machine: "local", + agentDirs: 
map[parser.AgentType][]string{ + parser.AgentOpenHands: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentOpenHands: parser.ProviderMigrationProviderAuthoritative, + }, skipCache: map[string]int64{sessionDir: oldDirMtime.UnixNano()}, } diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index 767898ada..39577484c 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -501,7 +501,7 @@ func stripVirtualSourceSuffix(path string) string { // Detecting either makes the source unreliable, so the caller skips the raced // guard entirely. This never masks genuine drift for those agents, while plain // file-based agents reading a literal file still get the real race protection. -func parseDiffSourceReliableForRaced( +func (e *Engine) parseDiffSourceReliableForRaced( agent parser.AgentType, sourcePath string, ) bool { // A virtual path carries a recognized "#..." suffix; stripping changes @@ -510,15 +510,16 @@ func parseDiffSourceReliableForRaced( if stripVirtualSourceSuffix(sourcePath) != sourcePath { return false } - // Only plain file-based agents (FileBased with a DiscoverFunc, the same - // on-disk-source condition resolveParseDiffAgents uses) read a literal - // file whose mtime populated file_mtime. An unknown or DB-backed agent has - // no such basis. + // Only agents with a literal on-disk source -- the same discoverability + // condition resolveParseDiffAgents uses -- read a file whose mtime + // populated file_mtime. parseDiffAgentDiscoverable gates out DB-backed + // (FileBased == false, e.g. Forge) and non-authoritative agents, so an + // unknown or DB-backed agent has no such basis. 
def, ok := parser.AgentByType(agent) if !ok { return false } - return def.FileBased && def.DiscoverFunc != nil + return e.parseDiffAgentDiscoverable(def) } // parseDiffLiveMtime resolves a session's live source mtime for the raced @@ -725,7 +726,7 @@ func (e *Engine) parseDiffCollectFile( raced := false if realDiffs > 0 && compare && sourceSessionCount[pw.sess.File.Path] == 1 && - parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { + e.parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { var storedMtime *int64 if stored != nil { storedMtime = stored.FileMtime diff --git a/internal/sync/parsediff_compare_test.go b/internal/sync/parsediff_compare_test.go index 6250f7ec8..42b380abb 100644 --- a/internal/sync/parsediff_compare_test.go +++ b/internal/sync/parsediff_compare_test.go @@ -1747,9 +1747,10 @@ func TestParseDiffSourceReliableForRaced(t *testing.T) { want: false, }, } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{}) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := parseDiffSourceReliableForRaced(tt.agent, tt.path) + got := engine.parseDiffSourceReliableForRaced(tt.agent, tt.path) assert.Equal(t, tt.want, got) }) } diff --git a/internal/sync/provider_freshness_integration_test.go b/internal/sync/provider_freshness_integration_test.go new file mode 100644 index 000000000..6a093df33 --- /dev/null +++ b/internal/sync/provider_freshness_integration_test.go @@ -0,0 +1,53 @@ +package sync_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/sync" +) + +// TestProviderAuthoritativeUnchangedSessionSkipsOnResync verifies that a +// provider-authoritative agent whose source file is unchanged is skipped on a +// second full sync rather than reparsed and rewritten. 
Before the generic +// providerSourceUnchangedInDB freshness check, only Claude and Cowork had a +// pre-parse DB skip in processProviderFile, so the other migrated agents +// (OpenHands, Cursor, Hermes, Vibe) fell through to provider.Parse + writeBatch +// and rewrote unchanged sessions on every full/periodic sync. Vibe is used as a +// representative of that group. +func TestProviderAuthoritativeUnchangedSessionSkipsOnResync(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + vibeDir := t.TempDir() + testDB := dbtest.OpenTestDB(t) + engine := sync.NewEngine(testDB, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentVibe: {vibeDir}, + }, + Machine: "local", + }) + + sessionID := "abc123def-0000-0000-0000-000000000000" + writeVibeSyncFixture( + t, vibeDir, "session_20260616_083518_abc123", sessionID, "Title", + ) + + ctx := context.Background() + first := engine.SyncAll(ctx, nil) + require.Equal(t, 1, first.Synced, "first sync parses and stores the session") + + // Source files are untouched, so the second full sync must skip the session + // at the DB-freshness check instead of reparsing and rewriting it. 
+ second := engine.SyncAll(ctx, nil) + assert.Equal(t, 0, second.Synced, + "an unchanged provider-authoritative session must not be re-synced") + assert.GreaterOrEqual(t, second.Skipped, 1, + "the unchanged session must be counted as skipped") +} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 2f9b81862..a389eac76 100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -16,103 +17,6 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) -func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-caller.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "compare through the caller", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - testjsonl.ClaudeAssistantJSON( - "provider stayed shadow-only", - "2026-06-01T10:01:00Z", - ), - )), - 0o644, - )) - - legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( - sourcePath, "demo", "devbox", - ) - require.NoError(t, err) - require.Len(t, legacyResults, 1) - require.Empty(t, legacyExcluded) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - providerResult := legacyResults[0] - providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) - hash, err := ComputeFileHash(sourcePath) - require.NoError(t, err) - providerResult.Session.File.Hash = hash - - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "demo", - } - provider := &shadowCallerProvider{ - 
shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - }, - }, - source: source, - } - var comparisons []ProviderShadowComparison - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, - }, - ProviderShadowRecorder: func(comparison ProviderShadowComparison) { - comparisons = append(comparisons, comparison) - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - require.Len(t, result.results, 1) - assert.Equal(t, "shadow-caller", result.results[0].Session.ID) - assert.Equal(t, parser.AgentClaude, result.results[0].Session.Agent) - require.Len(t, comparisons, 1) - assert.NoError(t, comparisons[0].Err) - assert.Empty(t, comparisons[0].Mismatches) - assert.Equal(t, sourcePath, comparisons[0].File.Path) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - func TestClassifyProviderChangedPathPassesStoredHintsToShadowProvider( t *testing.T, ) { @@ -237,105 +141,6 @@ func TestClassifyProviderChangedPathRunsAlongsideLegacyClassifier( assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) } -func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { - root := t.TempDir() - 
sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-provider-source.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "provider source should win", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - testjsonl.ClaudeAssistantJSON( - "force parse should propagate", - "2026-06-01T10:01:00Z", - ), - )), - 0o644, - )) - - legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( - sourcePath, "demo", "devbox", - ) - require.NoError(t, err) - require.Len(t, legacyResults, 1) - require.Empty(t, legacyExcluded) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - providerResult := legacyResults[0] - providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) - hash, err := ComputeFileHash(sourcePath) - require.NoError(t, err) - providerResult.Session.File.Hash = hash - - changedSource := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: "changed-path-source", - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "demo", - } - findFound := false - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - }, - }, - findFound: &findFound, - } - var comparisons []ProviderShadowComparison - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - 
shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, - }, - ProviderShadowRecorder: func(comparison ProviderShadowComparison) { - comparisons = append(comparisons, comparison) - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - ForceParse: true, - ProviderSource: &changedSource, - }) - - require.NoError(t, result.err) - require.Len(t, comparisons, 1) - assert.NoError(t, comparisons[0].Err) - assert.Empty(t, comparisons[0].Mismatches) - assert.Equal(t, changedSource, comparisons[0].Source) - assert.Equal(t, changedSource, provider.parseRequest.Source) - assert.True(t, provider.parseRequest.ForceParse) - assert.Empty(t, provider.findRequest) -} - func TestClassifyProviderChangedPathMarksAuthoritativeProviderProcess( t *testing.T, ) { @@ -566,27 +371,27 @@ func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { shadowTestProvider: shadowTestProvider{ ProviderBase: parser.ProviderBase{ Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", }, }, }, source: parser.SourceRef{ - Provider: parser.AgentClaude, + Provider: parser.AgentCowork, Key: sourcePath, }, } var comparisons []ProviderShadowComparison engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, + parser.AgentCowork: {root}, }, Machine: "devbox", ProviderFactories: []parser.ProviderFactory{ shadowCallerFactory{provider: provider}, }, ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, + parser.AgentCowork: parser.ProviderMigrationShadowCompare, }, ProviderShadowRecorder: func(comparison ProviderShadowComparison) { comparisons = 
append(comparisons, comparison) @@ -598,7 +403,7 @@ func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { result := engine.processFile(context.Background(), parser.DiscoveredFile{ Path: sourcePath, - Agent: parser.AgentClaude, + Agent: parser.AgentCowork, }) require.True(t, result.skip) @@ -691,6 +496,177 @@ func TestProcessFileProviderAuthoritativeUsesInjectedProvider(t *testing.T) { assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) } +func TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "fresh.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "demo", + } + + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + Caps: parser.Capabilities{ + Source: parser.SourceCapabilities{ + IncrementalAppend: parser.CapabilitySupported, + }, + }, + }, + }, + source: source, + } + database := dbtest.OpenTestDB(t) + filePath := sourcePath + fileSize := info.Size() + fileMtime := info.ModTime().UnixNano() + require.NoError(t, database.UpsertSession(db.Session{ + ID: "fresh", + Project: "demo", + Machine: "devbox", + Agent: string(parser.AgentClaude), + FilePath: &filePath, + FileSize: &fileSize, + FileMtime: &fileMtime, + })) + require.NoError(t, database.SetSessionDataVersion("fresh", db.CurrentDataVersion())) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: 
[]parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentClaude, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, fileMtime, result.mtime) + assert.Empty(t, provider.calls) + assert.Equal(t, sourcePath, provider.findRequest.StoredFilePath) +} + +func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *testing.T) { + root := t.TempDir() + database := dbtest.OpenTestDB(t) + sourcePath, sourceMtime := writeFreshCoworkProviderSource( + t, root, database, "fresh-session", + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCowork, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + +func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { + root := t.TempDir() + database := 
dbtest.OpenTestDB(t) + sourcePath, sourceMtime := writeFreshCoworkProviderSource( + t, root, database, "force-session", + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCowork, + DisplayName: "Claude Cowork", + }, + }, + fingerprint: parser.SourceFingerprint{ + Key: sourcePath, + MTimeNS: sourceMtime, + }, + outcome: parser.ParseOutcome{ + ResultSetComplete: true, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCowork, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + ForceParse: true, + }) + + require.NoError(t, result.err) + assert.False(t, result.skip) + assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) + assert.True(t, provider.parseRequest.ForceParse) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1238,3 +1214,46 @@ func (f shadowCallerFactory) Capabilities() parser.Capabilities { func (f shadowCallerFactory) NewProvider(parser.ProviderConfig) parser.Provider { return f.provider } + +func writeFreshCoworkProviderSource( + t *testing.T, + root string, + database *db.DB, + rawSessionID string, +) (string, int64) { + t.Helper() + + sessionDir := filepath.Join(root, "org", "workspace", "local_fresh") + projectDir := filepath.Join(sessionDir, 
".claude", "projects", "-demo") + require.NoError(t, os.MkdirAll(projectDir, 0o755)) + metaPath := sessionDir + ".json" + sourcePath := filepath.Join(projectDir, rawSessionID+".jsonl") + require.NoError(t, os.WriteFile(metaPath, []byte(`{"title":"Fresh"}`), 0o644)) + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + + transcriptTime := time.Unix(1_781_475_210, 0) + metaTime := transcriptTime.Add(time.Second) + require.NoError(t, os.Chtimes(sourcePath, transcriptTime, transcriptTime)) + require.NoError(t, os.Chtimes(metaPath, metaTime, metaTime)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + sourceMtime := parser.CoworkSessionMtime(sourcePath, info.ModTime().UnixNano()) + require.Equal(t, metaTime.UnixNano(), sourceMtime) + + fullSessionID := "cowork:" + rawSessionID + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "cowork-project", + Machine: "devbox", + Agent: string(parser.AgentCowork), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) + + return sourcePath, sourceMtime +} diff --git a/internal/sync/s3_provider_discovery_test.go b/internal/sync/s3_provider_discovery_test.go new file mode 100644 index 000000000..cf786cec9 --- /dev/null +++ b/internal/sync/s3_provider_discovery_test.go @@ -0,0 +1,87 @@ +package sync + +import ( + "context" + "io" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +// TestProcessFileS3ProviderDiscoveredRoutesToS3Path verifies that an s3:// +// DiscoveredFile shaped exactly as discoverProviderSources now emits it -- a +// provider-authoritative agent, ProviderProcess set, and a ProviderSource +// carrying the S3DiscoveredSource opaque -- still 
routes through the S3 sync +// path. processProviderFile must let the s3:// guard win over the provider +// parse path (providers read local files), and the threaded Machine/size/mtime +// must drive the same namespaced result as direct S3 discovery. +func TestProcessFileS3ProviderDiscoveredRoutesToS3Path(t *testing.T) { + database := openTestDB(t) + path := "s3://bucket/laptop/raw/claude/test-proj/shared-id.jsonl" + content := testjsonl.NewSessionBuilder(). + AddClaudeUser("2024-01-01T00:00:00Z", "Hello"). + AddClaudeAssistant("2024-01-01T00:00:05Z", "Hi."). + String() + + /* Replace fetchS3Object with a stub that serves content only for path; the original is restored in Cleanup. */ + oldFetch := fetchS3Object + t.Cleanup(func() { fetchS3Object = oldFetch }) + fetchS3Object = func(got string) (io.ReadCloser, error) { + if got != path { + return nil, missingS3ObjectError() + } + return io.NopCloser(strings.NewReader(content)), nil + } + + mtime := time.Date(2026, 6, 24, 12, 0, 0, 0, time.UTC).UnixNano() + /* The engine's own machine is "central"; the discovered file below reports "laptop". */ + e := &Engine{ + db: database, + machine: "central", + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + } + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: "test-proj", + Opaque: parser.S3DiscoveredSource{ + URI: path, + Project: "test-proj", + Machine: "laptop", + Size: int64(len(content)), + MtimeNS: mtime, + }, + } + res := e.processFile(context.Background(), parser.DiscoveredFile{ + Agent: parser.AgentClaude, + Path: path, + Project: "test-proj", + Machine: "laptop", + SourceSize: int64(len(content)), + SourceMtime: mtime, + ProviderSource: &source, + ProviderProcess: true, + }) + require.NoError(t, res.err) + require.Len(t, res.results, 1) + + /* Write the single parsed result so the stored row can be checked. */ + written, _, failed := e.writeBatch([]pendingWrite{{ + sess: res.results[0].Session, + msgs: res.results[0].Messages, + }}, syncWriteDefault, false) + require.Equal(t, 1, written) + require.Equal(t, 0, failed) + + /* The row is looked up under the machine-prefixed ID laptop~shared-id and must keep machine "laptop" and the original s3 path. */ + sess, err :=
database.GetSessionFull(context.Background(), "laptop~shared-id") + require.NoError(t, err) + require.NotNil(t, sess) + assert.Equal(t, "laptop", sess.Machine) + assert.Equal(t, path, derefString(sess.FilePath)) +}