diff --git a/internal/parser/cursor.go b/internal/parser/cursor.go
index f570b07f7..fa42d25d0 100644
--- a/internal/parser/cursor.go
+++ b/internal/parser/cursor.go
@@ -17,10 +17,10 @@ import (
 // under 500 KB; 10 MB provides generous headroom.
 const maxCursorTranscriptSize = 10 << 20
 
-// ParseCursorSession parses a Cursor agent transcript file.
-// Transcripts are plain text with "user:" and "assistant:" role
-// markers, tool calls, and thinking blocks.
-func ParseCursorSession(
+// parseSession parses a Cursor agent transcript file. Transcripts are plain
+// text with "user:" and "assistant:" role markers, tool calls, and thinking
+// blocks.
+func (p *cursorProvider) parseSession(
 	path, project, machine string,
 ) (*ParsedSession, []ParsedMessage, error) {
 	// Open with O_NOFOLLOW (Unix) to reject symlinks at the
diff --git a/internal/parser/cursor_provider.go b/internal/parser/cursor_provider.go
new file mode 100644
index 000000000..8244cdc33
--- /dev/null
+++ b/internal/parser/cursor_provider.go
@@ -0,0 +1,609 @@
+package parser
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// Compile-time assertion that *cursorProvider satisfies Provider.
+var _ Provider = (*cursorProvider)(nil)
+
+// cursorProviderFactory creates Cursor providers from a stored agent
+// definition.
+type cursorProviderFactory struct {
+	def AgentDef
+}
+
+// newCursorProviderFactory wraps def in a ProviderFactory. The definition is
+// passed through cloneAgentDef before it is stored.
+func newCursorProviderFactory(def AgentDef) ProviderFactory {
+	factory := cursorProviderFactory{def: cloneAgentDef(def)}
+	return factory
+}
+
+// Definition returns a clone of the factory's agent definition.
+func (f cursorProviderFactory) Definition() AgentDef {
+	def := cloneAgentDef(f.def)
+	return def
+}
+
+// Capabilities reports the capability set used by every Cursor provider.
+func (f cursorProviderFactory) Capabilities() Capabilities {
+	caps := cursorProviderCapabilities()
+	return caps
+}
+
+// NewProvider builds a Cursor provider from a clone of cfg. The cloned
+// config's Roots seed the provider's source set.
+func (f cursorProviderFactory) NewProvider(cfg ProviderConfig) Provider {
+	cfg = cfg.Clone()
+	base := ProviderBase{
+		Def:    cloneAgentDef(f.def),
+		Caps:   cursorProviderCapabilities(),
+		Config: cfg,
+	}
+	return &cursorProvider{
+		ProviderBase: base,
+		sources:      newCursorSourceSet(cfg.Roots),
+	}
+}
+
+// cursorProvider is the Provider implementation for Cursor transcripts.
+// Source discovery, lookup and fingerprinting are delegated to sources.
+type cursorProvider struct {
+	ProviderBase
+	sources cursorSourceSet
+}
+
+// Discover delegates to the provider's source set.
+func (p *cursorProvider) Discover(ctx context.Context) ([]SourceRef, error) {
+	refs, err := p.sources.Discover(ctx)
+	return refs, err
+}
+
+// WatchPlan delegates to the provider's source set.
+func (p *cursorProvider) WatchPlan(ctx context.Context) (WatchPlan, error) {
+	plan, err := p.sources.WatchPlan(ctx)
+	return plan, err
+}
+
+// SourcesForChangedPath delegates to the provider's source set.
+func (p *cursorProvider) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	refs, err := p.sources.SourcesForChangedPath(ctx, req)
+	return refs, err
+}
+
+// FindSource passes req through providerFindRequestWithRawSessionID with the
+// provider's agent definition and then delegates to the source set.
+func (p *cursorProvider) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	return p.sources.FindSource(
+		ctx,
+		providerFindRequestWithRawSessionID(p.Def, req),
+	)
+}
+
+// Fingerprint delegates to the provider's source set.
+func (p *cursorProvider) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	fp, err := p.sources.Fingerprint(ctx, source)
+	return fp, err
+}
+
+// Parse reads the transcript behind req.Source and returns it as a single
+// result. It fails if ctx is already done, if no path can be derived from the
+// source, or if parseSession fails. A nil session from parseSession yields a
+// complete, empty outcome with SkipReason set to SkipNoSession.
+func (p *cursorProvider) Parse(
+	ctx context.Context,
+	req ParseRequest,
+) (ParseOutcome, error) {
+	if err := ctx.Err(); err != nil {
+		return ParseOutcome{}, err
+	}
+	transcriptPath, ok := p.sources.pathFromSource(req.Source)
+	if !ok {
+		return ParseOutcome{}, fmt.Errorf("cursor source path unavailable")
+	}
+	// The machine named in the request wins over the configured one.
+	session, messages, err := p.parseSession(
+		transcriptPath,
+		req.Source.ProjectHint,
+		firstNonEmptyJSONLString(req.Machine, p.Config.Machine),
+	)
+	if err != nil {
+		return ParseOutcome{}, err
+	}
+	if session == nil {
+		return ParseOutcome{
+			ResultSetComplete: true,
+			SkipReason:        SkipNoSession,
+		}, nil
+	}
+	// Prefer the hash the caller already computed for this source.
+	if hash := req.Fingerprint.Hash; hash != "" {
+		session.File.Hash = hash
+	}
+	parsed := ParseResultOutcome{
+		Result: ParseResult{
+			Session:  *session,
+			Messages: messages,
+		},
+		DataVersion: DataVersionCurrent,
+	}
+	return ParseOutcome{
+		Results:           []ParseResultOutcome{parsed},
+		ResultSetComplete: true,
+	}, nil
+}
+
+// cursorSource is the provider-private payload carried in SourceRef.Opaque:
+// the configured root and the transcript path found under it.
+type cursorSource struct {
+	Root string
+	Path string
+}
+
+// cursorSourceSet resolves Cursor transcript sources under a list of
+// projects roots.
+type cursorSourceSet struct {
+	roots []string
+}
+
+// newCursorSourceSet builds a source set from roots after passing them through
+// cleanJSONLRoots.
+func newCursorSourceSet(roots []string) cursorSourceSet {
+	set := cursorSourceSet{roots: cleanJSONLRoots(roots)}
+	return set
+}
+
+// Discover walks every configured root and returns the transcript sources
+// found there, deduplicated via addJSONLSource and ordered by
+// sortJSONLSources. Cancellation is checked once per root.
+func (s cursorSourceSet) Discover(ctx context.Context) ([]SourceRef, error) {
+	var found []SourceRef
+	dedup := make(map[string]struct{})
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		for _, transcript := range s.discoverTranscriptPaths(root) {
+			if ref, ok := s.sourceRef(root, transcript); ok {
+				addJSONLSource(ref, &found, dedup)
+			}
+		}
+	}
+	sortJSONLSources(found)
+	return found, nil
+}
+
+// discoverTranscriptPaths lists the primary transcript file of every session
+// under a Cursor projects root. Each project's agent-transcripts directory
+// must resolve inside the canonical root, and symlinked project entries are
+// skipped. Two layouts are recognized:
+//
+//	flat:   agent-transcripts/<id>.{txt,jsonl}
+//	nested: agent-transcripts/<id>/<id>.{txt,jsonl}
+//
+// When a session has both a .jsonl and a .txt file, the .jsonl one is
+// returned. The result order is unspecified. Any filesystem error makes the
+// affected root, project or directory contribute nothing.
+func (s cursorSourceSet) discoverTranscriptPaths(projectsDir string) []string {
+	if projectsDir == "" {
+		return nil
+	}
+
+	// Resolve the root once; every containment check compares against it.
+	resolvedRoot, err := filepath.EvalSymlinks(projectsDir)
+	if err != nil {
+		return nil
+	}
+
+	projects, err := os.ReadDir(projectsDir)
+	if err != nil {
+		return nil
+	}
+
+	var paths []string
+	for _, project := range projects {
+		// Only real directories count; symlinked entries are rejected.
+		if !project.IsDir() || project.Type()&os.ModeSymlink != 0 {
+			continue
+		}
+
+		transcriptsDir := filepath.Join(
+			projectsDir, project.Name(), "agent-transcripts",
+		)
+		canonicalDir, err := filepath.EvalSymlinks(transcriptsDir)
+		if err != nil || !isContainedIn(canonicalDir, resolvedRoot) {
+			continue
+		}
+
+		items, err := os.ReadDir(transcriptsDir)
+		if err != nil {
+			continue
+		}
+
+		// One winner per session stem within this project.
+		byStem := make(map[string]string) // stem -> path
+		for _, item := range items {
+			itemName := item.Name()
+			if item.IsDir() {
+				// Nested layout: agent-transcripts/<id>/ holding
+				// <id>.{txt,jsonl}.
+				nestedDir := filepath.Join(transcriptsDir, itemName)
+				children, err := os.ReadDir(nestedDir)
+				if err != nil {
+					continue
+				}
+				for _, child := range children {
+					fileName := child.Name()
+					if child.IsDir() || !IsCursorTranscriptExt(fileName) {
+						continue
+					}
+					// The file stem has to equal the directory name,
+					// e.g. <id>/<id>.jsonl.
+					stem := strings.TrimSuffix(fileName, filepath.Ext(fileName))
+					if stem != itemName {
+						continue
+					}
+					candidate := filepath.Join(nestedDir, fileName)
+					if IsRegularFile(candidate) {
+						cursorAddSeen(byStem, fileName, candidate)
+					}
+				}
+				continue
+			}
+
+			// Flat layout: the file sits directly in agent-transcripts/.
+			if !IsCursorTranscriptExt(itemName) {
+				continue
+			}
+			candidate := filepath.Join(transcriptsDir, itemName)
+			if IsRegularFile(candidate) {
+				cursorAddSeen(byStem, itemName, candidate)
+			}
+		}
+		for _, winner := range byStem {
+			paths = append(paths, winner)
+		}
+	}
+	return paths
+}
+
+// cursorAddSeen inserts a transcript path into the seen map, preferring .jsonl
+// over .txt when both exist for the same stem.
+func cursorAddSeen(seen map[string]string, name, fullPath string) {
+	key := strings.TrimSuffix(name, filepath.Ext(name))
+	existing, taken := seen[key]
+	switch {
+	case !taken:
+		// First file seen for this stem.
+		seen[key] = fullPath
+	case strings.HasSuffix(existing, ".txt") && strings.HasSuffix(name, ".jsonl"):
+		// A .jsonl transcript displaces a previously recorded .txt one.
+		seen[key] = fullPath
+	}
+}
+
+// WatchPlan returns one recursive watch root per configured root, limited to
+// *.jsonl and *.txt files. The context argument is not used.
+func (s cursorSourceSet) WatchPlan(context.Context) (WatchPlan, error) {
+	watched := make([]WatchRoot, len(s.roots))
+	for i, dir := range s.roots {
+		watched[i] = WatchRoot{
+			Path:         dir,
+			Recursive:    true,
+			IncludeGlobs: []string{"*.jsonl", "*.txt"},
+			DebounceKey:  string(AgentCursor) + ":transcripts:" + dir,
+		}
+	}
+	return WatchPlan{Roots: watched}, nil
+}
+
+// SourcesForChangedPath maps a changed filesystem path to the source it
+// belongs to. When req.WatchRoot is set, only that root is consulted, and only
+// if it is one of the configured roots. Otherwise the configured roots are
+// tried in order and the first match wins. No match yields (nil, nil).
+func (s cursorSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	searchRoots := s.roots
+	if req.WatchRoot != "" {
+		watchRoot := filepath.Clean(req.WatchRoot)
+		if !s.hasRoot(watchRoot) {
+			return nil, nil
+		}
+		searchRoots = []string{watchRoot}
+	}
+	for _, dir := range searchRoots {
+		if ref, ok := s.sourceForPathInRoot(dir, req.Path); ok {
+			return []SourceRef{ref}, nil
+		}
+	}
+	return nil, nil
+}
+
+// FindSource locates a source for req. It tries the stored file path first,
+// then the fingerprint key, and finally searches every root for a transcript
+// named after req.RawSessionID. The boolean result reports whether a source
+// was found.
+func (s cursorSourceSet) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceRef{}, false, err
+	}
+	for _, known := range []string{req.StoredFilePath, req.FingerprintKey} {
+		if known != "" {
+			if ref, ok := s.sourceForPath(known); ok {
+				return ref, true, nil
+			}
+		}
+	}
+	if sessionID := req.RawSessionID; sessionID != "" {
+		for _, dir := range s.roots {
+			file := cursorFindSourceFile(dir, sessionID)
+			if file == "" {
+				continue
+			}
+			if ref, ok := s.sourceRef(dir, file); ok {
+				return ref, true, nil
+			}
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// cursorFindSourceFile finds a Cursor
transcript file by session UUID across a
+// projects root, preferring .jsonl over .txt. Returns "" if no matching file
+// resolves within the canonical root.
+//
+// The extension loop is the outer one, so a .jsonl transcript in any project
+// beats a .txt transcript in an earlier project. Within a project the nested
+// layout is tried before the flat one.
+func cursorFindSourceFile(projectsDir, sessionID string) string {
+	if projectsDir == "" || !IsValidSessionID(sessionID) {
+		return ""
+	}
+
+	entries, err := os.ReadDir(projectsDir)
+	if err != nil {
+		return ""
+	}
+
+	resolvedRoot, err := filepath.EvalSymlinks(projectsDir)
+	if err != nil {
+		return ""
+	}
+
+	for _, ext := range []string{".jsonl", ".txt"} {
+		target := sessionID + ext
+		for _, entry := range entries {
+			if !entry.IsDir() {
+				continue
+			}
+			// Nested layout first (matches discovery
+			// precedence), then flat layout.
+			candidates := []string{
+				filepath.Join(
+					projectsDir, entry.Name(),
+					"agent-transcripts", sessionID, target,
+				),
+				filepath.Join(
+					projectsDir, entry.Name(),
+					"agent-transcripts", target,
+				),
+			}
+			for _, candidate := range candidates {
+				if !IsRegularFile(candidate) {
+					continue
+				}
+				// Same containment test as discovery and
+				// cursorFindSourceFileInProject: the resolved file
+				// must stay inside the canonical root.
+				resolved, err := filepath.EvalSymlinks(candidate)
+				if err != nil || !isContainedIn(resolved, resolvedRoot) {
+					continue
+				}
+				return candidate
+			}
+		}
+	}
+	return ""
+}
+
+// Fingerprint stats the transcript behind source and returns its size,
+// modification time and content hash. Files larger than
+// maxCursorTranscriptSize are not hashed; their Hash is left empty. It fails
+// if ctx is done, if no path can be derived from source, if the path cannot
+// be stat'ed, or if it is a directory.
+func (s cursorSourceSet) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceFingerprint{}, err
+	}
+	path, ok := s.pathFromSource(source)
+	if !ok {
+		return SourceFingerprint{}, fmt.Errorf("cursor source path unavailable")
+	}
+	info, err := os.Stat(path)
+	if err != nil {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err)
+	}
+	if info.IsDir() {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path)
+	}
+	hash := ""
+	if info.Size() <= maxCursorTranscriptSize {
+		hash, err = hashJSONLSourceFile(path)
+		if err != nil {
+			return SourceFingerprint{}, err
+		}
+	}
+	return SourceFingerprint{
+		Key:     firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path),
+		Size:    info.Size(),
+		MTimeNS: info.ModTime().UnixNano(),
+		Hash:    hash,
+	}, nil
+}
+
+// pathFromSource extracts the transcript path for source. The path carried in
+// source.Opaque is used when present. Otherwise DisplayPath, FingerprintKey
+// and Key are resolved against the configured roots, in that order. The
+// boolean result reports whether a path was found.
+func (s cursorSourceSet) pathFromSource(source SourceRef) (string, bool) {
+	// Value and pointer payloads are treated alike: an empty path falls
+	// through to the path-based lookup below.
+	switch src := source.Opaque.(type) {
+	case cursorSource:
+		if src.Path != "" {
+			return src.Path, true
+		}
+	case *cursorSource:
+		if src != nil && src.Path != "" {
+			return src.Path, true
+		}
+	}
+	for _, candidate := range []string{
+		source.DisplayPath,
+		source.FingerprintKey,
+		source.Key,
+	} {
+		if candidate == "" {
+			continue
+		}
+		ref, ok := s.sourceForPath(candidate)
+		if !ok {
+			continue
+		}
+		// Checked assertion: never panic on an unexpected payload type.
+		if src, ok := ref.Opaque.(cursorSource); ok && src.Path != "" {
+			return src.Path, true
+		}
+	}
+	return "", false
+}
+
+// sourceForPath resolves path against each configured root in order and
+// returns the first source found.
+func (s cursorSourceSet) sourceForPath(path string) (SourceRef, bool) {
+	for _, root := range s.roots {
+		if source, ok := s.sourceForPathInRoot(root, path); ok {
+			return source, true
+		}
+	}
+	return SourceRef{}, false
+}
+
+// sourceForPathInRoot maps a transcript path under root to the source for the
+// same session in the same project. The returned source is the file chosen by
+// cursorFindSourceFileInProject, so a .txt path resolves to the sibling .jsonl
+// transcript when one exists.
+func (s cursorSourceSet) sourceForPathInRoot(
+	root string,
+	path string,
+) (SourceRef, bool) {
+	rawID, ok := cursorRawSessionIDFromPath(root, path)
+	if !ok {
+		return SourceRef{}, false
+	}
+	projectDir, ok := cursorProjectDirFromPath(root, path)
+	if !ok {
+		return SourceRef{}, false
+	}
+	selected := cursorFindSourceFileInProject(root, projectDir, rawID)
+	if selected == "" {
+		return SourceRef{}, false
+	}
+	return s.sourceRef(root, selected)
+}
+
+// sourceRef builds the SourceRef for a transcript file under root. It succeeds
+// only when path is a regular file with a valid transcript shape and is the
+// file cursorFindSourceFileInProject selects for its session. A non-preferred
+// duplicate therefore yields no source. ProjectHint falls back to "unknown"
+// when the project directory name decodes to an empty string.
+func (s cursorSourceSet) sourceRef(root, path string) (SourceRef, bool) {
+	root = filepath.Clean(root)
+	path = filepath.Clean(path)
+	if !IsRegularFile(path) {
+		return SourceRef{}, false
+	}
+	rawID, ok := cursorRawSessionIDFromPath(root, path)
+	if !ok {
+		return SourceRef{}, false
+	}
+	projectDir, ok := cursorProjectDirFromPath(root, path)
+	if !ok {
+		return SourceRef{}, false
+	}
+	selected := cursorFindSourceFileInProject(root, projectDir, rawID)
+	if selected == "" || !samePath(selected, path) {
+		return SourceRef{}, false
+	}
+	project := DecodeCursorProjectDir(projectDir)
+	if project == "" {
+		project = "unknown"
+	}
+	return SourceRef{
+		Provider:       AgentCursor,
+		Key:            path,
+		DisplayPath:    path,
+		FingerprintKey: path,
+		ProjectHint:    project,
+		Opaque: cursorSource{
+			Root: root,
+			Path: path,
+		},
+	}, true
+}
+
+// hasRoot reports whether root is one of the configured roots according to
+// samePath.
+func (s cursorSourceSet) hasRoot(root string) bool {
+	for _, configured := range s.roots {
+		if samePath(root, configured) {
+			return true
+		}
+	}
+	return false
+}
+
+// cursorFindSourceFileInProject returns the preferred transcript file for
+// rawID inside a single project directory, or "" if there is none. Preference
+// order is .jsonl before .txt and, for each extension, nested layout before
+// flat layout. A candidate must be a regular file that resolves inside the
+// canonical root.
+func cursorFindSourceFileInProject(root, projectDir, rawID string) string {
+	if root == "" || projectDir == "" || !IsValidSessionID(rawID) {
+		return ""
+	}
+	resolvedRoot, err := filepath.EvalSymlinks(root)
+	if err != nil {
+		return ""
+	}
+	transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts")
+	for _, ext := range []string{".jsonl", ".txt"} {
+		target := rawID + ext
+		candidates := []string{
+			filepath.Join(transcriptsDir, rawID, target),
+			filepath.Join(transcriptsDir, target),
+		}
+		for _, candidate := range candidates {
+			if !IsRegularFile(candidate) {
+				continue
+			}
+			resolved, err := filepath.EvalSymlinks(candidate)
+			if err != nil || !isContainedIn(resolved, resolvedRoot) {
+				continue
+			}
+			return candidate
+		}
+	}
+	return ""
+}
+
+func
cursorRawSessionIDFromPath(root, path string) (string, bool) {
+	rel, ok := cursorRelPath(root, path)
+	if !ok {
+		return "", false
+	}
+	segments := strings.Split(rel, string(filepath.Separator))
+	// Four segments: the third one is the session directory of the nested
+	// layout and doubles as the session ID.
+	if len(segments) == 4 {
+		return segments[2], true
+	}
+	if len(segments) != 3 {
+		return "", false
+	}
+	// Three segments: flat layout, the ID is the file name minus extension.
+	fileName := segments[2]
+	return strings.TrimSuffix(fileName, filepath.Ext(fileName)), true
+}
+
+// cursorProjectDirFromPath returns the project directory component that
+// ParseCursorTranscriptRelPath extracts from path relative to root.
+func cursorProjectDirFromPath(root, path string) (string, bool) {
+	if rel, ok := cursorRelPath(root, path); ok {
+		return ParseCursorTranscriptRelPath(rel)
+	}
+	return "", false
+}
+
+// cursorRelPath returns path relative to root, but only when that relative
+// path is accepted by ParseCursorTranscriptRelPath.
+func cursorRelPath(root, path string) (string, bool) {
+	cleanRoot, cleanPath := filepath.Clean(root), filepath.Clean(path)
+	rel, err := filepath.Rel(cleanRoot, cleanPath)
+	if err != nil {
+		return "", false
+	}
+	_, valid := ParseCursorTranscriptRelPath(rel)
+	if !valid {
+		return "", false
+	}
+	return rel, true
+}
+
+// cursorProviderCapabilities declares what the Cursor provider supports, both
+// for source handling and for parsed content.
+func cursorProviderCapabilities() Capabilities {
+	source := SourceCapabilities{
+		DiscoverSources:      CapabilitySupported,
+		WatchSources:         CapabilitySupported,
+		ClassifyChangedPath:  CapabilitySupported,
+		FindSource:           CapabilitySupported,
+		CompositeFingerprint: CapabilitySupported,
+		MultiSessionSource:   CapabilityNotApplicable,
+		PerSessionErrors:     CapabilityNotApplicable,
+		ExcludedSessions:     CapabilityNotApplicable,
+		ForceReplaceOnParse:  CapabilityNotApplicable,
+	}
+	content := ContentCapabilities{
+		FirstMessage: CapabilitySupported,
+		Thinking:     CapabilitySupported,
+		ToolCalls:    CapabilitySupported,
+		ToolResults:  CapabilitySupported,
+	}
+	return Capabilities{Source: source, Content: content}
+}
diff --git a/internal/parser/cursor_provider_test.go b/internal/parser/cursor_provider_test.go
new file mode 100644
index 000000000..c023bc2eb
--- /dev/null
+++ b/internal/parser/cursor_provider_test.go
@@ -0,0 +1,282 @@
+package parser
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestCursorProviderFactoryReplacesLegacyAdapter(t *testing.T) {
+	factory, ok :=
ProviderFactoryByType(AgentCursor) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCursorProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + flatTxt := cursorProviderWriteTranscript(t, transcriptsDir, "flat.txt", "old") + flatJSONL := cursorProviderWriteJSONLTranscript(t, transcriptsDir, "flat.jsonl", "new") + nestedTxt := cursorProviderWriteTranscript(t, transcriptsDir, filepath.Join("nested", "nested.txt"), "old") + nestedJSONL := cursorProviderWriteJSONLTranscript( + t, transcriptsDir, filepath.Join("nested", "nested.jsonl"), "new", + ) + cursorProviderWriteJSONLTranscript( + t, transcriptsDir, filepath.Join("nested", "subagents", "child.jsonl"), "child", + ) + cursorProviderWriteJSONLTranscript(t, transcriptsDir, filepath.Join("mismatch", "other.jsonl"), "other") + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "*.txt"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.ElementsMatch(t, []string{flatJSONL, nestedJSONL}, []string{ + discovered[0].DisplayPath, + discovered[1].DisplayPath, + }) + for _, source := range discovered { + assert.Equal(t, AgentCursor, source.Provider) + assert.Equal(t, DecodeCursorProjectDir(projectDir), source.ProjectHint) + } + + found, ok, err := provider.FindSource(context.Background(), 
FindSourceRequest{ + FullSessionID: "remote~cursor:flat", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, flatJSONL, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: flatTxt, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, flatJSONL, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "nested", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, nestedJSONL, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, nestedJSONL, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + for _, tc := range []struct { + name string + path string + want string + }{ + {name: "flat txt promotes to jsonl", path: flatTxt, want: flatJSONL}, + {name: "flat jsonl", path: flatJSONL, want: flatJSONL}, + {name: "nested txt promotes to jsonl", path: nestedTxt, want: nestedJSONL}, + {name: "nested jsonl", path: nestedJSONL, want: nestedJSONL}, + } { + t.Run(tc.name, func(t *testing.T) { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: tc.path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, tc.want, changed[0].DisplayPath) + }) + } + + ignored, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(transcriptsDir, "nested", "subagents", "child.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + wrongRoot, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: flatJSONL, + EventKind: "write", + WatchRoot: filepath.Join(root, "..", "other-root"), + }, + ) + 
require.NoError(t, err) + assert.Empty(t, wrongRoot) +} + +func TestCursorProviderResolvesDuplicateStemsWithinProject(t *testing.T) { + root := t.TempDir() + firstProject := "Users-fiona-Documents-first" + secondProject := "Users-fiona-Documents-second" + firstDir := filepath.Join(root, firstProject, "agent-transcripts") + secondDir := filepath.Join(root, secondProject, "agent-transcripts") + firstJSONL := cursorProviderWriteJSONLTranscript(t, firstDir, "shared.jsonl", "first") + secondTxt := cursorProviderWriteTranscript(t, secondDir, "shared.txt", "second old") + secondJSONL := cursorProviderWriteJSONLTranscript(t, secondDir, "shared.jsonl", "second new") + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.ElementsMatch(t, []string{firstJSONL, secondJSONL}, sourceDisplayPaths(discovered)) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: secondTxt, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, secondJSONL, found.DisplayPath) + assert.Equal(t, DecodeCursorProjectDir(secondProject), found.ProjectHint) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: secondTxt, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, secondJSONL, changed[0].DisplayPath) + assert.Equal(t, DecodeCursorProjectDir(secondProject), changed[0].ProjectHint) +} + +func TestCursorProviderParse(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + sourcePath := cursorProviderWriteJSONLTranscript( + t, transcriptsDir, "parse.jsonl", "parse question", + ) + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: 
[]string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.False(t, outcome.ForceReplace) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "cursor:parse", result.Result.Session.ID) + assert.Equal(t, AgentCursor, result.Result.Session.Agent) + assert.Equal(t, DecodeCursorProjectDir(projectDir), result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, sourcePath, result.Result.Session.File.Path) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, "parse question", result.Result.Session.FirstMessage) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCursorProviderFingerprintSkipsOversizedTranscriptHash(t *testing.T) { + root := t.TempDir() + projectDir := "Users-fiona-Documents-demo" + transcriptsDir := filepath.Join(root, projectDir, "agent-transcripts") + sourcePath := filepath.Join(transcriptsDir, "oversized.jsonl") + require.NoError(t, os.MkdirAll(transcriptsDir, 0o755)) + file, err := os.Create(sourcePath) + require.NoError(t, err) + require.NoError(t, file.Truncate(maxCursorTranscriptSize+1)) + require.NoError(t, file.Close()) + + provider, ok := NewProvider(AgentCursor, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + 
require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Equal(t, int64(maxCursorTranscriptSize+1), fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.Empty(t, fingerprint.Hash) + + _, err = provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "file too large") +} + +func cursorProviderWriteTranscript( + t *testing.T, + dir string, + name string, + firstMessage string, +) string { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile( + path, + []byte("user:\n"+firstMessage+"\nassistant:\nDone.\n"), + 0o644, + )) + return path +} + +func cursorProviderWriteJSONLTranscript( + t *testing.T, + dir string, + name string, + firstMessage string, +) string { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile( + path, + []byte(`{"role":"user","message":{"content":"`+firstMessage+`"}}`+"\n"+ + `{"role":"assistant","message":{"content":"Done."}}`+"\n"), + 0o644, + )) + return path +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index a8c5e2e55..d360274d3 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -990,219 +990,6 @@ func confirmGeminiSessionID( return GeminiSessionID(data) == sessionID } -// DiscoverCursorSessions finds all agent transcript files under -// the Cursor projects dir (//agent-transcripts/.txt). -// All discovered paths are validated to resolve within the -// canonical projectsDir, preventing symlink escapes. -// cursorAddSeen inserts a transcript path into the seen map, -// preferring .jsonl over .txt when both exist for the same stem. 
-func cursorAddSeen( - seen map[string]string, name, fullPath string, -) { - stem := strings.TrimSuffix(name, filepath.Ext(name)) - if prev, ok := seen[stem]; ok { - if strings.HasSuffix(prev, ".txt") && - strings.HasSuffix(name, ".jsonl") { - seen[stem] = fullPath - } - return - } - seen[stem] = fullPath -} - -func DiscoverCursorSessions( - projectsDir string, -) []DiscoveredFile { - if projectsDir == "" { - return nil - } - - // Canonicalize root once for containment checks. - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return nil - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Reject symlinked project directory entries. - if entry.Type()&os.ModeSymlink != 0 { - continue - } - - transcriptsDir := filepath.Join( - projectsDir, entry.Name(), "agent-transcripts", - ) - - // Verify the transcripts directory resolves within - // the canonical root. - resolvedDir, err := filepath.EvalSymlinks( - transcriptsDir, - ) - if err != nil { - continue - } - if !isContainedIn(resolvedDir, resolvedRoot) { - continue - } - - transcripts, err := os.ReadDir(transcriptsDir) - if err != nil { - continue - } - - project := DecodeCursorProjectDir(entry.Name()) - if project == "" { - project = "unknown" - } - - // Collect valid transcripts, deduping by basename - // stem. When both .jsonl and .txt exist for the - // same session, prefer .jsonl. - // - // Cursor uses two layouts: - // flat: agent-transcripts/.{txt,jsonl} - // nested: agent-transcripts//.{txt,jsonl} - seen := make(map[string]string) // stem -> path - for _, sf := range transcripts { - if !sf.IsDir() { - // Flat layout: file directly in - // agent-transcripts/. 
- name := sf.Name() - if !IsCursorTranscriptExt(name) { - continue - } - fullPath := filepath.Join( - transcriptsDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - continue - } - - // Nested layout: agent-transcripts// - // containing .{txt,jsonl}. - subDir := filepath.Join( - transcriptsDir, sf.Name(), - ) - subEntries, err := os.ReadDir(subDir) - if err != nil { - continue - } - dirName := sf.Name() - for _, sub := range subEntries { - if sub.IsDir() { - continue - } - name := sub.Name() - if !IsCursorTranscriptExt(name) { - continue - } - // Only accept files whose stem matches - // the parent directory name, e.g. - // /.jsonl. - stem := strings.TrimSuffix( - name, filepath.Ext(name), - ) - if stem != dirName { - continue - } - fullPath := filepath.Join( - subDir, name, - ) - if !IsRegularFile(fullPath) { - continue - } - cursorAddSeen(seen, name, fullPath) - } - } - for _, path := range seen { - files = append(files, DiscoveredFile{ - Path: path, - Project: project, - Agent: AgentCursor, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCursorSourceFile finds a Cursor transcript file by -// session UUID. Prefers .jsonl over .txt. -func FindCursorSourceFile( - projectsDir, sessionID string, -) string { - if projectsDir == "" || !IsValidSessionID(sessionID) { - return "" - } - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - - resolvedRoot, err := filepath.EvalSymlinks(projectsDir) - if err != nil { - return "" - } - - for _, ext := range []string{".jsonl", ".txt"} { - target := sessionID + ext - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // Nested layout first (matches discovery - // precedence), then flat layout. 
- candidates := []string{ - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", sessionID, target, - ), - filepath.Join( - projectsDir, entry.Name(), - "agent-transcripts", target, - ), - } - for _, candidate := range candidates { - if !IsRegularFile(candidate) { - continue - } - resolved, err := filepath.EvalSymlinks( - candidate, - ) - if err != nil { - continue - } - rel, err := filepath.Rel( - resolvedRoot, resolved, - ) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - continue - } - return candidate - } - } - } - return "" -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 72f4c98ba..61019154a 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -1152,11 +1152,9 @@ func TestDiscoverCursorSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1225,11 +1223,9 @@ func TestDiscoverCursorSessions_NestedLayout(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCursorSessions(dir) - require.Len(t, files, tt.wantCount, "files count") - for _, f := range files { - assert.Equal(t, AgentCursor, f.Agent, "agent") - } + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, tt.wantCount, "paths count") }) } } @@ -1243,10 +1239,11 @@ func TestDiscoverCursorSessions_DedupPrefersJsonl(t *testing.T) { 
filepath.Join(transcripts, "sess.txt"): "user:\nhi", filepath.Join(transcripts, "sess.jsonl"): `{"role":"user"}`, }) - files := DiscoverCursorSessions(dir) - require.Len(t, files, 1, "files count") - assert.True(t, strings.HasSuffix(files[0].Path, ".jsonl"), - "expected .jsonl path, got %q", files[0].Path) + set := newCursorSourceSet([]string{dir}) + paths := set.discoverTranscriptPaths(dir) + require.Len(t, paths, 1, "paths count") + assert.True(t, strings.HasSuffix(paths[0], ".jsonl"), + "expected .jsonl path, got %q", paths[0]) } func TestParseCursorTranscriptRelPath(t *testing.T) { @@ -1321,7 +1318,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess1.txt"): "data", }) - got := FindCursorSourceFile(dir, "sess1") + got := cursorFindSourceFile(dir, "sess1") assert.NotEmpty(t, got, "expected to find .txt file") }) @@ -1330,7 +1327,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess2.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess2") + got := cursorFindSourceFile(dir, "sess2") assert.NotEmpty(t, got, "expected to find .jsonl file") }) @@ -1343,7 +1340,7 @@ func TestFindCursorSourceFile(t *testing.T) { jsonlPath := filepath.Join( dir, cursorTranscripts, "sess3.jsonl", ) - got := FindCursorSourceFile(dir, "sess3") + got := cursorFindSourceFile(dir, "sess3") assert.Equal(t, jsonlPath, got, "(.jsonl preferred)") }) @@ -1352,7 +1349,7 @@ func TestFindCursorSourceFile(t *testing.T) { setupFileSystem(t, dir, map[string]string{ filepath.Join(cursorTranscripts, "sess4", "sess4.jsonl"): "{}", }) - got := FindCursorSourceFile(dir, "sess4") + got := cursorFindSourceFile(dir, "sess4") require.NotEmpty(t, got, "expected to find nested .jsonl file") assert.True(t, strings.HasSuffix(got, filepath.Join("sess4", "sess4.jsonl")), "unexpected path %q", got) @@ -1364,14 +1361,14 @@ func TestFindCursorSourceFile(t 
*testing.T) { filepath.Join(cursorTranscripts, "sess5", "sess5.txt"): "old", filepath.Join(cursorTranscripts, "sess5", "sess5.jsonl"): "new", }) - got := FindCursorSourceFile(dir, "sess5") + got := cursorFindSourceFile(dir, "sess5") assert.True(t, strings.HasSuffix(got, "sess5.jsonl"), "expected .jsonl path, got %q", got) }) t.Run("NotFound", func(t *testing.T) { dir := t.TempDir() - got := FindCursorSourceFile(dir, "nonexistent") + got := cursorFindSourceFile(dir, "nonexistent") assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 17a115f04..12e500d98 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -353,6 +353,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentCortex: return newCortexProviderFactory(def) + case AgentCursor: + return newCursorProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) case AgentIflow: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index bc8349059..2b5797002 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -26,7 +26,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentOpenCode: ProviderMigrationLegacyOnly, AgentKilo: ProviderMigrationLegacyOnly, AgentOpenHands: ProviderMigrationProviderAuthoritative, - AgentCursor: ProviderMigrationLegacyOnly, + AgentCursor: ProviderMigrationProviderAuthoritative, AgentIflow: ProviderMigrationProviderAuthoritative, AgentAmp: ProviderMigrationProviderAuthoritative, AgentZencoder: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index dedb0bb1c..226cdc567 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -29,6 +29,11 @@ var legacyEntrypointVerb = 
regexp.MustCompile(`^(Discover|Find|Parse|Process|Cla var providerNeutralEntrypoints = map[string]bool{ "ParseVirtualSourcePath": true, "ParseVirtualSourcePathForBase": true, + // ParseCursorTranscriptRelPath is a pure rel-path shape validator with no + // filesystem or provider state. It is shared by the engine's path + // classification/enrichment and the Cursor provider's source set, so it + // stays a free helper rather than moving onto the provider. + "ParseCursorTranscriptRelPath": true, } // pendingShimProviderFiles are provider files whose behavior has not yet been @@ -48,7 +53,6 @@ var pendingShimProviderFiles = map[string]bool{ "codex_provider.go": true, "copilot_provider.go": true, "cowork_provider.go": true, - "cursor_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, "hermes_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index f799784f1..4c91fcbe3 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -219,15 +219,13 @@ var Registry = []AgentDef{ ShallowWatch: true, }, { - Type: AgentCursor, - DisplayName: "Cursor", - EnvVar: "CURSOR_PROJECTS_DIR", - ConfigKey: "cursor_project_dirs", - DefaultDirs: []string{".cursor/projects"}, - IDPrefix: "cursor:", - FileBased: true, - DiscoverFunc: DiscoverCursorSessions, - FindSourceFunc: FindCursorSourceFile, + Type: AgentCursor, + DisplayName: "Cursor", + EnvVar: "CURSOR_PROJECTS_DIR", + ConfigKey: "cursor_project_dirs", + DefaultDirs: []string{".cursor/projects"}, + IDPrefix: "cursor:", + FileBased: true, }, { Type: AgentAmp, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 77b4f6511..b74eb28af 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -885,20 +885,6 @@ func isUnder(dir, path string) (string, bool) { return rel, true } -// findContainingDir returns the first dir from dirs that is a -// parent of path, or "" if none match. 
-func findContainingDir(dirs []string, path string) string { - for _, d := range dirs { - if d == "" { - continue - } - if _, ok := isUnder(d, path); ok { - return d - } - } - return "" -} - // classifyContainerPath runs the container- and SQLite-style classifiers that // resolve a path whether or not it currently exists on disk (OpenCode-format // stores, Kiro, Zed, Shelley, and Vibe). Split out of classifyOnePath to keep @@ -1132,30 +1118,6 @@ func (e *Engine) classifyOnePath( } } - // Cursor: - // //agent-transcripts/.{txt,jsonl} - // //agent-transcripts//.{txt,jsonl} - for _, cursorDir := range e.agentDirs[parser.AgentCursor] { - if cursorDir == "" { - continue - } - if rel, ok := isUnder(cursorDir, path); ok { - projectDir, ok := parser.ParseCursorTranscriptRelPath(rel) - if !ok { - continue - } - project := parser.DecodeCursorProjectDir(projectDir) - if project == "" { - project = "unknown" - } - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentCursor, - }, true - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -4261,8 +4223,6 @@ func (e *Engine) processFile( res = e.processGemini(file, info) case parser.AgentOpenCode, parser.AgentKilo, parser.AgentMiMoCode: res = e.processOpenCodeFormat(file.Agent, file, info) - case parser.AgentCursor: - res = e.processCursor(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -6552,61 +6512,6 @@ func (e *Engine) processAntigravityCLI( } } -func (e *Engine) processCursor( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Skip .txt if a sibling .jsonl exists — .jsonl is the - // richer format and takes precedence. 
- if stem, ok := strings.CutSuffix(file.Path, ".txt"); ok { - if parser.IsRegularFile(stem + ".jsonl") { - return processResult{skip: true} - } - } - - sessionID := parser.CursorSessionID(file.Path) - - if e.shouldSkipFile(sessionID, info) { - return processResult{skip: true} - } - - // Re-validate containment immediately before parsing to - // close the TOCTOU window between discovery and read. - // The parser opens with O_NOFOLLOW (rejecting symlinked - // final components), and this check catches parent - // directory swaps. - if root := findContainingDir( - e.agentDirs[parser.AgentCursor], file.Path, - ); root != "" { - if err := validateCursorContainment( - root, file.Path, - ); err != nil { - return processResult{ - err: fmt.Errorf( - "containment check: %w", err, - ), - } - } - } - - sess, msgs, err := parser.ParseCursorSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - // Hash is computed inside ParseCursorSession from the - // already-read data to avoid re-opening the file by path. - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -6620,31 +6525,6 @@ func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// validateCursorContainment re-resolves both root and path -// to verify the file still resides within the cursor projects -// directory. Returns an error if containment fails. 
-func validateCursorContainment( - cursorDir, path string, -) error { - resolvedRoot, err := filepath.EvalSymlinks(cursorDir) - if err != nil { - return fmt.Errorf("resolve root: %w", err) - } - resolvedPath, err := filepath.EvalSymlinks(path) - if err != nil { - return fmt.Errorf("resolve path: %w", err) - } - rel, err := filepath.Rel(resolvedRoot, resolvedPath) - sep := string(filepath.Separator) - if err != nil || rel == ".." || - strings.HasPrefix(rel, ".."+sep) { - return fmt.Errorf( - "%s escapes %s", path, cursorDir, - ) - } - return nil -} - // computeFinalStreak counts trailing consecutive failures // from the end of the tool call list. func computeFinalStreak(calls []signals.ToolCallRow) int {