diff --git a/frontend/src/lib/utils/messages.test.ts b/frontend/src/lib/utils/messages.test.ts index 032ae5c39..38024d89b 100644 --- a/frontend/src/lib/utils/messages.test.ts +++ b/frontend/src/lib/utils/messages.test.ts @@ -58,6 +58,19 @@ describe("isSystemMessage", () => { ["command-name", "/commit"], ["local-command", "ok"], ["stop hook", "Stop hook feedback: blocked"], + ["legacy goal context", "\n\tstate"], + [ + "codex internal goal context", + ' state', + ], + [ + "codex internal goal context with attr before source", + 'state', + ], + [ + "codex internal goal context with attr after source", + 'state', + ], ])("detects prefix-based system message: %s", (_label, content) => { expect(isSystemMessage(msg({ content }))).toBe(true); }); @@ -68,6 +81,23 @@ describe("isSystemMessage", () => { ).toBe(false); }); + it.each([ + [ + "non-goal internal context", + 'state', + ], + [ + "data-source attribute", + 'state', + ], + [ + "missing closing tag delimiter", + ' { + expect(isSystemMessage(msg({ content }))).toBe(false); + }); + it.each([ "continuation", "resume", diff --git a/frontend/src/lib/utils/messages.ts b/frontend/src/lib/utils/messages.ts index 26044ab9f..11c997fc6 100644 --- a/frontend/src/lib/utils/messages.ts +++ b/frontend/src/lib/utils/messages.ts @@ -10,6 +10,10 @@ const SYSTEM_MSG_PREFIXES = [ "Stop hook feedback:", ]; +const LEGACY_GOAL_CONTEXT_PREFIX = ""; +const CODEX_INTERNAL_CONTEXT_TAG_PREFIX = " trimmed.startsWith(p)); + return ( + isGoalContextMessage(trimmed) || + SYSTEM_MSG_PREFIXES.some((p) => trimmed.startsWith(p)) + ); +} + +function isGoalContextMessage(trimmedContent: string): boolean { + if (trimmedContent.startsWith(LEGACY_GOAL_CONTEXT_PREFIX)) { + return true; + } + if (!trimmedContent.startsWith(CODEX_INTERNAL_CONTEXT_TAG_PREFIX)) { + return false; + } + const tagEnd = trimmedContent.indexOf(">"); + if (tagEnd < 0) { + return false; + } + const openTag = trimmedContent.slice(0, tagEnd); + return GOAL_CONTEXT_SOURCE_ATTR_RE.test(openTag); } /** diff --git a/internal/db/db.go b/internal/db/db.go index 8c7c2b9dd..a6f937c65 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -245,6 +245,9 @@ import ( // backfill. Re-parsing persists estimated usage events for existing // aggregate-only Kimi sessions and preserves explicit native event // model names instead of the proxy fallback.) +// (56: Codex goal-continuation context wrappers are filtered from +// persisted messages and user_message_count. Existing Codex rows need +// re-parsing so synthetic /goal continuation records are removed.) // (54: Antigravity .db sessions record a schema-fingerprint // source_version. Re-parsing populates source_version on existing // Antigravity IDE and CLI rows so "which agy release produced this @@ -258,7 +261,7 @@ import ( // (51: Gemini cumulative-to-delta token reparse.) // (17: Codex template filtering.) // (16: system messages.) -const dataVersion = 55 +const dataVersion = 56 const tokenCoverageRepairStatsKey = "token_coverage_repair_v1" diff --git a/internal/db/db_test.go b/internal/db/db_test.go index a92393e2b..96c289b13 100644 --- a/internal/db/db_test.go +++ b/internal/db/db_test.go @@ -696,10 +696,15 @@ func TestMigration_ToolResultEventsTable(t *testing.T) { } func TestCurrentDataVersionKimiUsageEvents(t *testing.T) { - assert.Equal(t, 55, CurrentDataVersion(), + assert.GreaterOrEqual(t, CurrentDataVersion(), 55, "Kimi persisted usage events require a data version bump") } +func TestCurrentDataVersionGoalContextFiltering(t *testing.T) { + assert.Equal(t, 56, CurrentDataVersion(), + "Codex goal-context filtering requires a data version bump") +} + func TestInsertMessages_PreservesToolResultEvents(t *testing.T) { d := testDB(t) insertSession(t, d, "s-events", "proj") diff --git a/internal/db/search.go b/internal/db/search.go index 88c9ce1bf..547d16530 100644 --- a/internal/db/search.go +++ b/internal/db/search.go @@ -3,6 +3,7 @@ package db import ( "context" "fmt" + "regexp" "strings" ) @@ -12,10 +13,10 @@ const ( snippetTokenLength = 32 ) -// SystemMsgPrefixes lists content prefixes that identify system-injected -// user messages. These are excluded from search results even when the -// is_system column has not been backfilled (e.g. Claude sessions parsed -// before schema version 2). Keep in sync with the frontend list in +// SystemMsgPrefixes lists non-goal content prefixes that identify +// system-injected user messages. These are excluded from search results even +// when the is_system column has not been backfilled (e.g. Claude sessions +// parsed before schema version 2). Keep in sync with the frontend list in // frontend/src/lib/utils/messages.ts. var SystemMsgPrefixes = []string{ "This session is being continued", @@ -27,32 +28,136 @@ var SystemMsgPrefixes = []string{ "Stop hook feedback:", } +const ( + legacyGoalContextPrefix = "" + codexInternalContextTagPrefix = "") + return ok && goalContextSourceAttrRe.MatchString(openTag) + } + return false +} + +type systemPrefixSQLDialect int + +const ( + systemPrefixSQLite systemPrefixSQLDialect = iota + systemPrefixPostgres + systemPrefixDuckDB +) + // SystemPrefixSQL returns a SQL clause that excludes user messages -// matching any system prefix. The column alias for content must be -// passed (e.g. "m.content" or "m2.content"). Uses case-sensitive -// substr comparison, which behaves identically on SQLite and -// PostgreSQL (unlike LIKE, which is case-insensitive on SQLite). +// matching any system prefix. The column alias for content must be passed +// (e.g. "m.content" or "m2.content"). Uses case-sensitive substr and +// position checks instead of LIKE, which is case-insensitive on SQLite. func SystemPrefixSQL(contentCol, roleCol string) string { + return systemPrefixSQL(contentCol, roleCol, systemPrefixSQLite) +} + +// PostgresSystemPrefixSQL is the PostgreSQL form of SystemPrefixSQL. +func PostgresSystemPrefixSQL(contentCol, roleCol string) string { + return systemPrefixSQL(contentCol, roleCol, systemPrefixPostgres) +} + +// DuckDBSystemPrefixSQL is the DuckDB form of SystemPrefixSQL. +func DuckDBSystemPrefixSQL(contentCol, roleCol string) string { + return systemPrefixSQL(contentCol, roleCol, systemPrefixDuckDB) +} + +func systemPrefixSQL( + contentCol, roleCol string, dialect systemPrefixSQLDialect, +) string { // LTRIM strips the same whitespace as Go's strings.TrimSpace, // JS .trim(), and the parser's isSystem helpers: ASCII whitespace, // BOM (U+FEFF), and Unicode // spaces (U+0085, U+00A0, U+1680, U+2000–U+200A, U+2028, - // U+2029, U+202F, U+205F, U+3000). Both SQLite and PostgreSQL + // U+2029, U+202F, U+205F, U+3000). SQLite, PostgreSQL, and DuckDB // handle multi-byte UTF-8 characters in the trim set correctly. - trimmed := "LTRIM(" + contentCol + ", ' \t\n\v\f\r" + - "\u0085\u00A0\u1680" + - "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A" + - "\u2028\u2029\u202F\u205F\u3000\uFEFF')" - parts := make([]string, len(SystemMsgPrefixes)) - for i, p := range SystemMsgPrefixes { - parts[i] = fmt.Sprintf( + trimmed := systemPrefixSQLTrimmed(contentCol) + parts := make([]string, 0, len(SystemMsgPrefixes)+1) + for _, p := range SystemMsgPrefixes { + parts = append(parts, fmt.Sprintf( "substr(%s, 1, %d) = '%s'", trimmed, len(p), p, - ) + )) } + parts = append(parts, goalContextPrefixSQL(trimmed, dialect)) return "NOT (" + roleCol + " = 'user' AND (" + strings.Join(parts, " OR ") + "))" } +func systemPrefixSQLTrimmed(contentCol string) string { + return "LTRIM(" + contentCol + ", ' \t\n\v\f\r" + + "\u0085\u00A0\u1680" + + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A" + + "\u2028\u2029\u202F\u205F\u3000\uFEFF')" +} + +func goalContextPrefixSQL(trimmed string, dialect systemPrefixSQLDialect) string { + legacy := fmt.Sprintf("substr(%s, 1, %d) = '%s'", + trimmed, len(legacyGoalContextPrefix), legacyGoalContextPrefix) + current := fmt.Sprintf( + "(substr(%[1]s, 1, %[2]d) = '%[3]s' AND %[4]s)", + trimmed, len(codexInternalContextTagPrefix), + codexInternalContextTagPrefix, + goalContextSourceAttrSQL(openingTagSQL(trimmed, dialect), dialect), + ) + return "(" + legacy + " OR " + current + ")" +} + +func openingTagSQL(trimmed string, dialect systemPrefixSQLDialect) string { + return fmt.Sprintf("substr(%s, 1, %s)", + trimmed, sqlPosition(dialect, ">", trimmed)) +} + +func goalContextSourceAttrSQL( + openTag string, dialect systemPrefixSQLDialect, +) string { + normalized := openTag + for _, ws := range []string{"\t", "\n", "\v", "\f", "\r"} { + normalized = fmt.Sprintf("replace(%s, '%s', ' ')", normalized, ws) + } + checks := []string{ + sqlContains(dialect, normalized, goalContextSourceAttrSQLPrefix+" "), + sqlContains(dialect, normalized, goalContextSourceAttrSQLPrefix+">"), + sqlContains(dialect, normalized, goalContextSourceAttrSQLPrefix+"/>"), + } + return "(" + strings.Join(checks, " OR ") + ")" +} + +func sqlContains( + dialect systemPrefixSQLDialect, haystack, needle string, +) string { + return sqlPosition(dialect, needle, haystack) + " > 0" +} + +func sqlPosition( + dialect systemPrefixSQLDialect, needle, haystack string, +) string { + quotedNeedle := "'" + needle + "'" + if dialect == systemPrefixPostgres { + return fmt.Sprintf("POSITION(%s IN %s)", quotedNeedle, haystack) + } + return fmt.Sprintf("instr(%s, %s)", haystack, quotedNeedle) +} + // systemPrefixTrimCutset is the leading-whitespace set SystemPrefixSQL's // LTRIM strips: ASCII whitespace, BOM, and the Unicode spaces. Kept // identical so the Go and SQL system-prefix checks agree. @@ -70,6 +175,9 @@ func IsSystemPrefixed(content, role string) bool { if role != "user" { return false } + if IsGoalContextPrefixed(content, role) { + return true + } trimmed := strings.TrimLeft(content, systemPrefixTrimCutset) for _, p := range SystemMsgPrefixes { if strings.HasPrefix(trimmed, p) { diff --git a/internal/db/search_test.go b/internal/db/search_test.go index a0b12b9a9..a1f965466 100644 --- a/internal/db/search_test.go +++ b/internal/db/search_test.go @@ -22,6 +22,12 @@ func TestIsSystemPrefixed(t *testing.T) { {"task-notification prefix", "done", "user", true}, {"leading whitespace then prefix", "\n\t /foo", "user", true}, {"bom then prefix", "\uFEFFx", "user", true}, + {"legacy goal context prefix", "\n\tstate", "user", true}, + {"codex internal goal context prefix", `state`, "user", true}, + {"codex goal context with attr before source", `state`, "user", true}, + {"codex goal context with attr after source", `state`, "user", true}, + {"codex non-goal internal context", `state`, "user", false}, + {"codex data-source attr is not goal", `state`, "user", false}, {"assistant role is never system-prefixed", SystemMsgPrefixes[0], "assistant", false}, {"prefix mid-content does not match", "see later", "user", false}, } @@ -33,6 +39,74 @@ func TestIsSystemPrefixed(t *testing.T) { } } +func TestIsGoalContextPrefixed(t *testing.T) { + t.Parallel() + cases := []struct { + name string + content string + role string + want bool + }{ + {"legacy wrapper", "state", "user", true}, + {"legacy wrapper with whitespace", "\n\tstate", "user", true}, + {"current wrapper", `state`, "user", true}, + {"current wrapper with extra attrs", `state`, "user", true}, + {"current wrapper with attrs after source", `state`, "user", true}, + {"current wrapper with newline before source", "state", "user", true}, + {"non goal internal context", `state`, "user", false}, + {"data-source attr is not goal", `state`, "user", false}, + {"missing closing tag delimiter", `state", "assistant", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.want, IsGoalContextPrefixed(tc.content, tc.role)) + }) + } +} + +func TestSystemPrefixSQL(t *testing.T) { + d := testDB(t) + rows, err := d.getReader().QueryContext(context.Background(), ` + WITH candidates(label, role, content) AS ( + VALUES + ('normal', 'user', 'regular message'), + ('assistant-goal', 'assistant', 'state'), + ('legacy-goal', 'user', 'state'), + ('current-goal', 'user', 'state'), + ('attr-before-source', 'user', 'state'), + ('attr-after-source', 'user', 'state'), + ('newline-before-source', 'user', 'state'), + ('self-closing-goal', 'user', 'state'), + ('non-goal-internal', 'user', 'state'), + ('data-source', 'user', 'state'), + ('missing-close', 'user', '") - return ok && strings.Contains(openTag, `source="goal"`) + return ok && codexGoalContextSourceAttrRe.MatchString(openTag) } return false } diff --git a/internal/parser/codex_parser_test.go b/internal/parser/codex_parser_test.go index 8cb3da38c..cc2acf282 100644 --- a/internal/parser/codex_parser_test.go +++ b/internal/parser/codex_parser_test.go @@ -1437,6 +1437,10 @@ func TestParseCodexSession_EdgeCases(t *testing.T) { "The objective below is user-provided data." current := "\n" + goalBody + "\n" + attrBeforeSource := "\n" + + goalBody + "\n" + attrAfterSource := "\n" + + goalBody + "\n" legacy := "\n" + goalBody + "\n" content := testjsonl.JoinJSONL( testjsonl.CodexSessionMetaJSON("abc", "/tmp", "user", tsEarly), @@ -1444,8 +1448,10 @@ func TestParseCodexSession_EdgeCases(t *testing.T) { testjsonl.CodexMsgJSON("assistant", "Working on it", "2024-01-01T10:00:02Z"), testjsonl.CodexMsgJSON("user", current, "2024-01-01T10:00:03Z"), testjsonl.CodexMsgJSON("assistant", "Still working", "2024-01-01T10:00:04Z"), - testjsonl.CodexMsgJSON("user", legacy, "2024-01-01T10:00:05Z"), - testjsonl.CodexMsgJSON("user", "Real second request", "2024-01-01T10:00:06Z"), + testjsonl.CodexMsgJSON("user", attrBeforeSource, "2024-01-01T10:00:05Z"), + testjsonl.CodexMsgJSON("user", attrAfterSource, "2024-01-01T10:00:06Z"), + testjsonl.CodexMsgJSON("user", legacy, "2024-01-01T10:00:07Z"), + testjsonl.CodexMsgJSON("user", "Real second request", "2024-01-01T10:00:08Z"), ) sess, msgs := runCodexParserTest(t, "test.jsonl", content, false) require.NotNil(t, sess) @@ -1458,6 +1464,43 @@ func TestParseCodexSession_EdgeCases(t *testing.T) { "goal continuation context must not count as user turns") }) + t.Run("keeps non-goal codex internal contexts", func(t *testing.T) { + cases := []struct { + name string + content string + }{ + { + name: "data source goal", + content: "\n" + + "Preserve this internal context.\n", + }, + { + name: "other source", + content: "\n" + + "Preserve this internal context.\n", + }, + { + name: "no source", + content: "\n" + + "Preserve this internal context.\n", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + content := testjsonl.JoinJSONL( + testjsonl.CodexSessionMetaJSON("abc", "/tmp", "user", tsEarly), + testjsonl.CodexMsgJSON("user", tc.content, tsEarlyS1), + ) + sess, msgs := runCodexParserTest(t, "test.jsonl", content, false) + require.NotNil(t, sess) + require.Len(t, msgs, 1) + assert.Equal(t, tc.content, msgs[0].Content) + assert.Equal(t, 1, sess.UserMessageCount, + "non-goal internal contexts must count as user turns") + }) + } + }) + // Only the structured goal wrapper is system content; a real user // message that merely quotes the goal sentence stays in the transcript. t.Run("keeps unwrapped goal-like user text", func(t *testing.T) { diff --git a/internal/postgres/activity.go b/internal/postgres/activity.go index ed2d49313..306c89c82 100644 --- a/internal/postgres/activity.go +++ b/internal/postgres/activity.go @@ -25,7 +25,7 @@ func (s *Store) GetSessionActivity( // 2. Visible-message filter (same as SQLite). visFilter := "m.is_system = FALSE AND " + - db.SystemPrefixSQL("m.content", "m.role") + db.PostgresSystemPrefixSQL("m.content", "m.role") // 3. Get min/max timestamps from visible messages. // PG stores timestamp as TIMESTAMPTZ, so scan into *time.Time. diff --git a/internal/postgres/messages.go b/internal/postgres/messages.go index 5a4ac22a0..463c7967d 100644 --- a/internal/postgres/messages.go +++ b/internal/postgres/messages.go @@ -114,7 +114,7 @@ func (s *Store) SearchSession( AND tc.message_ordinal = m.ordinal WHERE m.session_id = $1 AND m.is_system = FALSE - AND `+db.SystemPrefixSQL("m.content", "m.role")+` + AND `+db.PostgresSystemPrefixSQL("m.content", "m.role")+` AND (m.content ILIKE $2 OR tc.result_content ILIKE $2) ORDER BY m.ordinal ASC`, @@ -244,7 +244,7 @@ func (s *Store) Search( WHERE %s AND s.deleted_at IS NULL AND m.is_system = FALSE - AND `+db.SystemPrefixSQL("m.content", "m.role")+` + AND `+db.PostgresSystemPrefixSQL("m.content", "m.role")+` %s ORDER BY m.session_id, POSITION(LOWER($2) IN LOWER(m.content)) ASC, @@ -274,7 +274,7 @@ func (s *Store) Search( SELECT 1 FROM messages mx WHERE mx.session_id = s.id AND mx.is_system = FALSE - AND `+db.SystemPrefixSQL("mx.content", "mx.role")+` + AND `+db.PostgresSystemPrefixSQL("mx.content", "mx.role")+` ) AND s.id NOT IN (SELECT session_id FROM msg_matches) %s diff --git a/internal/postgres/search_content.go b/internal/postgres/search_content.go index 7527bf9ba..8a7d49ad4 100644 --- a/internal/postgres/search_content.go +++ b/internal/postgres/search_content.go @@ -123,7 +123,7 @@ func pgMessagesBranch( sysPred := "TRUE" if f.ExcludeSystem { sysPred = "m.is_system = FALSE AND " + - db.SystemPrefixSQL("m.content", "m.role") + db.PostgresSystemPrefixSQL("m.content", "m.role") } // Select the full content; the snippet is windowed and redacted in Go. @@ -398,7 +398,7 @@ func pgMessagesCandidateBranch( sysPred := "TRUE" if f.ExcludeSystem { sysPred = "m.is_system = FALSE AND " + - db.SystemPrefixSQL("m.content", "m.role") + db.PostgresSystemPrefixSQL("m.content", "m.role") } return fmt.Sprintf(` diff --git a/internal/postgres/trends.go b/internal/postgres/trends.go index 42a9a56b8..ae5881e36 100644 --- a/internal/postgres/trends.go +++ b/internal/postgres/trends.go @@ -45,7 +45,7 @@ func (s *Store) GetTrendsTerms( WHERE ` + where + ` AND m.role IN ('user', 'assistant') AND m.is_system = FALSE - AND ` + db.SystemPrefixSQL("m.content", "m.role") + AND ` + db.PostgresSystemPrefixSQL("m.content", "m.role") rows, err := s.pg.QueryContext(ctx, query, pb.args...) if err != nil { diff --git a/internal/server/export.go b/internal/server/export.go index 299a60f77..91405b2a0 100644 --- a/internal/server/export.go +++ b/internal/server/export.go @@ -628,6 +628,8 @@ footer a { func generateExportHTML( session *db.Session, msgs []db.Message, ) string { + msgs = filterExportHTMLMessages(msgs) + startedAt := "" if session.StartedAt != nil { startedAt = formatTimestamp(*session.StartedAt) @@ -636,7 +638,7 @@ func generateExportHTML( data := exportData{ Project: session.Project, Agent: agentDisplayName(session.Agent), - MessageCount: session.MessageCount, + MessageCount: len(msgs), StartedAt: startedAt, Messages: make([]exportMessage, len(msgs)), } @@ -670,6 +672,17 @@ func generateExportHTML( return b.String() } +func filterExportHTMLMessages(msgs []db.Message) []db.Message { + filtered := make([]db.Message, 0, len(msgs)) + for _, m := range msgs { + if db.IsGoalContextPrefixed(m.Content, m.Role) { + continue + } + filtered = append(filtered, m) + } + return filtered +} + func generateInsightExportHTML(insight *db.Insight) string { data := insightExportData{ Title: insightExportTitle(insight), @@ -752,7 +765,8 @@ func focusedExportOrdinals(msgs []db.Message) map[int]bool { continue } - if m.IsSystem || isThinkingOnly(m.Content) { + if m.IsSystem || db.IsGoalContextPrefixed(m.Content, m.Role) || + isThinkingOnly(m.Content) { continue } diff --git a/internal/server/export_test.go b/internal/server/export_test.go index fcacf267a..d7472c65b 100644 --- a/internal/server/export_test.go +++ b/internal/server/export_test.go @@ -498,6 +498,91 @@ func TestGenerateExportHTML_TranscriptModeControls(t *testing.T) { }) } +func TestGenerateExportHTML_OmitsGoalContextRows(t *testing.T) { + t.Parallel() + session := testSession(func(s *db.Session) { + s.MessageCount = 4 + }) + currentGoal := "\n" + + "Continue working toward the active thread goal.\n" + + "" + legacyGoal := "\n" + + "Continue working toward the active thread goal.\n" + + "" + msgs := []db.Message{ + { + SessionID: "test-id", Ordinal: 0, + Role: "user", Content: "Actual user message", + Timestamp: "2025-01-15T10:00:00Z", + }, + { + SessionID: "test-id", Ordinal: 1, + Role: "user", Content: "\n\t" + currentGoal, + Timestamp: "2025-01-15T10:00:01Z", + }, + { + SessionID: "test-id", Ordinal: 2, + Role: "user", Content: " " + legacyGoal, + Timestamp: "2025-01-15T10:00:02Z", + }, + { + SessionID: "test-id", Ordinal: 3, + Role: "assistant", Content: "Assistant reply", + Timestamp: "2025-01-15T10:00:03Z", + }, + } + + html := generateExportHTML(session, msgs) + + assertContainsAll(t, html, []string{ + "2 messages", + `class="message user" data-ordinal="0"`, + `class="message assistant" data-ordinal="3"`, + "Actual user message", + "Assistant reply", + }) + assertContainsNone(t, html, []string{ + `data-ordinal="1"`, + `data-ordinal="2"`, + "", + "Continue working toward the active thread goal.", + }) +} + +func TestGenerateExportHTML_PreservesNonGoalSystemPrefixedRows(t *testing.T) { + t.Parallel() + session := testSession(func(s *db.Session) { + s.MessageCount = 3 + }) + msgs := []db.Message{ + { + SessionID: "test-id", Ordinal: 0, + Role: "user", Content: "This session is being continued from a previous conversation.", + Timestamp: "2025-01-15T10:00:00Z", + }, + { + SessionID: "test-id", Ordinal: 1, + Role: "user", Content: "done", + Timestamp: "2025-01-15T10:00:01Z", + }, + { + SessionID: "test-id", Ordinal: 2, + Role: "user", Content: "Stop hook feedback: blocked", + Timestamp: "2025-01-15T10:00:02Z", + }, + } + + html := generateExportHTML(session, msgs) + + assertContainsAll(t, html, []string{ + "3 messages", + "This session is being continued from a previous conversation.", + "<task-notification>done</task-notification>", + "Stop hook feedback: blocked", + }) +} + func TestFocusedExportOrdinals(t *testing.T) { t.Parallel() tests := []struct { @@ -583,6 +668,41 @@ func TestFocusedExportOrdinals(t *testing.T) { }, want: []int{0, 1, 4}, }, + { + name: "ignores system-prefixed goal contexts", + msgs: []db.Message{ + exportUserMsg(0), + exportAssistantMsg(1, "draft"), + { + SessionID: "test-id", + Ordinal: 2, + Role: "user", + Content: `state`, + }, + { + SessionID: "test-id", + Ordinal: 3, + Role: "user", + Content: "\n\tstate", + }, + exportAssistantMsg(4, "final"), + }, + want: []int{0, 4}, + }, + { + name: "keeps non-goal system-prefixed user rows", + msgs: []db.Message{ + exportUserMsg(0), + { + SessionID: "test-id", + Ordinal: 1, + Role: "user", + Content: "Stop hook feedback: blocked", + }, + exportAssistantMsg(2, "answer"), + }, + want: []int{0, 1, 2}, + }, { name: "keeps answer before compact boundary", msgs: []db.Message{