Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions fastlane/lanes/ai_translator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def format_forms(forms)
def validated_forms(parsed, needed, english_forms)
other = english_forms['other']
needed.each_with_object({}) do |category, out|
candidate = clean(parsed[category].to_s)
candidate = parsed[category].to_s.strip # already JSON-decoded — trim only; clean() would strip a value's own quotes
next if candidate.empty?

source = english_forms[category] || other
Expand Down Expand Up @@ -311,7 +311,7 @@ def translate_batch(chunk, locale)
# Map each numbered item to its validated translation by key; drop empty/placeholder-breaking ones.
def validated_batch(parsed, numbered)
numbered.each_with_object({}) do |(index, string), out|
candidate = clean(parsed[index.to_s].to_s)
candidate = parsed[index.to_s].to_s.strip # already JSON-decoded — trim only; clean() would strip a value's own quotes
next if candidate.empty?

out[string[:key]] = candidate if TranslationValidator.placeholders_match?(string[:source], candidate)
Expand Down Expand Up @@ -363,9 +363,11 @@ def batch_job(custom_id, locale, numbered)
}
end

# Models occasionally wrap the answer in quotation marks or add a trailing newline despite the
# "only the translation" instruction; strip those cosmetic wrappers. Anything more substantial (a prose
# explanation that slipped through) almost always breaks the placeholder gate and is discarded there.
# Strip the cosmetic wrapper a model sometimes adds to a RAW single-string reply — wrapping quotes or a
# trailing newline, despite the "only the translation" instruction. Only ever run this on a raw reply, never
# on a JSON-decoded value: JSON.parse has already removed the structural quotes, so any quotes left there are
# part of the content (a value like "Reader" must keep them). Anything more substantial (a prose explanation
# that slipped through) almost always breaks the placeholder gate and is discarded there.
def clean(text)
stripped = text.strip
if stripped.length >= 2 &&
Expand Down
36 changes: 36 additions & 0 deletions fastlane/lanes/ai_translator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -286,4 +286,40 @@ def test_collect_batch_handles_a_missing_batch_reply
prep = t.prepare_batch({ 'fr' => [{ key: 'a', source: 'One' }] }, batch_size: 25)
assert_equal({ 'fr' => {} }, t.collect_batch({}, prep[:manifest]))
end

# When a translation's value is itself wrapped in quotation marks, those quotes are part of the content and
# must survive — only the model's cosmetic wrapping around a raw reply should be stripped.
def test_translate_plural_preserves_a_quoted_value
  # The JSON value decodes to "Reader" including its own straight double quotes;
  # those quotes are content and must come back untouched.
  # NOTE(review): translator(reply:) is a helper defined elsewhere in this test file —
  # presumably it builds a translator whose model reply is the given string; confirm.
  sut = translator(reply: '{"other":"\"Reader\""}')
  english = { 'other' => '"Reader"' }

  translated = sut.translate_plural(english_forms: english, categories: %w[other], locale: 'fr')

  assert_equal({ 'other' => '"Reader"' }, translated)
end

def test_translate_all_preserves_a_quoted_value
  # Item "1" of the reply decodes to "Reader" with its own straight double quotes;
  # translate_all must return that value with the quotes intact.
  # NOTE(review): translator(reply:) is a helper defined elsewhere in this test file —
  # presumably it builds a translator whose model reply is the given string; confirm.
  sut = translator(reply: '{"1":"\"Reader\""}')
  strings = [{ key: 'sample.quoted', source: '"Reader"' }]

  translated = sut.translate_all(strings, locale: 'fr')

  assert_equal({ 'sample.quoted' => '"Reader"' }, translated)
end

# The same holds for the curly/smart quotes clean() also strips: a JSON-decoded value wrapped in “ ” keeps them.
def test_translate_all_preserves_a_curly_quoted_value
  # Same expectation as the straight-quote case, but the value is wrapped in curly
  # (smart) quotes: they are part of the decoded content and must be kept.
  # NOTE(review): translator(reply:) is a helper defined elsewhere in this test file —
  # presumably it builds a translator whose model reply is the given string; confirm.
  sut = translator(reply: '{"1":"“Reader”"}')
  strings = [{ key: 'sample.curly', source: '“Reader”' }]

  translated = sut.translate_all(strings, locale: 'fr')

  assert_equal({ 'sample.curly' => '“Reader”' }, translated)
end

# The async Batch path shares validated_batch with translate_all, so it must preserve a quoted value too.
def test_collect_batch_preserves_a_quoted_value
  # Prepare a one-string batch for 'fr', then feed collect_batch a reply whose value
  # carries its own straight double quotes; the collected translation must keep them.
  # NOTE(review): the 'fr_0' key presumably matches the custom id prepare_batch assigns
  # to the first 'fr' chunk — confirm against prepare_batch / batch_job.
  sut = translator(reply: '{}')
  pending = { 'fr' => [{ key: 'sample.quoted', source: '"Reader"' }] }
  manifest = sut.prepare_batch(pending, batch_size: 25)[:manifest]
  batch_replies = { 'fr_0' => '{"1":"\"Reader\""}' }

  collected = sut.collect_batch(batch_replies, manifest)

  assert_equal({ 'fr' => { 'sample.quoted' => '"Reader"' } }, collected)
end
end