From 3c67f058dedd5a84049c394f929af1681da6cf22 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:47:53 -0600 Subject: [PATCH 1/4] Localization: AI translation primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reusable, unit-tested Ruby primitives for the AI translation tier of the localization pipeline — the service behind the `human ?? AI ?? English` floor whose AI stub was left open in #25688. Pure prompt-building and validation with the Anthropic SDK call injected, so the logic is testable without the gem or the network. Not wired into any lane yet. - TranslationValidator: format-specifier safety gate — a translation must preserve the source's placeholders (count and type; positional reordering allowed), or it is rejected and falls back to English. - Glossary: brand do-not-translate list plus per-locale terms and register. - AITranslator: single-string, per-key plural form-set (one consistent stem across CLDR forms), and batched string translation, with structured-output (output_config) enforcement. - AnthropicBatch: Message Batches submit/await/results/collect for bulk backfill. 50 unit tests, rubocop clean. 
--- Gemfile | 2 + Gemfile.lock | 8 + fastlane/lanes/ai_translator.rb | 390 +++++++++++++++++++ fastlane/lanes/ai_translator_test.rb | 289 ++++++++++++++ fastlane/lanes/anthropic_batch.rb | 98 +++++ fastlane/lanes/anthropic_batch_test.rb | 104 +++++ fastlane/lanes/translation_glossary.rb | 51 +++ fastlane/lanes/translation_glossary_test.rb | 37 ++ fastlane/lanes/translation_validator.rb | 108 +++++ fastlane/lanes/translation_validator_test.rb | 59 +++ 10 files changed, 1146 insertions(+) create mode 100644 fastlane/lanes/ai_translator.rb create mode 100644 fastlane/lanes/ai_translator_test.rb create mode 100644 fastlane/lanes/anthropic_batch.rb create mode 100644 fastlane/lanes/anthropic_batch_test.rb create mode 100644 fastlane/lanes/translation_glossary.rb create mode 100644 fastlane/lanes/translation_glossary_test.rb create mode 100644 fastlane/lanes/translation_validator.rb create mode 100644 fastlane/lanes/translation_validator_test.rb diff --git a/Gemfile b/Gemfile index fa6bc472ba4c..9e74f95cc238 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,8 @@ source 'https://rubygems.org' +# Official Anthropic SDK — backs the AI translation tier of the localization pipeline (fastlane/lanes/ai_translator.rb). 
+gem 'anthropic', '~> 1.50' gem 'danger-dangermattic', '~> 1.3' gem 'dotenv' # 2.223.1 includes a fix for an ASC-interfacing issue diff --git a/Gemfile.lock b/Gemfile.lock index 8325e2b30df2..0b69002c4dd6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,6 +5,10 @@ GEM abbrev (0.1.2) addressable (2.9.0) public_suffix (>= 2.0.2, < 8.0) + anthropic (1.50.0) + cgi + connection_pool + standardwebhooks artifactory (3.0.17) ast (2.4.3) atomos (0.1.3) @@ -33,6 +37,7 @@ GEM bigdecimal (4.1.2) buildkit (1.6.1) sawyer (>= 0.6) + cgi (0.5.2) chroma (0.2.0) claide (1.1.0) claide-plugins (0.9.2) @@ -43,6 +48,7 @@ GEM colored2 (3.1.2) commander (4.6.0) highline (~> 2.0.0) + connection_pool (3.0.2) cork (0.3.0) colored2 (~> 3.1) csv (3.3.5) @@ -348,6 +354,7 @@ GEM CFPropertyList naturally singleton (0.3.0) + standardwebhooks (1.0.1) terminal-notifier (2.0.0) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) @@ -376,6 +383,7 @@ PLATFORMS ruby DEPENDENCIES + anthropic (~> 1.50) danger-dangermattic (~> 1.3) dotenv fastlane (~> 2.236) diff --git a/fastlane/lanes/ai_translator.rb b/fastlane/lanes/ai_translator.rb new file mode 100644 index 000000000000..c0228fa9a4a6 --- /dev/null +++ b/fastlane/lanes/ai_translator.rb @@ -0,0 +1,390 @@ +# frozen_string_literal: true + +require 'json' +require_relative 'anthropic_batch' +require_relative 'translation_glossary' +require_relative 'translation_validator' + +# AI translation tier for the localization pipeline — the service behind the `human ?? AI ?? English` floor. +# +# `localization_plurals.rb` currently stubs `ai_translate_plural(...)` to return nil; this is what replaces +# it. Given an English source string, a target locale, and the developer context, it asks Claude for a +# translation, then runs the result through `TranslationValidator` before returning it. 
Anything that fails +# the format-specifier gate (or comes back empty / refused) returns nil — the documented "no machine +# translation" signal the fold treats as English-fallback (flagged needs_review). It never returns a +# placeholder-broken string. +# +# The model call is INJECTED as a `complete` callable, not hard-wired, so the prompt-building and validation +# logic stays pure and unit-testable without the SDK or the network. `AITranslator.with_anthropic` builds the +# live, Claude-backed instance; the unit tests build one around a canned-reply lambda. +class AITranslator # rubocop:disable Metrics/ClassLength -- mostly static localization config (33-locale name map + prompt templates) + DEFAULT_MODEL = 'claude-opus-4-8' + + # lproj code → human language name for the prompt. Covers the current ship locales; an unmapped code falls + # back to itself (the model still does something reasonable, but add the name here for best results). + LANGUAGE_NAMES = { + 'ar' => 'Arabic', 'bg' => 'Bulgarian', 'cs' => 'Czech', 'cy' => 'Welsh', 'da' => 'Danish', + 'de' => 'German', 'en-AU' => 'English (Australia)', 'en-CA' => 'English (Canada)', + 'en-GB' => 'English (United Kingdom)', 'es' => 'Spanish', 'fr' => 'French', 'he' => 'Hebrew', + 'hr' => 'Croatian', 'hu' => 'Hungarian', 'id' => 'Indonesian', 'is' => 'Icelandic', 'it' => 'Italian', + 'ja' => 'Japanese', 'ko' => 'Korean', 'nb' => 'Norwegian Bokmål', 'nl' => 'Dutch', 'pl' => 'Polish', + 'pt' => 'Portuguese', 'pt-BR' => 'Portuguese (Brazil)', 'ro' => 'Romanian', 'ru' => 'Russian', + 'sk' => 'Slovak', 'sq' => 'Albanian', 'sv' => 'Swedish', 'th' => 'Thai', 'tr' => 'Turkish', + 'zh-Hans' => 'Chinese (Simplified)', 'zh-Hant' => 'Chinese (Traditional)' + }.freeze + + # `{{language}}` / `{{brands}}` are substituted by literal gsub (NOT `format`/`%`, which would choke on the + # literal `%@` / `%1$@` examples below). Shared by the single-string and plural prompts. 
+ TRANSLATION_RULES = <<~PROMPT + You are an expert software localizer translating user-facing UI strings for the WordPress and Jetpack iOS apps into {{language}}. + + Rules: + - Translate into natural, concise {{language}} suitable for a mobile app UI. Screen space is limited, so prefer the shorter faithful phrasing. + - Keep these names EXACTLY as written, untranslated: {{brands}}. + - Preserve every format specifier (e.g. %@, %1$@, %d, %lld, %1$d) EXACTLY — same count and type. You may reorder positional specifiers such as %1$@ and %2$d to suit the target grammar, but each must appear exactly once and keep its number. + - Preserve any HTML tags, markup, and leading/trailing whitespace exactly as in the source. + - Do not translate URLs, email addresses, file paths, or code. + - Follow the tone and terminology conventions of the WordPress.org {{language}} translation community, including its formal/informal form-of-address convention. + PROMPT + + # Output instruction for a single string. + SINGLE_OUTPUT = 'Output ONLY the translated string — no quotation marks, no explanation, no notes, nothing else.' + + # Output instruction for a plural form-set. The consistency rule is the whole reason to translate the forms + # together (one request) rather than per category: it stops the model drifting between synonyms across forms + # (e.g. Polish słowo -> wyrazy -> słów), which a per-cell call structurally cannot prevent. + PLURAL_OUTPUT = <<~PROMPT + You are translating the plural forms of ONE UI string. Use a single consistent word and stem across every form — only the grammatical inflection (ending) changes between forms; never switch to a synonym between forms. + + Return ONLY a JSON object mapping each requested CLDR plural category to its translation, e.g. {"one": "...", "other": "..."}. No markdown fences, no commentary — just the JSON object. 
+ PROMPT + + # Brief, locale-agnostic cue per CLDR category (the model knows the language's actual rules; this just + # disambiguates which form we're asking for). + CLDR_CUES = { + 'zero' => 'the zero form', + 'one' => 'singular (n = 1)', + 'two' => 'the dual form (n = 2)', + 'few' => 'the "few" form (e.g. 2-4 in many Slavic languages)', + 'many' => 'the "many" form (e.g. 5+ in many Slavic languages)', + 'other' => 'the general / catch-all form (also used for fractions)' + }.freeze + + # Default number of strings per batched request. Small enough to keep each JSON reply parseable and bound the + # blast radius if one reply is malformed (only that batch falls back to English); large enough to amortize the + # cached system prompt across many strings. + DEFAULT_BATCH_SIZE = 25 + + # Output instruction for a batch of independent strings (keyed by item number, not the long reverse-DNS key, + # so the model can't garble the mapping). + BATCH_OUTPUT = <<~PROMPT + You are translating a batch of independent UI strings. Translate each on its own; the items are unrelated unless a context note says otherwise. + + Return ONLY a JSON object mapping each item's number (as a string) to its translation, e.g. {"1": "...", "2": "..."}. Include every number you are given, and translate nothing else. No markdown fences, no commentary — just the JSON object. + PROMPT + + # @param complete [#call] callable invoked as `complete.call(system:, user:, schema: nil)` returning the + # model's raw text reply. Injected so the translator is testable without the SDK. + # @param glossary [Glossary] brand do-not-translate list + per-locale terms/register (translation_glossary.rb). + # @param language_names [Hash{String=>String}] lproj code → language name. 
+ def initialize(complete:, glossary: Glossary.default, language_names: LANGUAGE_NAMES) + @complete = complete + @glossary = glossary + @language_names = language_names + end + + # Validated translation of `source` into `locale`, or nil if one can't be produced SAFELY: blank source, a + # blank/garbled reply, or — critically — a reply that breaks the format-specifier contract. + # + # @param source [String] the English source string. + # @param locale [String] target lproj code (e.g. "fr", "pt-BR", "zh-Hans"). + # @param context [String, nil] developer comment / context for the string (the `comment:` field). Feeding + # this is the single biggest quality lever, so pass it whenever available. + def translate(source:, locale:, context: nil) + source = source.to_s + return nil if source.strip.empty? + + candidate = clean(@complete.call(system: system_prompt(locale), user: user_prompt(source, context)).to_s) + return nil if candidate.empty? + return nil unless TranslationValidator.placeholders_match?(source, candidate) + + candidate + end + + # Adapter matching the `ai_translate_plural(id:, source:, category:, note:, locale:)` contract in + # `localization_plurals.rb`, so wiring the live tier is a one-line swap of the `ai_translator:` argument: + # translator = AITranslator.with_anthropic + # PluralStrings.fold_translations!(catalog, ..., ai_translator: translator.method(:for_plural)) + # rubocop:disable Lint/UnusedMethodArgument -- keyword names are the documented call contract + def for_plural(id:, source:, category:, note:, locale:) + translate(source: source, locale: locale, context: plural_context(note, category)) + end + # rubocop:enable Lint/UnusedMethodArgument + + # Translates a whole plural form-set for one key in a SINGLE request, so the model keeps one consistent + # word/stem across the forms (the fix for per-cell lemma drift). 
Returns { category => translation } for the + # requested categories, each placeholder-validated against its English source; forms that fail the gate or + # are absent from the reply are omitted, so the caller falls back to English (needs_review) for those. + # + # @param english_forms [Hash{String=>String}] English plural forms by CLDR category (must include "other"; + # a requested category with no English form of its own falls back to the "other" English value). + # @param categories [Array<String>] the CLDR categories to produce (the ones the target locale needs). + # @param locale [String] target lproj code. + # @param note [String, nil] developer context / comment for the string. + # @param anchors [Hash{String=>String}] already-finalized (e.g. human-translated) forms — shown to the model + # as fixed context to stay consistent with, and excluded from what it is asked to produce. + def translate_plural(english_forms:, categories:, locale:, note: nil, anchors: {}) + english_forms = to_string_keys(english_forms) + anchors = to_string_keys(anchors) + return {} if english_forms['other'].to_s.strip.empty? + + needed = categories.map(&:to_s) - anchors.keys + return {} if needed.empty? + + reply = @complete.call( + system: plural_system_prompt(locale), + user: plural_user_prompt(english_forms, needed, note, anchors), + schema: object_schema(needed) + ) + validated_forms(parse_forms(reply), needed, english_forms) + end + + # Translates many independent strings in batched requests (default DEFAULT_BATCH_SIZE per request), returning + # { key => translation } for those that pass the placeholder gate. Strings absent from the result (gate + # failure, blank source, or a malformed batch reply) fall back to human/English at the call site. Pass the + # strings already sorted by key so each batch naturally groups one feature (reader.*, editor.*) — better + # terminology consistency within a batch.
+ # + # @param strings [Array<Hash>] each { key:, source:, comment: } (string or symbol keys both accepted). + # @param locale [String] target lproj code. + # @param batch_size [Integer] strings per request. + def translate_all(strings, locale:, batch_size: DEFAULT_BATCH_SIZE) + items = batchable_items(strings) + return {} if items.empty? + + items.each_slice(batch_size).with_object({}) do |chunk, out| + out.merge!(translate_batch(chunk, locale)) + end + end + + # Builds Message Batch jobs for many strings across many locales (the async / cheaper bulk path). Returns + # { jobs:, manifest: }: `jobs` ({ custom_id:, system:, user:, schema: }) go to `AnthropicBatch.submit`; + # `manifest` (custom_id => { locale:, numbered: }) is handed back to `collect_batch` with the batch results. + # Pure — no model or SDK here; `AnthropicBatch.submit` adds the model when it builds the requests. + # + # @param strings_by_locale [Hash{String=>Array<Hash>}] locale => array of { key:, source:, comment: }. + def prepare_batch(strings_by_locale, batch_size: DEFAULT_BATCH_SIZE) + jobs = [] + manifest = {} + strings_by_locale.each do |locale, strings| + batchable_items(strings).each_slice(batch_size).with_index do |chunk, index| + numbered = number_chunk(chunk) + custom_id = "#{locale}_#{index}" # must match ^[a-zA-Z0-9_-]{1,64}$; locale codes have hyphens, not underscores, so this stays unique + jobs << batch_job(custom_id, locale, numbered) + manifest[custom_id] = { locale: locale, numbered: numbered } + end + end + { jobs: jobs, manifest: manifest } + end + + # Validates the batch replies and assembles { locale => { key => translation } }. `texts_by_custom_id` comes + # from `AnthropicBatch.results`; `manifest` from `prepare_batch`. A custom_id with no reply (errored batch + # request) or a per-string gate failure simply doesn't appear → the caller falls back to human/English. Pure.
+ def collect_batch(texts_by_custom_id, manifest) + manifest.each_with_object({}) do |(custom_id, entry), result| + bucket = (result[entry[:locale]] ||= {}) + text = texts_by_custom_id[custom_id] + next if text.nil? + + bucket.merge!(validated_batch(parse_forms(text), entry[:numbered])) + end + end + + # Builds a translator backed by the Anthropic Ruby SDK (`gem 'anthropic'`, in the Gemfile) — needs + # ANTHROPIC_API_KEY in the env. This `complete` lambda is the only part of the file the unit tests don't + # exercise, by design: everything the tests cover stays on the pure side of the injection boundary. + def self.with_anthropic(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil), model: DEFAULT_MODEL, **) + client = AnthropicBatch.client(api_key: api_key) + complete = lambda do |system:, user:, schema: nil| + AnthropicBatch.text_of(client.messages.create(**AnthropicBatch.message_params(model: model, system: system, user: user, schema: schema))) + end + new(complete: complete, **) + rescue LoadError + raise LoadError, "The `anthropic` gem (in the Gemfile) isn't installed — run `bundle install` (or `gem install anthropic`)." + end + + private + + # Shared rule block (brands, format specifiers) with {{language}}/{{brands}} filled in, plus the glossary's + # per-locale terms + register note appended when present. + def render_rules(locale) + language = @language_names.fetch(locale, locale) + rules = TRANSLATION_RULES.gsub('{{language}}') { language }.gsub('{{brands}}') { @glossary.do_not_translate.join(', ') } + guidance = @glossary.guidance(locale) + guidance.empty? ? rules : "#{rules}\n#{guidance}" + end + + def system_prompt(locale) + "#{render_rules(locale)}\n#{SINGLE_OUTPUT}" + end + + def plural_system_prompt(locale) + "#{render_rules(locale)}\n#{PLURAL_OUTPUT}" + end + + def user_prompt(source, context) + parts = [] + parts << "Context: #{context}" if context && !context.to_s.strip.empty? 
+ parts << "English source string:\n#{source}" + parts.join("\n\n") + end + + def plural_user_prompt(english_forms, needed, note, anchors) + sections = [] + sections << "Context: #{note}" if note && !note.to_s.strip.empty? + sections << "English source forms:\n#{format_forms(english_forms)}" + sections << "Already-finalized forms — match their exact word choice and stem, and do not re-output them:\n#{format_forms(anchors)}" unless anchors.empty? + catalog = needed.map { |category| " #{category} - #{CLDR_CUES.fetch(category, category)}" }.join("\n") + sections << "Translate these CLDR plural categories, returning a JSON object keyed exactly by these category names:\n#{catalog}" + sections.join("\n\n") + end + + def format_forms(forms) + forms.map { |category, value| " #{category} = #{value}" }.join("\n") + end + + # Keep only the parsed forms whose placeholders match their English source (the form's own English, or the + # "other" value for categories English doesn't distinguish). Failed/empty forms are dropped → English fallback. + def validated_forms(parsed, needed, english_forms) + other = english_forms['other'] + needed.each_with_object({}) do |category, out| + candidate = clean(parsed[category].to_s) + next if candidate.empty? + + source = english_forms[category] || other + out[category] = candidate if TranslationValidator.placeholders_match?(source, candidate) + end + end + + # JSON Schema for a flat object whose values are all required strings — passed as `output_config.format` to + # make the model emit exactly this shape (structured outputs). additionalProperties must be false; that's the + # only form structured outputs support, and it also stops the model inventing extra keys. 
+ def object_schema(keys) + { + 'type' => 'object', + 'properties' => keys.to_h { |key| [key, { 'type' => 'string' }] }, + 'required' => keys, + 'additionalProperties' => false + } + end + + # Parse the model's JSON reply into { key => value }; tolerate ```json fences; {} on any parse failure + # (every entry then falls back to English — safe, though structured outputs make a failure very unlikely). + def parse_forms(reply) + text = reply.to_s.strip.sub(/\A```(?:json)?\s*/i, '').sub(/```\s*\z/, '').strip + data = JSON.parse(text) + data.is_a?(Hash) ? data : {} + rescue JSON::ParserError + {} + end + + def to_string_keys(hash) + (hash || {}).each_with_object({}) { |(key, value), acc| acc[key.to_s] = value } + end + + # One batched request: number the chunk, ask for a JSON {number => translation}, keep the validated ones. + def translate_batch(chunk, locale) + numbered = number_chunk(chunk) + reply = @complete.call( + system: batch_system_prompt(locale), + user: batch_user_prompt(numbered), + schema: object_schema(numbered.keys.map(&:to_s)) + ) + validated_batch(parse_forms(reply), numbered) + end + + # Map each numbered item to its validated translation by key; drop empty/placeholder-breaking ones. + def validated_batch(parsed, numbered) + numbered.each_with_object({}) do |(index, string), out| + candidate = clean(parsed[index.to_s].to_s) + next if candidate.empty? + + out[string[:key]] = candidate if TranslationValidator.placeholders_match?(string[:source], candidate) + end + end + + def batch_system_prompt(locale) + "#{render_rules(locale)}\n#{BATCH_OUTPUT}" + end + + def batch_user_prompt(numbered) + items = numbered.map { |index, string| batch_item_line(index, string) } + "Translate each numbered UI string below into the target language.\n\n#{items.join("\n")}" + end + + # One prompt line per string: number, the reverse-DNS key (UI-role context), the English, and the dev note. 
+ def batch_item_line(index, string) + line = "[#{index}] " + line << "(#{string[:key]}) " unless string[:key].to_s.empty? + line << string[:source].to_s + line << " — #{string[:comment]}" unless string[:comment].to_s.strip.empty? + line + end + + def normalize_string(string) + { key: field(string, :key), source: field(string, :source), comment: field(string, :comment) } + end + + def field(hash, name) + hash[name] || hash[name.to_s] + end + + # Normalize to { key:, source:, comment: } hashes and drop entries with a blank source (nothing to translate). + def batchable_items(strings) + strings.map { |string| normalize_string(string) }.reject { |string| string[:source].to_s.strip.empty? } + end + + # Number a chunk 1..N → { 1 => string, … } (the index the model maps its JSON reply by). + def number_chunk(chunk) + chunk.each_with_index.to_h { |string, index| [index + 1, string] } + end + + def batch_job(custom_id, locale, numbered) + { + custom_id: custom_id, + system: batch_system_prompt(locale), + user: batch_user_prompt(numbered), + schema: object_schema(numbered.keys.map(&:to_s)) + } + end + + # Models occasionally wrap the answer in quotation marks or add a trailing newline despite the + # "only the translation" instruction; strip those cosmetic wrappers. Anything more substantial (a prose + # explanation that slipped through) almost always breaks the placeholder gate and is discarded there. + def clean(text) + stripped = text.strip + if stripped.length >= 2 && + ((stripped.start_with?('"') && stripped.end_with?('"')) || + (stripped.start_with?('“') && stripped.end_with?('”'))) + stripped = stripped[1...-1].strip + end + stripped + end + + # The dev note plus an explicit CLDR-category cue, so the model produces the correct grammatical plural + # form (e.g. the Polish `few` form) rather than guessing from the English source alone. + def plural_context(note, category) + [note, "Plural category: #{category}. 
Render the grammatically correct plural form for this category."] + .compact.reject(&:empty?).join(' ') + end +end + +# Tiny CLI to eyeball quality against the real model (needs the `anthropic` gem + ANTHROPIC_API_KEY): +# ruby fastlane/lanes/ai_translator.rb fr "You have %1$d new posts" "Notification text. %1$d is the count." +if __FILE__ == $PROGRAM_NAME + locale, source, context = ARGV + abort("usage: ruby #{File.basename(__FILE__)} \"\" [\"\"]") unless locale && source + + result = AITranslator.with_anthropic.translate(source: source, locale: locale, context: context) + puts result.nil? ? '(no safe translation — placeholder check failed or empty reply)' : result +end diff --git a/fastlane/lanes/ai_translator_test.rb b/fastlane/lanes/ai_translator_test.rb new file mode 100644 index 000000000000..1f0e77953547 --- /dev/null +++ b/fastlane/lanes/ai_translator_test.rb @@ -0,0 +1,289 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for AITranslator. Run directly: `ruby fastlane/lanes/ai_translator_test.rb`. +# Uses a canned-reply lambda for `complete:`, so it exercises all of the prompt-building / validation logic +# without the `anthropic` gem or the network. +require 'minitest/autorun' +require_relative 'ai_translator' + +# Exercises prompt-building and the validator gate via a canned-reply `complete:` lambda (no gem / network). +class AITranslatorTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage + # Builds a translator whose model "reply" is fixed, optionally recording the prompts it was called with. 
+ def translator(reply:, prompts: nil) + complete = lambda do |system:, user:, schema: nil| + prompts&.replace({ system: system, user: user, schema: schema }) + reply + end + AITranslator.new(complete: complete) + end + + def test_returns_cleaned_translation + t = translator(reply: %("Réglages"\n)) # wrapped in quotes + trailing newline + assert_equal 'Réglages', t.translate(source: 'Settings', locale: 'fr') + end + + def test_accepts_a_reply_that_preserves_placeholders + t = translator(reply: '%2$@ wurde von %1$@ eingeladen') + assert_equal '%2$@ wurde von %1$@ eingeladen', + t.translate(source: '%1$@ invited %2$@', locale: 'de') + end + + def test_rejects_a_reply_that_breaks_placeholders + t = translator(reply: '%1$d Beiträge') # object → int: must be discarded + assert_nil t.translate(source: '%1$@ posts', locale: 'de') + end + + def test_blank_source_makes_no_model_call + called = false + complete = lambda do |**| + called = true + 'x' + end + t = AITranslator.new(complete: complete) + assert_nil t.translate(source: " \n", locale: 'fr') + refute called + end + + def test_blank_reply_returns_nil + assert_nil translator(reply: " \n").translate(source: 'Settings', locale: 'fr') + end + + def test_prompt_carries_language_brands_and_context + prompts = {} + t = translator(reply: 'Publier', prompts: prompts) + t.translate(source: 'Publish', locale: 'fr', context: 'Button to publish a post') + + assert_includes prompts[:system], 'French' + assert_includes prompts[:system], 'WordPress' + assert_includes prompts[:user], 'Button to publish a post' + assert_includes prompts[:user], 'Publish' + end + + def test_for_plural_adapter_maps_arguments_and_cues_category + prompts = {} + t = translator(reply: '%1$d Beiträge pro Woche', prompts: prompts) + out = t.for_plural( + id: 'blogging.reminders.weeklyCount|==|plural.other', + source: '%1$d times a week', + category: 'other', + note: 'Number of blogging reminders per week.', + locale: 'de' + ) + + assert_equal '%1$d Beiträge 
pro Woche', out + assert_includes prompts[:user], 'Number of blogging reminders per week.' + assert_includes prompts[:user], 'other' # the CLDR-category cue reaches the prompt + end + + def test_translate_plural_returns_all_requested_forms + reply = '{"one":"%1$ld słowo","few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}' + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few many other], locale: 'pl', note: 'Number of words.' + ) + assert_equal( + { 'one' => '%1$ld słowo', 'few' => '%1$ld słowa', 'many' => '%1$ld słów', 'other' => '%1$ld słowa' }, out + ) + end + + def test_translate_plural_drops_a_form_that_breaks_placeholders + # 'few' switched %1$ld -> %1$d (length change) — drop it; the rest survive. + reply = '{"one":"%1$ld słowo","few":"%1$d słowa","other":"%1$ld słowa"}' + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few other], locale: 'pl' + ) + assert_equal %w[one other], out.keys.sort + refute out.key?('few') + end + + def test_translate_plural_excludes_anchors_and_passes_them_as_context + prompts = {} + reply = '{"few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}' + out = translator(reply: reply, prompts: prompts).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few many other], locale: 'pl', anchors: { 'one' => '%1$ld słowo' } + ) + refute out.key?('one') # human-anchored — not produced + assert_equal %w[few many other], out.keys.sort + assert_includes prompts[:user], '%1$ld słowo' # anchor shown to the model as fixed context + end + + def test_translate_plural_falls_back_to_empty_on_bad_json + out = translator(reply: 'sorry — here are your forms!').translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one other], locale: 'pl' + ) + 
assert_empty out + end + + def test_translate_plural_tolerates_json_code_fences + reply = "```json\n{\"one\":\"%1$ld słowo\",\"other\":\"%1$ld słowa\"}\n```" + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one other], locale: 'pl' + ) + assert_equal({ 'one' => '%1$ld słowo', 'other' => '%1$ld słowa' }, out) + end + + def test_translate_plural_validates_fallback_category_against_other + # 'many' has no English form of its own → validated against the English 'other' (%1$ld words). + out = translator(reply: '{"many":"%1$ld słów"}').translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[many], locale: 'pl' + ) + assert_equal({ 'many' => '%1$ld słów' }, out) + end + + def test_translate_all_maps_keys_and_validates + reply = '{"1":"Réglages","2":"%1$@ articles"}' + out = translator(reply: reply).translate_all( + [{ key: 'settings.title', source: 'Settings', comment: 'Screen title' }, + { key: 'posts.count', source: '%1$@ posts', comment: 'Count' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages', 'posts.count' => '%1$@ articles' }, out) + end + + def test_translate_all_drops_a_placeholder_breaker + reply = '{"1":"Réglages","2":"%1$d articles"}' # item 2 changed %1$@ -> %1$d + out = translator(reply: reply).translate_all( + [{ key: 'settings.title', source: 'Settings' }, { key: 'posts.count', source: '%1$@ posts' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages' }, out) + refute out.key?('posts.count') + end + + def test_translate_all_skips_blank_sources + out = translator(reply: '{"1":"Réglages"}').translate_all( + [{ key: 'settings.title', source: 'Settings' }, { key: 'blank', source: ' ' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages' }, out) + end + + def test_translate_all_chunks_and_merges + calls = 0 + complete = lambda do |**| + calls += 1 + '{"1":"x","2":"y"}' + 
end + out = AITranslator.new(complete: complete).translate_all( + [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }], + locale: 'fr', batch_size: 2 + ) + assert_equal 2, calls # 3 items / batch 2 = 2 requests + assert_equal({ 'a' => 'x', 'b' => 'y', 'c' => 'x' }, out) + end + + def test_translate_all_bad_json_batch_falls_back + out = translator(reply: 'not json at all').translate_all([{ key: 'a', source: 'One' }], locale: 'fr') + assert_empty out + end + + def test_translate_all_empty_input_makes_no_call + called = false + complete = lambda do |**| + called = true + '{}' + end + assert_empty AITranslator.new(complete: complete).translate_all([], locale: 'fr') + refute called + end + + def test_translate_all_prompt_carries_key_context_and_language + prompts = {} + translator(reply: '{"1":"Publier"}', prompts: prompts).translate_all( + [{ key: 'editor.publish', source: 'Publish', comment: 'Publish button' }], locale: 'fr' + ) + assert_includes prompts[:system], 'French' + assert_includes prompts[:user], 'editor.publish' + assert_includes prompts[:user], 'Publish button' + assert_includes prompts[:user], 'Publish' + end + + def test_translate_plural_passes_a_schema_of_its_categories + prompts = {} + translator(reply: '{"one":"%1$ld słowo","other":"%1$ld słowa"}', prompts: prompts).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, categories: %w[one other], locale: 'pl' + ) + assert_equal %w[one other], prompts[:schema]['required'].sort + assert_equal false, prompts[:schema]['additionalProperties'] + end + + def test_translate_all_passes_a_numbered_schema + prompts = {} + translator(reply: '{"1":"a","2":"b"}', prompts: prompts).translate_all( + [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }], locale: 'fr' + ) + assert_equal %w[1 2], prompts[:schema]['required'].sort + end + + def test_single_translate_passes_no_schema + prompts = {} + translator(reply: 'Publier', prompts: 
prompts).translate(source: 'Publish', locale: 'fr') + assert_nil prompts[:schema] + end + + def test_glossary_terms_and_register_reach_the_prompt + prompts = {} + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' }) + complete = lambda do |system:, user:, schema: nil| + prompts.replace({ system: system, user: user, schema: schema }) + 'Publier' + end + AITranslator.new(complete: complete, glossary: glossary).translate(source: 'Publish', locale: 'fr') + assert_includes prompts[:system], 'post -> article' + assert_includes prompts[:system], 'Register: Use formal vous.' + end + + def test_prepare_batch_chunks_each_locale_into_jobs + prep = translator(reply: '{}').prepare_batch( + { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }], + 'de' => [{ key: 'a', source: 'One' }] }, + batch_size: 2 + ) + assert_equal(%w[fr_0 fr_1 de_0], prep[:jobs].map { |job| job[:custom_id] }) + assert_equal %w[1 2], prep[:jobs].first[:schema]['required'].sort + end + + def test_prepare_batch_manifest_maps_custom_id_to_locale_and_strings + prep = translator(reply: '{}').prepare_batch( + { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }] }, batch_size: 25 + ) + assert_equal 'fr', prep[:manifest]['fr_0'][:locale] + assert_equal(%w[a b], prep[:manifest]['fr_0'][:numbered].values.map { |string| string[:key] }) + end + + def test_prepare_batch_custom_ids_match_the_api_pattern + # The Batch API requires custom_id =~ ^[a-zA-Z0-9_-]{1,64}$ — hyphenated locales like pt-BR must still pass. 
+ prep = translator(reply: '{}').prepare_batch({ 'pt-BR' => [{ key: 'a', source: 'One' }] }, batch_size: 25) + prep[:jobs].each { |job| assert_match(/\A[a-zA-Z0-9_-]{1,64}\z/, job[:custom_id]) } + end + + def test_collect_batch_validates_and_groups_by_locale + t = translator(reply: '{}') + prep = t.prepare_batch( + { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }, batch_size: 25 + ) + texts = { 'fr_0' => '{"1":"Réglages","2":"%1$@ éléments"}' } + assert_equal({ 'fr' => { 'settings' => 'Réglages', 'count' => '%1$@ éléments' } }, + t.collect_batch(texts, prep[:manifest])) + end + + def test_collect_batch_drops_invalid_and_missing + t = translator(reply: '{}') + prep = t.prepare_batch( + { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }, batch_size: 25 + ) + texts = { 'fr_0' => '{"1":"Réglages","2":"%1$d éléments"}' } # item 2 breaks the placeholder + assert_equal({ 'fr' => { 'settings' => 'Réglages' } }, t.collect_batch(texts, prep[:manifest])) + end + + def test_collect_batch_handles_a_missing_batch_reply + t = translator(reply: '{}') + prep = t.prepare_batch({ 'fr' => [{ key: 'a', source: 'One' }] }, batch_size: 25) + assert_equal({ 'fr' => {} }, t.collect_batch({}, prep[:manifest])) + end +end diff --git a/fastlane/lanes/anthropic_batch.rb b/fastlane/lanes/anthropic_batch.rb new file mode 100644 index 000000000000..89ec02dd62e2 --- /dev/null +++ b/fastlane/lanes/anthropic_batch.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require 'json' + +# SDK glue for the Anthropic Ruby client: the message create-params shape, response-text extraction, and the +# Message Batches submit/poll/collect cycle. 
Isolated here so `AITranslator` stays pure prompt-building + +# validation, and all knowledge of the SDK's request/response shape lives in ONE place — the synchronous path +# (`AITranslator.with_anthropic`) and the async batch path share `message_params` / `text_of`, so the request +# shape can't drift between them. +# +# The batch path is the cost/throughput lever for a full backfill: one async job covering many (locale, chunk) +# requests at ~50% the per-token price. Flow: `AITranslator#prepare_batch` → `submit` → poll `ready?` → +# `results` → `AITranslator#collect_batch`. +module AnthropicBatch + MAX_TOKENS = 8192 # generous so a batch's JSON object can't truncate (a truncated reply fails the JSON parse) + + module_function + + # `messages.create` params for one request; adds output_config (structured outputs) when a schema is given. + def message_params(model:, system:, user:, schema: nil) + params = { + model: model.to_sym, + max_tokens: MAX_TOKENS, + system_: [{ type: 'text', text: system, cache_control: { type: 'ephemeral' } }], + messages: [{ role: 'user', content: user }] + } + params[:output_config] = { format: { type: :json_schema, schema: schema } } unless schema.nil? + params + end + + # Concatenate the text blocks of a Message response. + def text_of(message) + message.content.select { |block| block.type == :text }.map(&:text).join("\n") + end + + # Submit jobs ({ custom_id:, system:, user:, schema: }) as one Message Batch; returns the batch id. + def submit(jobs, client:, model:) + requests = jobs.map do |job| + { custom_id: job[:custom_id], params: message_params(model: model, system: job[:system], user: job[:user], schema: job[:schema]) } + end + client.messages.batches.create(requests: requests).id + end + + # True once the batch has finished processing (results are available to stream). 
+ def ready?(batch_id, client:) + client.messages.batches.retrieve(batch_id).processing_status.to_s == 'ended' + end + + # { custom_id => reply text } for the succeeded requests. `results_streaming` yields raw JSONL lines (one per + # request) — the SDK's lenient coercion passes the line through as a String — so each is parsed here. + # Errored/expired/canceled entries (and any unparseable line) are skipped, so the strings they covered fall + # back to human/English at collect time. + def results(batch_id, client:) + client.messages.batches.results_streaming(batch_id).each_with_object({}) do |line, out| + record = parse_line(line) + result = record['result'] || {} + out[record['custom_id']] = content_text(result.dig('message', 'content')) if result['type'] == 'succeeded' + end + end + + # Parse a JSONL result line into a Hash; {} on anything unparseable. Tolerates a Hash (already parsed). + def parse_line(line) + line.is_a?(String) ? JSON.parse(line) : line + rescue JSON::ParserError + {} + end + + # Join the text blocks of a parsed message-content array (Hash blocks, not the typed objects `text_of` takes). + def content_text(content) + Array(content).select { |block| block['type'] == 'text' }.map { |block| block['text'] }.join("\n") + end + + # Poll until the batch finishes, then return its results (same shape as `results`); returns nil if it hasn't + # finished within `timeout`. `interval`/`timeout` are seconds; `sleeper` is injected so tests run instantly. + # Yields elapsed seconds after each not-ready check (progress reporting). Timeout is approximate (summed + # intervals, not wall clock). + # + # This is the simple synchronous "submit and wait" mechanism. For a huge backfill that may run for a long + # time, prefer submitting, persisting the batch id, and collecting in a later step over blocking on this — + # `submit` returns the id immediately, and `ready?` / `results` let a separate step pick it up. 
+ def await(batch_id, client:, interval: 30, timeout: 3600, sleeper: ->(seconds) { sleep(seconds) }) + waited = 0 + loop do + return results(batch_id, client: client) if ready?(batch_id, client: client) + return nil if waited >= timeout + + yield waited if block_given? + sleeper.call(interval) + waited += interval + end + end + + # A raw Anthropic client for the batch calls (needs the `anthropic` gem + ANTHROPIC_API_KEY). + def client(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil)) + require 'anthropic' + Anthropic::Client.new(api_key: api_key) + end +end diff --git a/fastlane/lanes/anthropic_batch_test.rb b/fastlane/lanes/anthropic_batch_test.rb new file mode 100644 index 000000000000..978aa104203f --- /dev/null +++ b/fastlane/lanes/anthropic_batch_test.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for AnthropicBatch. Run: `ruby fastlane/lanes/anthropic_batch_test.rb`. +# Drives the submit / poll / results glue against a fake client that mimics the SDK's shape (no gem, no network). +require 'minitest/autorun' +require 'json' +require_relative 'anthropic_batch' + +# Exercises the submit / poll / results glue via a fake client that mimics the SDK shape. `create`/`retrieve` +# return typed-ish objects (a Batch struct); `results_streaming` yields raw JSONL strings, as the real SDK does. +class AnthropicBatchTest < Minitest::Test + Batch = Struct.new(:id, :processing_status) + + # Mimics client.messages.batches.{create,retrieve,results_streaming}. 
+ class FakeBatches + attr_reader :created_requests + + def initialize(status:, entries:, ready_after: nil) + @status = status + @entries = entries + @ready_after = ready_after # report :ended only once `retrieve` has been called this many times + @retrieve_calls = 0 + end + + def create(requests:) + @created_requests = requests + Batch.new('batch_1', :in_progress) + end + + def retrieve(_id) + @retrieve_calls += 1 + Batch.new('batch_1', effective_status) + end + + def results_streaming(_id) + @entries + end + + private + + def effective_status + return @status if @ready_after.nil? + + @retrieve_calls >= @ready_after ? :ended : :in_progress + end + end + + def fake_client(status: :ended, entries: [], ready_after: nil) + batches = FakeBatches.new(status: status, entries: entries, ready_after: ready_after) + Struct.new(:messages).new(Struct.new(:batches).new(batches)) + end + + # Build a raw JSONL result line, the way results_streaming yields them. + def succeeded_line(custom_id, json) + JSON.generate('custom_id' => custom_id, + 'result' => { 'type' => 'succeeded', 'message' => { 'content' => [{ 'type' => 'text', 'text' => json }] } }) + end + + def errored_line(custom_id) + JSON.generate('custom_id' => custom_id, 'result' => { 'type' => 'errored' }) + end + + def test_message_params_adds_output_config_only_with_a_schema + bare = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u') + refute bare.key?(:output_config) + assert_equal :'claude-opus-4-8', bare[:model] + + with_schema = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u', schema: { 'type' => 'object' }) + assert_equal({ format: { type: :json_schema, schema: { 'type' => 'object' } } }, with_schema[:output_config]) + end + + def test_submit_builds_requests_and_returns_the_id + client = fake_client + jobs = [{ custom_id: 'fr_0', system: 'sys', user: 'usr', schema: { 'type' => 'object' } }] + id = AnthropicBatch.submit(jobs, client: client, model: 
'claude-opus-4-8') + + assert_equal 'batch_1', id + request = client.messages.batches.created_requests.first + assert_equal 'fr_0', request[:custom_id] + assert_equal :'claude-opus-4-8', request[:params][:model] + end + + def test_ready_reflects_processing_status + refute AnthropicBatch.ready?('b', client: fake_client(status: :in_progress)) + assert AnthropicBatch.ready?('b', client: fake_client(status: :ended)) + end + + def test_results_returns_text_for_succeeded_requests_only + entries = [succeeded_line('fr_0', '{"1":"Bonjour"}'), errored_line('fr_1')] + out = AnthropicBatch.results('b', client: fake_client(entries: entries)) + assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, out) + end + + def test_await_polls_until_ready_then_returns_results + client = fake_client(ready_after: 3, entries: [succeeded_line('fr_0', '{"1":"Bonjour"}')]) + out = AnthropicBatch.await('b', client: client, interval: 1, sleeper: ->(_seconds) {}) + assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, out) + end + + def test_await_returns_nil_on_timeout + client = fake_client(status: :in_progress) + assert_nil AnthropicBatch.await('b', client: client, interval: 30, timeout: 60, sleeper: ->(_seconds) {}) + end +end diff --git a/fastlane/lanes/translation_glossary.rb b/fastlane/lanes/translation_glossary.rb new file mode 100644 index 000000000000..c7ed0d6a5c2e --- /dev/null +++ b/fastlane/lanes/translation_glossary.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +# Terminology configuration for the translator: brand/product names kept verbatim, plus per-locale glossary +# terms (the preferred translation for an English term) and a register/style note. A pure value object — +# SOURCING this data (the WordPress.org per-locale glossaries + style guides, a committed YAML, …) is +# pre-processing done elsewhere and handed in here, so this stays I/O-free and unit-testable. +class Glossary + # Brand / product proper nouns kept verbatim in every locale. 
Deliberately tight to unambiguous proper nouns + # — feature words locales legitimately translate ("Reader", "Stats") are intentionally NOT here. + DEFAULT_DO_NOT_TRANSLATE = [ + 'WordPress', 'WordPress.com', 'Jetpack', 'WooCommerce', 'Woo', + 'Akismet', 'Gravatar', 'Gutenberg', 'Tumblr', 'Simplenote', 'Crowdsignal' + ].freeze + + attr_reader :do_not_translate + + # @param do_not_translate [Array<String>] brand/product names kept verbatim. + # @param terms [Hash{String=>Hash{String=>String}}] locale => { english term => preferred translation }. + # @param register [Hash{String=>String}] locale => style/register note (e.g. "Use the informal 'du' form."). + def initialize(do_not_translate: DEFAULT_DO_NOT_TRANSLATE, terms: {}, register: {}) + @do_not_translate = do_not_translate + @terms = terms + @register = register + end + + # The default brand-only glossary (no per-locale terms or register). + def self.default + new + end + + # Prompt fragment with this locale's preferred terms + register note (or '' if neither applies). Appended to + # the shared rules so the model uses the community's terminology and tone. + def guidance(locale) + [term_guidance(locale), register_note(locale)].reject(&:empty?).join("\n") + end + + private + + def term_guidance(locale) + pairs = @terms[locale] + return '' if pairs.nil? || pairs.empty? + + lines = pairs.map { |english, translation| " #{english} -> #{translation}" } + "Use these exact translations for these terms, consistently:\n#{lines.join("\n")}" + end + + def register_note(locale) + note = @register[locale].to_s.strip + note.empty? ? '' : "Register: #{note}" + end +end diff --git a/fastlane/lanes/translation_glossary_test.rb b/fastlane/lanes/translation_glossary_test.rb new file mode 100644 index 000000000000..4bc29817f3e2 --- /dev/null +++ b/fastlane/lanes/translation_glossary_test.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for Glossary.
Run directly: `ruby fastlane/lanes/translation_glossary_test.rb`. +require 'minitest/autorun' +require_relative 'translation_glossary' + +# Covers the brand list, per-locale term guidance, register note, the combination, and empty cases. +class GlossaryTest < Minitest::Test + def test_default_is_brands_only + glossary = Glossary.default + assert_includes glossary.do_not_translate, 'WordPress' + assert_equal '', glossary.guidance('fr') + end + + def test_term_guidance_is_per_locale + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article', 'tag' => 'étiquette' } }) + assert_includes glossary.guidance('fr'), 'post -> article' + assert_includes glossary.guidance('fr'), 'tag -> étiquette' + assert_equal '', glossary.guidance('de') # no terms for de + end + + def test_register_note + glossary = Glossary.new(register: { 'de' => "Use the informal 'du' form." }) + assert_includes glossary.guidance('de'), "Register: Use the informal 'du' form." + end + + def test_terms_and_register_combined + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' }) + guidance = glossary.guidance('fr') + assert_includes guidance, 'post -> article' + assert_includes guidance, 'Register: Use formal vous.' + end + + def test_custom_do_not_translate + assert_equal %w[Foo Bar], Glossary.new(do_not_translate: %w[Foo Bar]).do_not_translate + end +end diff --git a/fastlane/lanes/translation_validator.rb b/fastlane/lanes/translation_validator.rb new file mode 100644 index 000000000000..29b9e9c887fa --- /dev/null +++ b/fastlane/lanes/translation_validator.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +# Format-specifier safety gate for machine-translated strings. +# +# The one correctness invariant for a translated `.strings` / `.xcstrings` value: it must preserve the +# source's printf / NSString format ARGUMENTS exactly — same count, same types, and (for positional +# `%1$@` specifiers) the same index→type mapping. 
The surrounding prose is free to change; the argument +# contract is not. Break it and the app reads the wrong vararg off the stack — a crash or garbage at +# runtime, in a locale the author can't read and CI can't catch. +# +# This is deliberately plain Ruby with no dependencies, so it can gate EVERY machine translation before it +# is written and be unit-tested directly. It's the floor under the `human ?? AI ?? English` resolution in +# `PluralStrings.fold_cell`: an AI cell that fails this check is discarded (the caller falls through to the +# English source, flagged needs_review) rather than shipped. +module TranslationValidator + module_function + + # printf / NSString format specifier: optional positional `N$`, flags, width, precision, length modifier, + # conversion. The space flag (`% d`) is deliberately EXCLUDED — exactly as `CatalogHelper::FORMAT_SPECIFIER` + # excludes it — because `% ` matches inside ordinary prose ("100% done" → "% d"), which would make + # the validator hallucinate an argument in plain text and reject a perfectly good translation. + FORMAT_SPECIFIER = / + % # leading percent + (?:(?<position>\d+)\$)? # optional positional index: 1$, 2$, … + [\#0\-+']* # flags (NOT space — see note above) + (?:\d+|\*)? # field width + (?:\.(?:\d+|\*))? # precision + (?<length>hh|h|ll|l|L|q|z|t|j)? # length modifier + (?<conv>[@dDiuUxXoOfFeEgGaAcCsSpn%]) # conversion + /x + + # Conversion char → coarse argument type-class. We compare by class, not by exact letter, so cosmetic + # swaps that don't change the consumed argument (`%x`↔`%X`, `%d`↔`%i`) pass, while a real type change + # that WOULD crash (`%@`→`%d`: object vs integer) is caught. The length modifier is kept separately in the + # signature, because `%d`↔`%ld` is a genuine ABI difference (int vs long) that can crash on mismatch.
+ TYPE_CLASS = { + '@' => :object, + 'd' => :int, 'D' => :int, 'i' => :int, 'u' => :int, 'U' => :int, + 'x' => :int, 'X' => :int, 'o' => :int, 'O' => :int, + 'f' => :float, 'F' => :float, 'e' => :float, 'E' => :float, + 'g' => :float, 'G' => :float, 'a' => :float, 'A' => :float, + 's' => :cstring, 'S' => :cstring, 'c' => :char, 'C' => :char, 'p' => :pointer + }.freeze + private_constant :TYPE_CLASS + + # Two parallel views of a string's format arguments: + # positional — { index => "length:type-class" }; order-INDEPENDENT (reordering `%1$@`/`%2$@` to suit + # target grammar is the whole point of positional specifiers). + # sequential — [ "length:type-class", … ]; order-DEPENDENT (a non-positional specifier's argument is + # bound by appearance order, so `%@ %d` and `%d %@` are NOT interchangeable). + # `%%` (a literal percent) consumes no argument and is excluded from both. + Signature = Struct.new(:positional, :sequential) + private_constant :Signature + + # True when `candidate` preserves `source`'s format-argument contract. + def placeholders_match?(source, candidate) + mismatch_reason(source, candidate).nil? + end + + # nil when the contract is preserved; otherwise a short human-readable reason (for logging which AI cells + # were rejected and why). + def mismatch_reason(source, candidate) + src = signature(source) + cand = signature(candidate) + + if src.positional != cand.positional + "positional placeholders differ (source: #{describe_positional(src.positional)}; " \ + "translation: #{describe_positional(cand.positional)})" + elsif src.sequential != cand.sequential + "sequential placeholders differ (source: #{src.sequential.inspect}; translation: #{cand.sequential.inspect})" + end + end + + # Parsed argument signature of `str` (see the Signature struct above). 
+ def signature(str) + positional = {} + sequential = [] + each_specifier(str.to_s) do |match| + next if match[:conv] == '%' # literal %% — not an argument + + token = "#{match[:length]}:#{TYPE_CLASS.fetch(match[:conv], match[:conv])}" + if match[:position] + positional[match[:position].to_i] = token + else + sequential << token + end + end + Signature.new(positional, sequential) + end + + # Yields each format-specifier MatchData in appearance order. Scans forward from the end of each match, so + # adjacent specifiers (`%d%@`) and specifiers embedded in text are all found. + def each_specifier(str) + pos = 0 + while (match = FORMAT_SPECIFIER.match(str, pos)) + yield match + pos = match.end(0) + end + end + private_class_method :each_specifier + + def describe_positional(positional) + return 'none' if positional.empty? + + positional.sort.map { |index, token| "%#{index}$(#{token})" }.join(', ') + end + private_class_method :describe_positional +end diff --git a/fastlane/lanes/translation_validator_test.rb b/fastlane/lanes/translation_validator_test.rb new file mode 100644 index 000000000000..8cbd91a2f38f --- /dev/null +++ b/fastlane/lanes/translation_validator_test.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for TranslationValidator. Run directly: `ruby fastlane/lanes/translation_validator_test.rb`. +require 'minitest/autorun' +require_relative 'translation_validator' + +# Exercises the format-specifier contract: positional reordering allowed, type/length/count changes rejected. +class TranslationValidatorTest < Minitest::Test + V = TranslationValidator + + def test_no_specifiers_anything_matches + assert V.placeholders_match?('Settings', 'Réglages') + assert V.placeholders_match?('', '') + end + + def test_positional_reordering_is_allowed + # Reordering %1$@ / %2$@ to suit target grammar is the whole point of positional specifiers. 
+ assert V.placeholders_match?('%1$@ invited %2$@', '%2$@ wurde von %1$@ eingeladen') + end + + def test_positional_type_change_is_rejected + # %1$@ (object) → %1$d (int) would read the wrong vararg — a crash vector. + refute V.placeholders_match?('%1$@ posts', '%1$d posts') + end + + def test_sequential_order_must_be_preserved + refute V.placeholders_match?('%@: %d', '%d : %@') # flipped non-positional args + assert V.placeholders_match?('%@: %d', 'Total %@: %d') # same order, prose changed + end + + def test_count_mismatch_is_rejected + refute V.placeholders_match?('Hello %@', 'Bonjour') # dropped an argument + refute V.placeholders_match?('Hello %@', 'Bonjour %@ %@') # added an argument + end + + def test_literal_percent_is_ignored + assert V.placeholders_match?('100% done', '100% terminé') # no real specifier (space after %) + assert V.placeholders_match?('%d%% complete', '%d%% terminé') # %% literal, %d preserved + refute V.placeholders_match?('%d%% complete', '%% terminé') # dropped the %d argument + end + + def test_length_modifier_change_is_rejected + # %ld (long) → %d (int) is a genuine ABI difference that can crash on mismatch. 
+ refute V.placeholders_match?('%1$ld words', '%1$d words') + assert V.placeholders_match?('%1$ld words', '%1$ld mots') + end + + def test_case_only_conversion_change_is_allowed + assert V.placeholders_match?('%x', '%X') # cosmetic; same integer type-class + end + + def test_mismatch_reason_is_descriptive + reason = V.mismatch_reason('%1$@ posts', '%1$d posts') + refute_nil reason + assert_includes reason, 'positional' + + assert_nil V.mismatch_reason('%1$@ invited %2$@', '%2$@ a invité %1$@') + end +end From 74128501416124f3961c99d3c6756afd54be5fcd Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:55:21 -0600 Subject: [PATCH 2/4] Localization: run the AI translation tooling unit tests in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pure-Ruby unit suites (TranslationValidator, Glossary, AnthropicBatch, AITranslator) weren't executed by any pipeline step — the "Unit Tests" jobs are the Xcode/XCTest suites, and rubocop (via Danger) only lints them. Add a lightweight Buildkite step that runs each fastlane/lanes/*_test.rb with plain ruby (stdlib minitest — no Xcode, no app build, no bundle). Runs unconditionally rather than behind should-skip-job.sh --job-type validation, which skips on tooling-only changes — i.e. exactly the PRs that touch these files. --- .../commands/test-localization-tooling.sh | 22 +++++++++++++++++++ .buildkite/pipeline.yml | 7 ++++++ 2 files changed, 29 insertions(+) create mode 100755 .buildkite/commands/test-localization-tooling.sh diff --git a/.buildkite/commands/test-localization-tooling.sh b/.buildkite/commands/test-localization-tooling.sh new file mode 100755 index 000000000000..2e129cd95470 --- /dev/null +++ b/.buildkite/commands/test-localization-tooling.sh @@ -0,0 +1,22 @@ +#!/bin/bash -eu + +# Runs the localization tooling's pure-Ruby unit suites (stdlib minitest — no Xcode, no app build, no bundle). 
+# Intentionally always runs (no should-skip-job guard): these guard the fastlane localization helpers, and the +# `validation` skip rule skips on tooling-only changes — exactly when these tests matter most. + +echo "--- :test_tube: Localization tooling unit tests" + +shopt -s nullglob +tests=(fastlane/lanes/*_test.rb) +if [[ ${#tests[@]} -eq 0 ]]; then + echo "No *_test.rb files found under fastlane/lanes/." + exit 0 +fi + +status=0 +for test in "${tests[@]}"; do + echo "+++ :ruby: ${test}" + ruby "${test}" || status=1 +done + +exit "${status}" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6cb0910aa3fe..9ef4cd1b7e58 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -163,6 +163,13 @@ steps: - github_commit_status: context: "Verify String Catalog Coverage" + - label: ":test_tube: Localization Tooling Unit Tests" + command: .buildkite/commands/test-localization-tooling.sh + plugins: [$CI_TOOLKIT_PLUGIN] + notify: + - github_commit_status: + context: "Localization Tooling Unit Tests" + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait ################# From bbd70ac795bf10d0898d0188fbdabceb535aca3b Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 26 Jun 2026 14:16:36 -0600 Subject: [PATCH 3/4] Wire AI plural translation into the reverse fold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the ai_translate_plural -> nil stub with the AI tier. download_localized_plurals now builds AITranslator.with_anthropic once (gated on ANTHROPIC_API_KEY; absent => the prior English-fallback behavior, unchanged) and PluralStrings.fold_translations! invokes it once per (key, locale) with the whole form-set via translate_plural — keeping one consistent stem across forms — passing the human-translated forms as anchors. 
The fold's ai_translator contract changes from per-cell to per-form-set; a per-set API failure degrades to English (needs_review) instead of aborting the fold. Adds plural_strings_helper_test.rb covering provenance and the form-set/anchors contract; the nokogiri require is made lazy so that pure suite needs no gems (matches the no-bundle CI job). --- fastlane/lanes/localization_plurals.rb | 38 +++-- fastlane/lanes/plural_strings_helper.rb | 60 +++++-- fastlane/lanes/plural_strings_helper_test.rb | 162 +++++++++++++++++++ 3 files changed, 236 insertions(+), 24 deletions(-) create mode 100644 fastlane/lanes/plural_strings_helper_test.rb diff --git a/fastlane/lanes/localization_plurals.rb b/fastlane/lanes/localization_plurals.rb index 9cc9692fab89..7a8e1d0288a4 100644 --- a/fastlane/lanes/localization_plurals.rb +++ b/fastlane/lanes/localization_plurals.rb @@ -81,7 +81,7 @@ catalog, categories_by_locale: categories_by_locale, translations_by_locale: plural_translations_by_locale(File.join(PROJECT_ROOT_FOLDER, 'WordPress', 'Resources')), - ai_translator: method(:ai_translate_plural) + ai_translator: plural_ai_translator ) File.write(PLURALS_CATALOG, "#{JSON.pretty_generate(catalog)}\n") UI.message("Folded plural translations from Localizable.strings into #{File.basename(PLURALS_CATALOG)} (#{written} locale variations).") @@ -138,13 +138,33 @@ def plural_translations_by_locale(dir) end end - # Machine-translation floor for the reverse fold: invoked for every plural slot with no human translation. - # Returns nil until wired to a translation service, leaving such slots to fall back to the English source - # (flagged needs_review). The named `category` + dev `note` let the prompt request the correct grammatical - # form (e.g. "give me the Polish *few* form of …"). - # rubocop:disable Lint/UnusedMethodArgument -- keyword names are the documented call contract - def ai_translate_plural(id:, source:, category:, note:, locale:) - nil # TODO: call the translation service. 
+ # The machine-translation tier for the reverse fold (the AI rung of the `human ?? AI ?? English` floor), or + # nil when ANTHROPIC_API_KEY isn't configured — in which case untranslated plural cells keep falling back to + # the English source (flagged needs_review), exactly as before this was wired. Built once and reused for + # every (key, locale) form-set. + # + # The returned callable matches PluralStrings.fold_translations!'s form-set contract and is wrapped to + # DEGRADE, not crash: a per-set API failure logs and returns {} so that one set falls back to English while + # the rest of the fold proceeds and commits (the whole reverse step is also guarded by `run_plural_step`). + # Going through `AITranslator#translate_plural` — the whole form-set in one request — keeps one consistent + # word/stem across the forms (a per-cell call lets the model drift between synonyms, e.g. Polish słowo → + # wyrazy → słów). + def plural_ai_translator + if ENV['ANTHROPIC_API_KEY'].to_s.empty? + UI.important('ANTHROPIC_API_KEY not set — skipping AI plural translation; untranslated plurals fall back to English (needs_review).') + return nil + end + + require_relative 'ai_translator' + translator = AITranslator.with_anthropic + lambda do |english_forms:, categories:, locale:, note:, anchors:| + translator.translate_plural(english_forms: english_forms, categories: categories, locale: locale, note: note, anchors: anchors) + rescue StandardError => e + UI.error("AI plural translation failed for #{locale} (#{e.message}); falling back to English for this form-set.") + {} + end + rescue LoadError => e + UI.important("AI translation tier unavailable (#{e.message}); untranslated plurals fall back to English.") + nil end - # rubocop:enable Lint/UnusedMethodArgument end diff --git a/fastlane/lanes/plural_strings_helper.rb b/fastlane/lanes/plural_strings_helper.rb index 01d9ee8ee73f..2c17b1029646 100644 --- a/fastlane/lanes/plural_strings_helper.rb +++ 
b/fastlane/lanes/plural_strings_helper.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'json' -require 'nokogiri' # Logic for the String Catalog ⇄ GlotPress plural pipeline. Plain Ruby with no fastlane dependencies, so it's # unit-testable directly — the lanes in `localization_plurals.rb` call into it. @@ -11,7 +10,7 @@ # `<key>|==|plural.<category>` — the same id Apple's `xcodebuild -exportLocalizations` # uses. Translations fold back into the catalog JSON using a per-locale CLDR category map that the reverse # derives from Apple's exporter at fold time (a throwaway one-plural project — categories are a locale property). -module PluralStrings +module PluralStrings # rubocop:disable Metrics/ModuleLength -- one cohesive pipeline of small, single-purpose, individually-documented helpers XLIFF_NS = { 'x' => 'urn:oasis:names:tc:xliff:document:1.2' }.freeze INFIX = '|==|plural.' CLDR_ORDER = %w[zero one two few many other].freeze @@ -88,6 +87,7 @@ def serialize_legacy_strings(entries) # Apple owns the truth; the reverse derives this at fold time from a throwaway-fixture export. # @return [Hash{String=>Array<String>}] locale => categories (CLDR order). def categories_by_locale_from_skeletons(xliff_paths) + require 'nokogiri' # only the exporter-skeleton path needs it; kept lazy so the pure fold has no gem dependency xliff_paths.each_with_object({}) do |path, acc| cats = Nokogiri::XML(File.read(path)).xpath('//x:trans-unit', XLIFF_NS).filter_map do |tu| id = tu['id'].to_s @@ -101,8 +101,16 @@ def categories_by_locale_from_skeletons(xliff_paths) # variations — the inverse of `flat_originals`. For each plural key and target locale, emit exactly the # categories that locale needs (per `categories_by_locale`), filling each with `human ?? AI ?? English`. # Human cells are `translated`; AI / English-fallback cells are `needs_review` (machine output to re-check). - # `ai_translator` is optional and may return nil (the floor falls through to English).
Mutates `catalog`; - # returns the count of (key, locale) variations written. + # Mutates `catalog`; returns the count of (key, locale) variations written. + # + # `ai_translator` (optional) is invoked ONCE per (key, locale) with the whole form-set — not per cell — so + # the model keeps one consistent stem across the forms; a per-category call lets it drift between synonyms + # (e.g. Polish słowo -> wyrazy -> słów). It is called as: + # ai_translator.call(english_forms:, categories:, locale:, note:, anchors:) => { <category> => translation } + # where `anchors` are the forms a human already translated (passed as fixed context to stay consistent with, + # and excluded from what's asked for). It may return nil / {} or omit any category — those cells fall through + # to English. `AITranslator#translate_plural` implements this contract directly, so wiring the live tier is + # `ai_translator: translator.method(:translate_plural)`. # # @param categories_by_locale [Hash{String=>Array}] locale => CLDR categories it needs # @param translations_by_locale [Hash{String=>Hash{String=>String}}] locale => { "<key>|==|plural.<category>" => value } @@ -134,23 +142,45 @@ def cldr_sort(categories) end private_class_method :cldr_sort - # One locale's plural variation hash: { 'variations' => { 'plural' => { <category> => stringUnit } } }. + # One locale's plural variation hash: { 'variations' => { 'plural' => { <category> => stringUnit } } }. Resolve the + # English and human forms first, ask the AI tier (once, whole form-set) for whatever's still missing, then + # write each cell as human ?? AI ?? English. def plural_variation(entry, cats, human, ai_translator, locale) - forms = cats.to_h { |cat| [cat, fold_cell(entry, cat, human, ai_translator, locale)] } + english_forms = english_forms_for(entry.plural, cats) + human_forms = human_forms_for(entry.key, cats, human) + ai_forms = ai_translator.nil? ? 
{} : ai_translator.call(english_forms: english_forms, categories: cats, locale: locale, note: entry.comment, anchors: human_forms) || {} + + forms = cats.to_h { |cat| [cat, fold_cell(cat, human_forms, ai_forms, english_forms)] } { 'variations' => { 'plural' => forms } } end private_class_method :plural_variation - # One target stringUnit for (entry, cat, locale): human ?? AI ?? English source; state reflects provenance - # (human => translated; AI / English fallback => needs_review). - def fold_cell(entry, cat, human, ai_translator, locale) - id = "#{entry.key}#{INFIX}#{cat}" - human_value = human[id] - return cell('translated', human_value) unless human_value.to_s.empty? + # English value per needed category — the form's own English, or the `other` value for categories English + # doesn't itself distinguish (zero/two/few/many). CLDR guarantees `other`, so it's always present. + def english_forms_for(plural, cats) + other = plural.dig('other', 'stringUnit', 'value') + cats.to_h { |cat| [cat, plural.dig(cat, 'stringUnit', 'value') || other] } + end + private_class_method :english_forms_for + + # Human (GlotPress) translations present for this key, keyed by CLDR category. These ship as `translated` and + # double as the AI request's anchors so the machine-filled forms stay consistent with the human's word choice. + def human_forms_for(key, cats, human) + cats.each_with_object({}) do |cat, acc| + value = human["#{key}#{INFIX}#{cat}"] + acc[cat] = value unless value.to_s.empty? + end + end + private_class_method :human_forms_for + + # One target stringUnit for a category: human ?? AI ?? English; state reflects provenance (human => + # translated; AI / English fallback => needs_review, i.e. machine output to re-check). + def fold_cell(cat, human_forms, ai_forms, english_forms) + human = human_forms[cat] + return cell('translated', human) unless human.to_s.empty? 
- english = entry.plural.dig(cat, 'stringUnit', 'value') || entry.plural.dig('other', 'stringUnit', 'value') - ai = ai_translator&.call(id: id, source: english, category: cat, note: entry.comment, locale: locale) - cell('needs_review', ai.to_s.empty? ? english : ai) + ai = ai_forms[cat] + cell('needs_review', ai.to_s.empty? ? english_forms[cat] : ai) end private_class_method :fold_cell diff --git a/fastlane/lanes/plural_strings_helper_test.rb b/fastlane/lanes/plural_strings_helper_test.rb new file mode 100644 index 000000000000..25911eec026a --- /dev/null +++ b/fastlane/lanes/plural_strings_helper_test.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for PluralStrings.fold_translations! — the reverse fold that folds downloaded plural +# translations back into the String Catalog with the `human ?? AI ?? English` floor. Run directly: +# `ruby fastlane/lanes/plural_strings_helper_test.rb`. No bundle / network (the AI tier is a stub lambda). +require 'minitest/autorun' +require_relative 'plural_strings_helper' + +# Exercises provenance (human => translated; AI / English fallback => needs_review) and the form-set contract: +# the AI tier is called ONCE per (key, locale) with the whole set of needed categories and the human forms as +# anchors — never per cell. +class PluralStringsFoldTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage + KEY = 'posts.count' + INFIX = PluralStrings::INFIX + + def unit(state, value) + { 'stringUnit' => { 'state' => state, 'value' => value } } + end + + # A catalog with one English plural (one/other). `extra` adds sibling entries (e.g. a non-plural string). 
+ def catalog(extra: {}) + { + 'sourceLanguage' => 'en', + 'version' => '1.0', + 'strings' => { + KEY => { + 'comment' => 'Number of posts.', + 'localizations' => { 'en' => { 'variations' => { 'plural' => { + 'one' => unit('translated', '%lld post'), + 'other' => unit('translated', '%lld posts') + } } } } + } + }.merge(extra) + } + end + + # The full stringUnit wrapper a fold wrote for (locale, category) of the plural key under test. + def cell(cat, catalog:, locale:) + catalog.dig('strings', KEY, 'localizations', locale, 'variations', 'plural', cat) + end + + # An AI stub returning `reply`, recording every call's kwargs so the form-set contract can be asserted. + def recording_translator(reply:, calls:) + lambda do |english_forms:, categories:, locale:, note:, anchors:| + calls << { english_forms: english_forms, categories: categories, locale: locale, note: note, anchors: anchors } + reply + end + end + + def fold(cat, categories_by_locale:, translations_by_locale: {}, ai_translator: nil) + PluralStrings.fold_translations!(cat, categories_by_locale: categories_by_locale, translations_by_locale: translations_by_locale, ai_translator: ai_translator) + end + + # Polish needs four categories but only `one` is human-translated — the setup the form-set contract is about. + # Folds with the supplied AI reply and returns [catalog, recorded_calls]. 
+ def fold_polish(reply:) + cat = catalog + calls = [] + fold(cat, + categories_by_locale: { 'pl' => %w[one few many other] }, + translations_by_locale: { 'pl' => { "#{KEY}#{INFIX}one" => '%lld wpis' } }, + ai_translator: recording_translator(reply: reply, calls: calls)) + [cat, calls] + end + + POLISH_AI = { 'few' => '%lld wpisy', 'many' => '%lld wpisów', 'other' => '%lld wpisu' }.freeze + + def test_human_translation_wins_and_is_marked_translated + cat = catalog + written = fold(cat, categories_by_locale: { 'fr' => %w[one other] }, translations_by_locale: { + 'fr' => { "#{KEY}#{INFIX}one" => '%lld article', "#{KEY}#{INFIX}other" => '%lld articles' } + }) + + assert_equal 1, written + assert_equal unit('translated', '%lld article'), cell('one', catalog: cat, locale: 'fr') + assert_equal unit('translated', '%lld articles'), cell('other', catalog: cat, locale: 'fr') + end + + def test_english_fallback_when_no_human_and_no_ai + cat = catalog + fold(cat, categories_by_locale: { 'fr' => %w[one other] }) + + # No human, no AI tier wired: each cell falls through to the English source, flagged for review. 
+ assert_equal unit('needs_review', '%lld post'), cell('one', catalog: cat, locale: 'fr') + assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'fr') + end + + def test_ai_fills_missing_cells_and_marks_needs_review + cat = catalog + ai = recording_translator(reply: { 'one' => '%lld article', 'other' => '%lld articles' }, calls: []) + fold(cat, categories_by_locale: { 'fr' => %w[one other] }, ai_translator: ai) + + assert_equal unit('needs_review', '%lld article'), cell('one', catalog: cat, locale: 'fr') + assert_equal unit('needs_review', '%lld articles'), cell('other', catalog: cat, locale: 'fr') + end + + def test_formset_call_carries_english_forms_anchors_and_note + _cat, calls = fold_polish(reply: POLISH_AI) + + assert_equal 1, calls.size, 'expected a single form-set call, not one per category' + call = calls.first + assert_equal %w[one few many other], call[:categories] + assert_equal 'pl', call[:locale] + assert_equal 'Number of posts.', call[:note] + assert_equal({ 'one' => '%lld wpis' }, call[:anchors]) + # few/many/other have no English form of their own, so they fall back to the English `other` value. 
+ assert_equal({ 'one' => '%lld post', 'few' => '%lld posts', 'many' => '%lld posts', 'other' => '%lld posts' }, call[:english_forms]) + end + + def test_formset_result_merges_human_and_ai_by_provenance + cat, = fold_polish(reply: POLISH_AI) + + assert_equal unit('translated', '%lld wpis'), cell('one', catalog: cat, locale: 'pl') # human + assert_equal unit('needs_review', '%lld wpisy'), cell('few', catalog: cat, locale: 'pl') # AI + assert_equal unit('needs_review', '%lld wpisów'), cell('many', catalog: cat, locale: 'pl') + assert_equal unit('needs_review', '%lld wpisu'), cell('other', catalog: cat, locale: 'pl') + end + + def test_ai_omitted_category_falls_back_to_english + cat = catalog + ai = recording_translator(reply: { 'one' => '%lld Beitrag' }, calls: []) # 'other' omitted + fold(cat, categories_by_locale: { 'de' => %w[one other] }, ai_translator: ai) + + assert_equal unit('needs_review', '%lld Beitrag'), cell('one', catalog: cat, locale: 'de') + assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'de') # English fallback + end + + def test_ai_nil_return_falls_back_to_english + cat = catalog + fold(cat, categories_by_locale: { 'de' => %w[one other] }, ai_translator: ->(**) {}) # declines entirely (nil) + + assert_equal unit('needs_review', '%lld post'), cell('one', catalog: cat, locale: 'de') + assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'de') + end + + def test_source_locale_is_not_folded + cat = catalog + original_en = cat.dig('strings', KEY, 'localizations', 'en') + written = fold(cat, categories_by_locale: { 'en' => %w[one other], 'fr' => %w[one other] }) + + assert_equal 1, written, 'the source locale must be excluded from the fold' + assert_same original_en, cat.dig('strings', KEY, 'localizations', 'en'), 'source localization left untouched' + refute_nil cell('one', catalog: cat, locale: 'fr') + end + + def test_non_plural_entries_are_skipped + extra = { 'app.title' => { 
'localizations' => { 'en' => unit('translated', 'WordPress') } } } + cat = catalog(extra: extra) + written = fold(cat, categories_by_locale: { 'fr' => %w[one other] }) + + assert_equal 1, written, 'only the plural entry is counted' + # The non-plural entry is left exactly as it was — no `fr` localization invented for it. + assert_equal({ 'en' => unit('translated', 'WordPress') }, cat.dig('strings', 'app.title', 'localizations')) + end + + def test_counts_variations_across_locales + cat = catalog + written = fold(cat, categories_by_locale: { 'fr' => %w[one other], 'de' => %w[one other] }) + + assert_equal 2, written + end +end From 068fe82000dc0fd5fbe6f4a9d442a8a4bc6202d0 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 26 Jun 2026 15:10:02 -0600 Subject: [PATCH 4/4] Document the translation pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds docs/localization-pipeline.md: the GlotPress + AI round trip, the human ?? AI ?? English floor, the AI tier (gating, placeholder gate, form-set plurals), and why regular-string MT is deferred to the String Catalog cutover — machine translations only ship from a state-bearing store (the catalog's needs_review), never from the live legacy .strings. Linked from AGENTS.md. 
--- AGENTS.md | 2 +- docs/localization-pipeline.md | 81 +++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 docs/localization-pipeline.md diff --git a/AGENTS.md b/AGENTS.md index 698aeb6be1cb..688642d6f44e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,7 +46,7 @@ WordPress-iOS uses a modular architecture with the main app and separate Swift p ### Important Considerations - **Multi-site Support**: Code must handle both WordPress.com and self-hosted sites - **Accessibility**: Use proper accessibility labels and traits -- **Localization**: follow best practices from @docs/localization.md +- **Localization**: follow best practices from @docs/localization.md. For how strings flow through GlotPress and the AI translation tier (the `human ?? AI ?? English` floor), see @docs/localization-pipeline.md. ## Xcode Schemes - `WordPress` builds the WordPress iOS app and runs `WordPressUnitTests.xctestplan` — default for builds and the full unit test suite. Use this scheme to run unit tests. diff --git a/docs/localization-pipeline.md b/docs/localization-pipeline.md new file mode 100644 index 000000000000..691f0c9e2204 --- /dev/null +++ b/docs/localization-pipeline.md @@ -0,0 +1,81 @@ +# Localization translation pipeline + +How user-facing strings get from English source into every shipped locale. This is the **release/tooling** view (the fastlane lanes under `fastlane/lanes/`); for how to *write* localizable strings in app code, see [localization.md](./localization.md). + +> The contract for every shipped string is **`human ?? AI ?? English`**: a human (GlotPress) translation if one exists, otherwise a machine translation, otherwise the English source. Nothing ships a broken placeholder — machine output that fails the format-specifier gate falls back to English. + +## The round trip + +Strings make two trips, both driven from fastlane. 
+ +### Forward (code freeze) — English → GlotPress + +Run as part of `complete_code_freeze` (`generate_strings_file_for_glotpress`): + +- **Regular strings** are extracted from source (`ios_generate_strings_file_from_code`, i.e. `genstrings` over `NSLocalizedString` / `AppLocalizedString`) into `WordPress/Resources/en.lproj/Localizable.strings`, then the manually-maintained `.strings` files are merged in. These English originals are uploaded to the [apps/ios GlotPress project](https://translate.wordpress.org/projects/apps/ios/dev/). +- **Plurals** are authored in `WordPress/Classes/Plurals.xcstrings` (English `one`/`other`). The forward lane (`generate_plural_strings_for_glotpress`) flattens each plural form into an independent string keyed `<key>|==|plural.<category>` and merges those originals into the same `Localizable.strings`, so they ride the same GlotPress project as everything else. + +Translators then do their work in GlotPress. + +### Reverse (release prep) — GlotPress → app + +`download_localized_strings` (called by `complete_code_freeze` / `finalize_release`) runs, in order: + +1. **Download** each locale's `Localizable.strings` from GlotPress (`ios_download_strings_files_from_glotpress`) into `WordPress/Resources/<locale>.lproj/`, and commit. The export filter is `status: current`, so **only translated strings come back** — untranslated ones are *omitted entirely* (not emitted as empty values; the action even errors if it finds an empty value). This is why `pl` ships ~1,650 of ~4,280 keys while `fr` ships ~all of them. +2. **Re-dispatch** the relevant subset back to the manually-maintained `.strings` files (`ios_extract_keys_from_strings_files`), and commit. +3. **Plural fold** (`download_localized_plurals`): pull the flat plural translations back out of the downloaded `Localizable.strings`, fold them into `Plurals.xcstrings`, and fill the gaps with the AI tier (below). 
+ +Step 3 runs via `run_plural_step`, which logs and continues on failure — the AI tier can never break a release. + +## The AI tier + +The machine-translation rung of the floor. It is **injected and gated**, never mandatory: + +- **Gate**: `ANTHROPIC_API_KEY`. Absent ⇒ the AI tier is skipped entirely and untranslated cells keep their English fallback — i.e. exactly the pre-AI behavior. Providing the key (e.g. in the release environment) is what turns it on. +- **Placeholder gate**: every machine cell must preserve the source's `printf`/`NSString` format specifiers exactly (count + type; positional `%1$@` may reorder). A mismatch is rejected and the cell falls back to English. So the AI tier can only ever produce a *safe* translation or nothing. +- **Model**: `claude-opus-4-8` by default (see `AITranslator::DEFAULT_MODEL`). + +The reusable primitives live in `fastlane/lanes/`: `AITranslator` (prompt building + validation; `translate` / `translate_plural` / `translate_all` / the async Message-Batches path), `TranslationValidator` (the placeholder gate), `Glossary` (brand do-not-translate list + per-locale terms), and `AnthropicBatch` (SDK glue). All the logic is pure and unit-tested with a canned-reply lambda; only `AITranslator.with_anthropic` touches the network. + +## What's wired today: plurals + +The plural reverse-fold (`PluralStrings.fold_translations!`) fills each `(key, locale)` cell of `Plurals.xcstrings` as `human ?? AI ?? English` — human ⇒ `translated`; AI / English ⇒ `needs_review`. The AI tier is called **once per `(key, locale)` form-set** (`AITranslator#translate_plural`), not per cell, with the already-human-translated forms passed as **anchors**. Translating the whole set in one request keeps a single consistent stem across the forms — a per-category call lets the model drift between synonyms (Polish `słowo` → `wyrazy` → `słów`), which it structurally can't prevent. 
+ +**`Plurals.xcstrings` is a String Catalog, which is why this works**: the catalog carries a real `needs_review` state, so a machine cell is recorded as machine output and a human translation supersedes it on the next download. + +> **This does not ship machine translations yet.** `Plurals.xcstrings` is built into the app but **not consumed at runtime** — no code reads from it; the app still renders plurals the legacy way. The fold *pre-populates* the catalog so it's ready when plurals cut over to it. Until that cutover, the AI plural translations sit in the catalog unused. + +## What's deferred: regular strings + +Regular (non-plural) strings are **not** machine-translated, by design. The app still ships the legacy `WordPress/Resources/<locale>.lproj/Localizable.strings` for them — `Localizable.xcstrings` (`generate_strings_catalog`) is generated as the future backing store but isn't the runtime store yet. A machine translation written into the legacy `.strings` would be **live immediately**, and we don't want machine-translated regular strings shipping before the catalog cutover. + +So regular-string MT waits for the same shape as plurals: once `Localizable.xcstrings` becomes the runtime store, a regular-string **catalog reverse-fold** folds the human translations in and AI-fills the `needs_review` gaps, staged in the catalog (not shipped) until cutover — exactly as the plural fold does today. + +When that's built, two facts established here will carry over: + +- **"Undefined by GlotPress" = absent**, not empty. The export omits untranslated strings (`status: current`; verified no empty-valued entries), so absence is the untranslated signal. +- **Humans always supersede MT**, and machine output never returns to GlotPress — so there's no translation-memory pollution and no manual reconciliation, as long as MT lives in a state-bearing store (the catalog's `needs_review`). 
+ +## Why these choices + +- **Why translate whole plural form-sets at once?** Per-category calls let the model pick different synonyms for different forms of the same word. One request for the whole set, with human forms as anchors, keeps one stem. +- **Why is the AI tier gated and non-fatal?** Cost and safety: it runs only where a key is configured, and a failure logs and continues rather than breaking a release. +- **Why does regular-string MT need the catalog, not legacy `.strings`?** The catalog's `needs_review` state lets a machine translation be *staged* (built but not shipped until cutover) and lets humans supersede it automatically. Legacy `.strings` has no state and is live, so anything written there ships immediately — which is exactly what we don't want before cutover. + +## Operational notes + +- **Eyeball one string against the live model** (needs `ANTHROPIC_API_KEY` + `bundle install`): + `ruby fastlane/lanes/ai_translator.rb fr "You have %1$d new posts" "Notification text. %1$d is the count."` +- **Tests** are pure stdlib minitest and run in CI (`.buildkite/commands/test-localization-tooling.sh`): `ruby fastlane/lanes/*_test.rb`. + +## Code map + +| Concern | File | +| --- | --- | +| Translation tier (prompts, validation, `translate*`) | `fastlane/lanes/ai_translator.rb` | +| Placeholder safety gate | `fastlane/lanes/translation_validator.rb` | +| Brand do-not-translate + per-locale terms | `fastlane/lanes/translation_glossary.rb` | +| Anthropic SDK glue + Message Batches | `fastlane/lanes/anthropic_batch.rb` | +| Plural fold (`Localizable.strings` ⇄ `Plurals.xcstrings`) + AI wiring | `fastlane/lanes/plural_strings_helper.rb`, `fastlane/lanes/localization_plurals.rb` | +| Catalog generation (future regular-string backing store) | `fastlane/lanes/localization_catalog.rb` | +| Download/upload orchestration | `fastlane/lanes/localization.rb` |