From 3c67f058dedd5a84049c394f929af1681da6cf22 Mon Sep 17 00:00:00 2001
From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com>
Date: Thu, 25 Jun 2026 22:47:53 -0600
Subject: [PATCH 1/4] Localization: AI translation primitives
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reusable, unit-tested Ruby primitives for the AI translation tier of the
localization pipeline — the service behind the `human ?? AI ?? English` floor
whose AI stub was left open in #25688. Pure prompt-building and validation with
the Anthropic SDK call injected, so the logic is testable without the gem or the
network. Not wired into any lane yet.

- TranslationValidator: format-specifier safety gate — a translation must
  preserve the source's placeholders (count and type; positional reordering
  allowed), or it is rejected and falls back to English.
- Glossary: brand do-not-translate list plus per-locale terms and register.
- AITranslator: single-string, per-key plural form-set (one consistent stem
  across CLDR forms), and batched string translation, with structured-output
  (output_config) enforcement.
- AnthropicBatch: Message Batches submit/await/results/collect for bulk backfill.

50 unit tests, rubocop clean.
---
 Gemfile                                      |   2 +
 Gemfile.lock                                 |   8 +
 fastlane/lanes/ai_translator.rb              | 390 +++++++++++++++++++
 fastlane/lanes/ai_translator_test.rb         | 289 ++++++++++++++
 fastlane/lanes/anthropic_batch.rb            |  98 +++++
 fastlane/lanes/anthropic_batch_test.rb       | 104 +++++
 fastlane/lanes/translation_glossary.rb       |  51 +++
 fastlane/lanes/translation_glossary_test.rb  |  37 ++
 fastlane/lanes/translation_validator.rb      | 108 +++++
 fastlane/lanes/translation_validator_test.rb |  59 +++
 10 files changed, 1146 insertions(+)
 create mode 100644 fastlane/lanes/ai_translator.rb
 create mode 100644 fastlane/lanes/ai_translator_test.rb
 create mode 100644 fastlane/lanes/anthropic_batch.rb
 create mode 100644 fastlane/lanes/anthropic_batch_test.rb
 create mode 100644 fastlane/lanes/translation_glossary.rb
 create mode 100644 fastlane/lanes/translation_glossary_test.rb
 create mode 100644 fastlane/lanes/translation_validator.rb
 create mode 100644 fastlane/lanes/translation_validator_test.rb

diff --git a/Gemfile b/Gemfile
index fa6bc472ba4c..9e74f95cc238 100644
--- a/Gemfile
+++ b/Gemfile
@@ -2,6 +2,8 @@
 
 source 'https://rubygems.org'
 
+# Official Anthropic SDK — backs the AI translation tier of the localization pipeline (fastlane/lanes/ai_translator.rb).
+gem 'anthropic', '~> 1.50'
 gem 'danger-dangermattic', '~> 1.3'
 gem 'dotenv'
 # 2.223.1 includes a fix for an ASC-interfacing issue
diff --git a/Gemfile.lock b/Gemfile.lock
index 8325e2b30df2..0b69002c4dd6 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -5,6 +5,10 @@ GEM
     abbrev (0.1.2)
     addressable (2.9.0)
       public_suffix (>= 2.0.2, < 8.0)
+    anthropic (1.50.0)
+      cgi
+      connection_pool
+      standardwebhooks
     artifactory (3.0.17)
     ast (2.4.3)
     atomos (0.1.3)
@@ -33,6 +37,7 @@ GEM
     bigdecimal (4.1.2)
     buildkit (1.6.1)
       sawyer (>= 0.6)
+    cgi (0.5.2)
     chroma (0.2.0)
     claide (1.1.0)
     claide-plugins (0.9.2)
@@ -43,6 +48,7 @@ GEM
     colored2 (3.1.2)
     commander (4.6.0)
       highline (~> 2.0.0)
+    connection_pool (3.0.2)
     cork (0.3.0)
       colored2 (~> 3.1)
     csv (3.3.5)
@@ -348,6 +354,7 @@ GEM
       CFPropertyList
       naturally
     singleton (0.3.0)
+    standardwebhooks (1.0.1)
     terminal-notifier (2.0.0)
     terminal-table (3.0.2)
       unicode-display_width (>= 1.1.1, < 3)
@@ -376,6 +383,7 @@ PLATFORMS
   ruby
 
 DEPENDENCIES
+  anthropic (~> 1.50)
   danger-dangermattic (~> 1.3)
   dotenv
   fastlane (~> 2.236)
diff --git a/fastlane/lanes/ai_translator.rb b/fastlane/lanes/ai_translator.rb
new file mode 100644
index 000000000000..c0228fa9a4a6
--- /dev/null
+++ b/fastlane/lanes/ai_translator.rb
@@ -0,0 +1,390 @@
+# frozen_string_literal: true
+
+require 'json'
+require_relative 'anthropic_batch'
+require_relative 'translation_glossary'
+require_relative 'translation_validator'
+
+# AI translation tier for the localization pipeline — the service behind the `human ?? AI ?? English` floor.
+#
+# `localization_plurals.rb` currently stubs `ai_translate_plural(...)` to return nil; this is what replaces
+# it. Given an English source string, a target locale, and the developer context, it asks Claude for a
+# translation, then runs the result through `TranslationValidator` before returning it. Anything that fails
+# the format-specifier gate (or comes back empty / refused) returns nil — the documented "no machine
+# translation" signal the fold treats as English-fallback (flagged needs_review). It never returns a
+# placeholder-broken string.
+#
+# The model call is INJECTED as a `complete` callable, not hard-wired, so the prompt-building and validation
+# logic stays pure and unit-testable without the SDK or the network. `AITranslator.with_anthropic` builds the
+# live, Claude-backed instance; the unit tests build one around a canned-reply lambda.
+class AITranslator # rubocop:disable Metrics/ClassLength -- mostly static localization config (33-locale name map + prompt templates)
+  DEFAULT_MODEL = 'claude-opus-4-8'
+
+  # lproj code → human language name for the prompt. Covers the current ship locales; an unmapped code falls
+  # back to itself (the model still does something reasonable, but add the name here for best results).
+  LANGUAGE_NAMES = {
+    'ar' => 'Arabic', 'bg' => 'Bulgarian', 'cs' => 'Czech', 'cy' => 'Welsh', 'da' => 'Danish',
+    'de' => 'German', 'en-AU' => 'English (Australia)', 'en-CA' => 'English (Canada)',
+    'en-GB' => 'English (United Kingdom)', 'es' => 'Spanish', 'fr' => 'French', 'he' => 'Hebrew',
+    'hr' => 'Croatian', 'hu' => 'Hungarian', 'id' => 'Indonesian', 'is' => 'Icelandic', 'it' => 'Italian',
+    'ja' => 'Japanese', 'ko' => 'Korean', 'nb' => 'Norwegian Bokmål', 'nl' => 'Dutch', 'pl' => 'Polish',
+    'pt' => 'Portuguese', 'pt-BR' => 'Portuguese (Brazil)', 'ro' => 'Romanian', 'ru' => 'Russian',
+    'sk' => 'Slovak', 'sq' => 'Albanian', 'sv' => 'Swedish', 'th' => 'Thai', 'tr' => 'Turkish',
+    'zh-Hans' => 'Chinese (Simplified)', 'zh-Hant' => 'Chinese (Traditional)'
+  }.freeze
+
+  # `{{language}}` / `{{brands}}` are substituted by literal gsub (NOT `format`/`%`, which would choke on the
+  # literal `%@` / `%1$@` examples below). Shared by the single-string and plural prompts.
+  TRANSLATION_RULES = <<~PROMPT
+    You are an expert software localizer translating user-facing UI strings for the WordPress and Jetpack iOS apps into {{language}}.
+
+    Rules:
+    - Translate into natural, concise {{language}} suitable for a mobile app UI. Screen space is limited, so prefer the shorter faithful phrasing.
+    - Keep these names EXACTLY as written, untranslated: {{brands}}.
+    - Preserve every format specifier (e.g. %@, %1$@, %d, %lld, %1$d) EXACTLY — same count and type. You may reorder positional specifiers such as %1$@ and %2$d to suit the target grammar, but each must appear exactly once and keep its number.
+    - Preserve any HTML tags, markup, and leading/trailing whitespace exactly as in the source.
+    - Do not translate URLs, email addresses, file paths, or code.
+    - Follow the tone and terminology conventions of the WordPress.org {{language}} translation community, including its formal/informal form-of-address convention.
+  PROMPT
+
+  # Output instruction for a single string.
+  SINGLE_OUTPUT = 'Output ONLY the translated string — no quotation marks, no explanation, no notes, nothing else.'
+
+  # Output instruction for a plural form-set. The consistency rule is the whole reason to translate the forms
+  # together (one request) rather than per category: it stops the model drifting between synonyms across forms
+  # (e.g. Polish słowo -> wyrazy -> słów), which a per-cell call structurally cannot prevent.
+  PLURAL_OUTPUT = <<~PROMPT
+    You are translating the plural forms of ONE UI string. Use a single consistent word and stem across every form — only the grammatical inflection (ending) changes between forms; never switch to a synonym between forms.
+
+    Return ONLY a JSON object mapping each requested CLDR plural category to its translation, e.g. {"one": "...", "other": "..."}. No markdown fences, no commentary — just the JSON object.
+  PROMPT
+
+  # Brief, locale-agnostic cue per CLDR category (the model knows the language's actual rules; this just
+  # disambiguates which form we're asking for).
+  CLDR_CUES = {
+    'zero' => 'the zero form',
+    'one' => 'singular (n = 1)',
+    'two' => 'the dual form (n = 2)',
+    'few' => 'the "few" form (e.g. 2-4 in many Slavic languages)',
+    'many' => 'the "many" form (e.g. 5+ in many Slavic languages)',
+    'other' => 'the general / catch-all form (also used for fractions)'
+  }.freeze
+
+  # Default number of strings per batched request. Small enough to keep each JSON reply parseable and bound the
+  # blast radius if one reply is malformed (only that batch falls back to English); large enough to amortize the
+  # cached system prompt across many strings.
+  DEFAULT_BATCH_SIZE = 25
+
+  # Output instruction for a batch of independent strings (keyed by item number, not the long reverse-DNS key,
+  # so the model can't garble the mapping).
+  BATCH_OUTPUT = <<~PROMPT
+    You are translating a batch of independent UI strings. Translate each on its own; the items are unrelated unless a context note says otherwise.
+
+    Return ONLY a JSON object mapping each item's number (as a string) to its translation, e.g. {"1": "...", "2": "..."}. Include every number you are given, and translate nothing else. No markdown fences, no commentary — just the JSON object.
+  PROMPT
+
+  # @param complete [#call] callable invoked as `complete.call(system:, user:, schema: nil)` returning the
+  #   model's raw text reply. Injected so the translator is testable without the SDK.
+  # @param glossary [Glossary] brand do-not-translate list + per-locale terms/register (translation_glossary.rb).
+  # @param language_names [Hash{String=>String}] lproj code → language name.
+  def initialize(complete:, glossary: Glossary.default, language_names: LANGUAGE_NAMES)
+    @complete = complete
+    @glossary = glossary
+    @language_names = language_names
+  end
+
+  # Validated translation of `source` into `locale`, or nil if one can't be produced SAFELY: blank source, a
+  # blank/garbled reply, or — critically — a reply that breaks the format-specifier contract.
+  #
+  # @param source [String] the English source string.
+  # @param locale [String] target lproj code (e.g. "fr", "pt-BR", "zh-Hans").
+  # @param context [String, nil] developer comment / context for the string (the `comment:` field). Feeding
+  #   this is the single biggest quality lever, so pass it whenever available.
+  def translate(source:, locale:, context: nil)
+    source = source.to_s
+    return nil if source.strip.empty?
+
+    candidate = clean(@complete.call(system: system_prompt(locale), user: user_prompt(source, context)).to_s)
+    return nil if candidate.empty?
+    return nil unless TranslationValidator.placeholders_match?(source, candidate)
+
+    candidate
+  end
+
+  # Adapter matching the `ai_translate_plural(id:, source:, category:, note:, locale:)` contract in
+  # `localization_plurals.rb`, so wiring the live tier is a one-line swap of the `ai_translator:` argument:
+  #   translator = AITranslator.with_anthropic
+  #   PluralStrings.fold_translations!(catalog, ..., ai_translator: translator.method(:for_plural))
+  # rubocop:disable Lint/UnusedMethodArgument -- keyword names are the documented call contract
+  def for_plural(id:, source:, category:, note:, locale:)
+    translate(source: source, locale: locale, context: plural_context(note, category))
+  end
+  # rubocop:enable Lint/UnusedMethodArgument
+
+  # Translates a whole plural form-set for one key in a SINGLE request, so the model keeps one consistent
+  # word/stem across the forms (the fix for per-cell lemma drift). Returns { category => translation } for the
+  # requested categories, each placeholder-validated against its English source; forms that fail the gate or
+  # are absent from the reply are omitted, so the caller falls back to English (needs_review) for those.
+  #
+  # @param english_forms [Hash{String=>String}] English plural forms by CLDR category (must include "other";
+  #   a requested category with no English form of its own falls back to the "other" English value).
+  # @param categories [Array<String>] the CLDR categories to produce (the ones the target locale needs).
+  # @param locale [String] target lproj code.
+  # @param note [String, nil] developer context / comment for the string.
+  # @param anchors [Hash{String=>String}] already-finalized (e.g. human-translated) forms — shown to the model
+  #   as fixed context to stay consistent with, and excluded from what it is asked to produce.
+  def translate_plural(english_forms:, categories:, locale:, note: nil, anchors: {})
+    english_forms = to_string_keys(english_forms)
+    anchors = to_string_keys(anchors)
+    return {} if english_forms['other'].to_s.strip.empty?
+
+    needed = categories.map(&:to_s) - anchors.keys
+    return {} if needed.empty?
+
+    reply = @complete.call(
+      system: plural_system_prompt(locale),
+      user: plural_user_prompt(english_forms, needed, note, anchors),
+      schema: object_schema(needed)
+    )
+    validated_forms(parse_forms(reply), needed, english_forms)
+  end
+
+  # Translates many independent strings in batched requests (default DEFAULT_BATCH_SIZE per request), returning
+  # { key => translation } for those that pass the placeholder gate. Strings absent from the result (gate
+  # failure, blank source, or a malformed batch reply) fall back to human/English at the call site. Pass the
+  # strings already sorted by key so each batch naturally groups one feature (reader.*, editor.*) — better
+  # terminology consistency within a batch.
+  #
+  # @param strings [Array<Hash>] each { key:, source:, comment: } (string or symbol keys both accepted).
+  # @param locale [String] target lproj code.
+  # @param batch_size [Integer] strings per request.
+  def translate_all(strings, locale:, batch_size: DEFAULT_BATCH_SIZE)
+    items = batchable_items(strings)
+    return {} if items.empty?
+
+    items.each_slice(batch_size).with_object({}) do |chunk, out|
+      out.merge!(translate_batch(chunk, locale))
+    end
+  end
+
+  # Builds Message Batch jobs for many strings across many locales (the async / cheaper bulk path). Returns
+  # { jobs:, manifest: }: `jobs` ({ custom_id:, system:, user:, schema: }) go to `AnthropicBatch.submit`;
+  # `manifest` (custom_id => { locale:, numbered: }) is handed back to `collect_batch` with the batch results.
+  # Pure — no model or SDK here; `AnthropicBatch.submit` adds the model when it builds the requests.
+  #
+  # @param strings_by_locale [Hash{String=>Array<Hash>}] locale => array of { key:, source:, comment: }.
+  def prepare_batch(strings_by_locale, batch_size: DEFAULT_BATCH_SIZE)
+    jobs = []
+    manifest = {}
+    strings_by_locale.each do |locale, strings|
+      batchable_items(strings).each_slice(batch_size).with_index do |chunk, index|
+        numbered = number_chunk(chunk)
+        custom_id = "#{locale}_#{index}" # must match ^[a-zA-Z0-9_-]{1,64}$; locale codes have hyphens, not underscores, so this stays unique
+        jobs << batch_job(custom_id, locale, numbered)
+        manifest[custom_id] = { locale: locale, numbered: numbered }
+      end
+    end
+    { jobs: jobs, manifest: manifest }
+  end
+
+  # Validates the batch replies and assembles { locale => { key => translation } }. `texts_by_custom_id` comes
+  # from `AnthropicBatch.results`; `manifest` from `prepare_batch`. A custom_id with no reply (errored batch
+  # request) or a per-string gate failure simply doesn't appear → the caller falls back to human/English. Pure.
+  def collect_batch(texts_by_custom_id, manifest)
+    manifest.each_with_object({}) do |(custom_id, entry), result|
+      bucket = (result[entry[:locale]] ||= {})
+      text = texts_by_custom_id[custom_id]
+      next if text.nil?
+
+      bucket.merge!(validated_batch(parse_forms(text), entry[:numbered]))
+    end
+  end
+
+  # Builds a translator backed by the Anthropic Ruby SDK (`gem 'anthropic'`, in the Gemfile) — needs
+  # ANTHROPIC_API_KEY in the env. This `complete` lambda is the only part of the file the unit tests don't
+  # exercise, by design: everything the tests cover stays on the pure side of the injection boundary.
+  def self.with_anthropic(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil), model: DEFAULT_MODEL, **)
+    client = AnthropicBatch.client(api_key: api_key)
+    complete = lambda do |system:, user:, schema: nil|
+      AnthropicBatch.text_of(client.messages.create(**AnthropicBatch.message_params(model: model, system: system, user: user, schema: schema)))
+    end
+    new(complete: complete, **)
+  rescue LoadError
+    raise LoadError, "The `anthropic` gem (in the Gemfile) isn't installed — run `bundle install` (or `gem install anthropic`)."
+  end
+
+  private
+
+  # Shared rule block (brands, format specifiers) with {{language}}/{{brands}} filled in, plus the glossary's
+  # per-locale terms + register note appended when present.
+  def render_rules(locale)
+    language = @language_names.fetch(locale, locale)
+    rules = TRANSLATION_RULES.gsub('{{language}}') { language }.gsub('{{brands}}') { @glossary.do_not_translate.join(', ') }
+    guidance = @glossary.guidance(locale)
+    guidance.empty? ? rules : "#{rules}\n#{guidance}"
+  end
+
+  def system_prompt(locale)
+    "#{render_rules(locale)}\n#{SINGLE_OUTPUT}"
+  end
+
+  def plural_system_prompt(locale)
+    "#{render_rules(locale)}\n#{PLURAL_OUTPUT}"
+  end
+
+  def user_prompt(source, context)
+    # Optional "Context:" paragraph first (skipped for a nil/blank note), then the English source string.
+    has_context = context && !context.to_s.strip.empty?
+    body = "English source string:\n#{source}"
+    has_context ? "Context: #{context}\n\n#{body}" : body
+  end
+
+  def plural_user_prompt(english_forms, needed, note, anchors)
+    sections = []
+    sections << "Context: #{note}" if note && !note.to_s.strip.empty?
+    sections << "English source forms:\n#{format_forms(english_forms)}"
+    sections << "Already-finalized forms — match their exact word choice and stem, and do not re-output them:\n#{format_forms(anchors)}" unless anchors.empty?
+    catalog = needed.map { |category| "  #{category} - #{CLDR_CUES.fetch(category, category)}" }.join("\n")
+    sections << "Translate these CLDR plural categories, returning a JSON object keyed exactly by these category names:\n#{catalog}"
+    sections.join("\n\n")
+  end
+
+  def format_forms(forms)
+    forms.map { |category, value| "  #{category} = #{value}" }.join("\n")
+  end
+
+  # Keep only the parsed forms whose placeholders match their English source (the form's own English, or the
+  # "other" value for categories English doesn't distinguish). Failed/empty forms are dropped → English fallback.
+  def validated_forms(parsed, needed, english_forms)
+    other = english_forms['other']
+    needed.each_with_object({}) do |category, out|
+      candidate = clean(parsed[category].to_s)
+      next if candidate.empty?
+
+      source = english_forms[category] || other
+      out[category] = candidate if TranslationValidator.placeholders_match?(source, candidate)
+    end
+  end
+
+  # JSON Schema for a flat object whose values are all required strings — passed as `output_config.format` to
+  # make the model emit exactly this shape (structured outputs). additionalProperties must be false; that's the
+  # only form structured outputs support, and it also stops the model inventing extra keys.
+  def object_schema(keys)
+    {
+      'type' => 'object',
+      'properties' => keys.to_h { |key| [key, { 'type' => 'string' }] },
+      'required' => keys,
+      'additionalProperties' => false
+    }
+  end
+
+  # Parse the model's JSON reply into { key => String }; tolerate ```json fences; {} on any parse failure.
+  # Non-string values (null, numbers, nested objects) are dropped so a later `.to_s` can't pass them off as text.
+  def parse_forms(reply)
+    text = reply.to_s.strip.sub(/\A```(?:json)?\s*/i, '').sub(/```\s*\z/, '').strip
+    data = JSON.parse(text)
+    data.is_a?(Hash) ? data.select { |_key, value| value.is_a?(String) } : {}
+  rescue JSON::ParserError
+    {}
+  end
+
+  def to_string_keys(hash)
+    (hash || {}).to_h { |key, value| [key.to_s, value] } # nil → {}; symbol keys → string keys
+  end
+
+  # One batched request: number the chunk, ask for a JSON {number => translation}, keep the validated ones.
+  def translate_batch(chunk, locale)
+    numbered = number_chunk(chunk)
+    reply = @complete.call(
+      system: batch_system_prompt(locale),
+      user: batch_user_prompt(numbered),
+      schema: object_schema(numbered.keys.map(&:to_s))
+    )
+    validated_batch(parse_forms(reply), numbered)
+  end
+
+  # Map each numbered item to its validated translation by key; drop empty/placeholder-breaking ones.
+  def validated_batch(parsed, numbered)
+    numbered.each_with_object({}) do |(index, string), out|
+      candidate = clean(parsed[index.to_s].to_s)
+      next if candidate.empty?
+
+      out[string[:key]] = candidate if TranslationValidator.placeholders_match?(string[:source], candidate)
+    end
+  end
+
+  def batch_system_prompt(locale)
+    "#{render_rules(locale)}\n#{BATCH_OUTPUT}"
+  end
+
+  def batch_user_prompt(numbered)
+    items = numbered.map { |index, string| batch_item_line(index, string) }
+    "Translate each numbered UI string below into the target language.\n\n#{items.join("\n")}"
+  end
+
+  # One prompt line per string: "[n] (key) English — dev note"; the key and the note are left out when blank.
+  def batch_item_line(index, string)
+    key = string[:key].to_s
+    note = string[:comment].to_s
+    key_part = key.empty? ? '' : "(#{key}) "
+    note_part = note.strip.empty? ? '' : " — #{string[:comment]}"
+    "[#{index}] #{key_part}#{string[:source]}#{note_part}"
+  end
+
+  def normalize_string(string)
+    { key: field(string, :key), source: field(string, :source), comment: field(string, :comment) }
+  end
+
+  def field(hash, name)
+    hash[name] || hash[name.to_s]
+  end
+
+  # Normalize to { key:, source:, comment: } hashes and drop entries with a blank source (nothing to translate).
+  def batchable_items(strings)
+    strings.map { |string| normalize_string(string) }.reject { |string| string[:source].to_s.strip.empty? }
+  end
+
+  # Key each string by its 1-based position → { 1 => string, … } (the number the model's JSON reply is matched by).
+  def number_chunk(chunk)
+    chunk.each.with_index(1).to_h { |string, position| [position, string] }
+  end
+
+  def batch_job(custom_id, locale, numbered)
+    {
+      custom_id: custom_id,
+      system: batch_system_prompt(locale),
+      user: batch_user_prompt(numbered),
+      schema: object_schema(numbered.keys.map(&:to_s))
+    }
+  end
+
+  # Models occasionally wrap the answer in quotation marks or add a trailing newline despite the
+  # "only the translation" instruction; strip those cosmetic wrappers. Anything more substantial (a prose
+  # explanation that slipped through) almost always breaks the placeholder gate and is discarded there.
+  def clean(text)
+    stripped = text.strip
+    wrapper = [['"', '"'], ['“', '”']].find { |opener, closer| stripped.start_with?(opener) && stripped.end_with?(closer) }
+    return stripped if wrapper.nil? || stripped.length < 2
+
+    inner = stripped[1...-1]
+    # More quotes inside (`"Save" or "Discard"`) means they are content, not a wrapper — keep the reply intact.
+    wrapper.any? { |quote| inner.include?(quote) } ? stripped : inner.strip
+  end
+
+  # The dev note plus an explicit CLDR-category cue, so the model produces the correct grammatical plural
+  # form (e.g. the Polish `few` form) rather than guessing from the English source alone. A nil/blank note is skipped.
+  def plural_context(note, category)
+    cue = "Plural category: #{category}. Render the grammatically correct plural form for this category."
+    [note.to_s.strip, cue].reject(&:empty?).join(' ')
+  end
+end
+
+# Tiny CLI to eyeball quality against the real model (needs the `anthropic` gem + ANTHROPIC_API_KEY):
+#   ruby fastlane/lanes/ai_translator.rb fr "You have %1$d new posts" "Notification text. %1$d is the count."
+if __FILE__ == $PROGRAM_NAME
+  locale, source, context = ARGV
+  abort("usage: ruby #{File.basename(__FILE__)} <locale> \"<english>\" [\"<context>\"]") unless locale && source
+
+  result = AITranslator.with_anthropic.translate(source: source, locale: locale, context: context)
+  puts result.nil? ? '(no safe translation — placeholder check failed or empty reply)' : result
+end
diff --git a/fastlane/lanes/ai_translator_test.rb b/fastlane/lanes/ai_translator_test.rb
new file mode 100644
index 000000000000..1f0e77953547
--- /dev/null
+++ b/fastlane/lanes/ai_translator_test.rb
@@ -0,0 +1,289 @@
+# frozen_string_literal: true
+
+# Pure-Ruby unit suite for AITranslator. Run directly: `ruby fastlane/lanes/ai_translator_test.rb`.
+# Uses a canned-reply lambda for `complete:`, so it exercises all of the prompt-building / validation logic
+# without the `anthropic` gem or the network.
+require 'minitest/autorun'
+require_relative 'ai_translator'
+
+# Exercises prompt-building and the validator gate via a canned-reply `complete:` lambda (no gem / network).
+class AITranslatorTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage
+  # Builds a translator whose model "reply" is fixed, optionally recording the prompts it was called with.
+  def translator(reply:, prompts: nil)
+    complete = lambda do |system:, user:, schema: nil|
+      prompts&.replace({ system: system, user: user, schema: schema })
+      reply
+    end
+    AITranslator.new(complete: complete)
+  end
+
+  def test_returns_cleaned_translation
+    t = translator(reply: %("Réglages"\n)) # wrapped in quotes + trailing newline
+    assert_equal 'Réglages', t.translate(source: 'Settings', locale: 'fr')
+  end
+
+  def test_accepts_a_reply_that_preserves_placeholders
+    t = translator(reply: '%2$@ wurde von %1$@ eingeladen')
+    assert_equal '%2$@ wurde von %1$@ eingeladen',
+                 t.translate(source: '%1$@ invited %2$@', locale: 'de')
+  end
+
+  def test_rejects_a_reply_that_breaks_placeholders
+    t = translator(reply: '%1$d Beiträge') # object → int: must be discarded
+    assert_nil t.translate(source: '%1$@ posts', locale: 'de')
+  end
+
+  def test_blank_source_makes_no_model_call
+    called = false
+    complete = lambda do |**|
+      called = true
+      'x'
+    end
+    t = AITranslator.new(complete: complete)
+    assert_nil t.translate(source: "  \n", locale: 'fr')
+    refute called
+  end
+
+  def test_blank_reply_returns_nil
+    assert_nil translator(reply: "  \n").translate(source: 'Settings', locale: 'fr')
+  end
+
+  def test_prompt_carries_language_brands_and_context
+    prompts = {}
+    t = translator(reply: 'Publier', prompts: prompts)
+    t.translate(source: 'Publish', locale: 'fr', context: 'Button to publish a post')
+
+    assert_includes prompts[:system], 'French'
+    assert_includes prompts[:system], 'WordPress'
+    assert_includes prompts[:user], 'Button to publish a post'
+    assert_includes prompts[:user], 'Publish'
+  end
+
+  def test_for_plural_adapter_maps_arguments_and_cues_category
+    prompts = {}
+    t = translator(reply: '%1$d Beiträge pro Woche', prompts: prompts)
+    out = t.for_plural(
+      id: 'blogging.reminders.weeklyCount|==|plural.other',
+      source: '%1$d times a week',
+      category: 'other',
+      note: 'Number of blogging reminders per week.',
+      locale: 'de'
+    )
+
+    assert_equal '%1$d Beiträge pro Woche', out
+    assert_includes prompts[:user], 'Number of blogging reminders per week.'
+    assert_includes prompts[:user], 'other' # the CLDR-category cue reaches the prompt
+  end
+
+  def test_translate_plural_returns_all_requested_forms
+    reply = '{"one":"%1$ld słowo","few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}'
+    out = translator(reply: reply).translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[one few many other], locale: 'pl', note: 'Number of words.'
+    )
+    assert_equal(
+      { 'one' => '%1$ld słowo', 'few' => '%1$ld słowa', 'many' => '%1$ld słów', 'other' => '%1$ld słowa' }, out
+    )
+  end
+
+  def test_translate_plural_drops_a_form_that_breaks_placeholders
+    # 'few' switched %1$ld -> %1$d (length change) — drop it; the rest survive.
+    reply = '{"one":"%1$ld słowo","few":"%1$d słowa","other":"%1$ld słowa"}'
+    out = translator(reply: reply).translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[one few other], locale: 'pl'
+    )
+    assert_equal %w[one other], out.keys.sort
+    refute out.key?('few')
+  end
+
+  def test_translate_plural_excludes_anchors_and_passes_them_as_context
+    prompts = {}
+    reply = '{"few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}'
+    out = translator(reply: reply, prompts: prompts).translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[one few many other], locale: 'pl', anchors: { 'one' => '%1$ld słowo' }
+    )
+    refute out.key?('one') # human-anchored — not produced
+    assert_equal %w[few many other], out.keys.sort
+    assert_includes prompts[:user], '%1$ld słowo' # anchor shown to the model as fixed context
+  end
+
+  def test_translate_plural_falls_back_to_empty_on_bad_json
+    out = translator(reply: 'sorry — here are your forms!').translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[one other], locale: 'pl'
+    )
+    assert_empty out
+  end
+
+  def test_translate_plural_tolerates_json_code_fences
+    reply = "```json\n{\"one\":\"%1$ld słowo\",\"other\":\"%1$ld słowa\"}\n```"
+    out = translator(reply: reply).translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[one other], locale: 'pl'
+    )
+    assert_equal({ 'one' => '%1$ld słowo', 'other' => '%1$ld słowa' }, out)
+  end
+
+  def test_translate_plural_validates_fallback_category_against_other
+    # 'many' has no English form of its own → validated against the English 'other' (%1$ld words).
+    out = translator(reply: '{"many":"%1$ld słów"}').translate_plural(
+      english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' },
+      categories: %w[many], locale: 'pl'
+    )
+    assert_equal({ 'many' => '%1$ld słów' }, out)
+  end
+
+  def test_translate_all_maps_keys_and_validates
+    reply = '{"1":"Réglages","2":"%1$@ articles"}'
+    out = translator(reply: reply).translate_all(
+      [{ key: 'settings.title', source: 'Settings', comment: 'Screen title' },
+       { key: 'posts.count', source: '%1$@ posts', comment: 'Count' }],
+      locale: 'fr'
+    )
+    assert_equal({ 'settings.title' => 'Réglages', 'posts.count' => '%1$@ articles' }, out)
+  end
+
+  def test_translate_all_drops_a_placeholder_breaker
+    reply = '{"1":"Réglages","2":"%1$d articles"}' # item 2 changed %1$@ -> %1$d
+    out = translator(reply: reply).translate_all(
+      [{ key: 'settings.title', source: 'Settings' }, { key: 'posts.count', source: '%1$@ posts' }],
+      locale: 'fr'
+    )
+    assert_equal({ 'settings.title' => 'Réglages' }, out)
+    refute out.key?('posts.count')
+  end
+
+  def test_translate_all_skips_blank_sources
+    out = translator(reply: '{"1":"Réglages"}').translate_all(
+      [{ key: 'settings.title', source: 'Settings' }, { key: 'blank', source: '   ' }],
+      locale: 'fr'
+    )
+    assert_equal({ 'settings.title' => 'Réglages' }, out)
+  end
+
+  def test_translate_all_chunks_and_merges
+    # Three strings at batch_size 2 → two requests. Every request receives the same canned reply, so 'c' —
+    # alone in the second chunk — comes back as 'x'.
+    requests = 0
+    counting_complete = lambda do |**|
+      requests += 1
+      '{"1":"x","2":"y"}'
+    end
+    strings = [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }]
+    merged = AITranslator.new(complete: counting_complete).translate_all(strings, locale: 'fr', batch_size: 2)
+    assert_equal 2, requests
+    assert_equal({ 'a' => 'x', 'b' => 'y', 'c' => 'x' }, merged)
+  end
+
+  def test_translate_all_bad_json_batch_falls_back
+    unparseable = translator(reply: 'not json at all') # the reply is not JSON, so no string gets a translation
+    assert_empty unparseable.translate_all([{ key: 'a', source: 'One' }], locale: 'fr')
+  end
+
+  def test_translate_all_empty_input_makes_no_call
+    invoked = false
+    spy = lambda do |**|
+      invoked = true
+      '{}'
+    end
+    assert_empty AITranslator.new(complete: spy).translate_all([], locale: 'fr')
+    refute invoked # nothing to translate, so the completion callable must stay untouched
+  end
+
+  def test_translate_all_prompt_carries_key_context_and_language
+    captured = {}
+    string = { key: 'editor.publish', source: 'Publish', comment: 'Publish button' }
+    translator(reply: '{"1":"Publier"}', prompts: captured).translate_all([string], locale: 'fr')
+    assert_includes captured[:system], 'French'
+    # The user prompt must carry the key, the developer comment, and the source text.
+    ['editor.publish', 'Publish button', 'Publish'].each do |fragment|
+      assert_includes captured[:user], fragment
+    end
+  end
+
+  def test_translate_plural_passes_a_schema_of_its_categories
+    captured = {}
+    english = { 'one' => '%1$ld word', 'other' => '%1$ld words' }
+    polish = translator(reply: '{"one":"%1$ld słowo","other":"%1$ld słowa"}', prompts: captured)
+    polish.translate_plural(english_forms: english, categories: %w[one other], locale: 'pl')
+    assert_equal %w[one other], captured[:schema]['required'].sort
+    assert_equal false, captured[:schema]['additionalProperties']
+  end
+
+  def test_translate_all_passes_a_numbered_schema
+    # Two strings in → the schema must require exactly the item numbers "1" and "2".
+    captured = {}
+    strings = [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }]
+    translator(reply: '{"1":"a","2":"b"}', prompts: captured).translate_all(strings, locale: 'fr')
+    assert_equal %w[1 2], captured[:schema]['required'].sort
+  end
+
+  def test_single_translate_passes_no_schema
+    captured = {}
+    translator(reply: 'Publier', prompts: captured).translate(source: 'Publish', locale: 'fr')
+    assert_nil captured[:schema] # the single-string path must not send a schema
+  end
+
+  def test_glossary_terms_and_register_reach_the_prompt
+    # Record what the translator sends so the glossary's contribution to the system prompt can be inspected.
+    captured = {}
+    recorder = lambda do |system:, user:, schema: nil|
+      captured.replace({ system: system, user: user, schema: schema })
+      'Publier'
+    end
+    french = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' })
+    AITranslator.new(complete: recorder, glossary: french).translate(source: 'Publish', locale: 'fr')
+    ['post -> article', 'Register: Use formal vous.'].each { |line| assert_includes captured[:system], line }
+  end
+
+  def test_prepare_batch_chunks_each_locale_into_jobs
+    by_locale = {
+      'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }],
+      'de' => [{ key: 'a', source: 'One' }]
+    }
+    jobs = translator(reply: '{}').prepare_batch(by_locale, batch_size: 2)[:jobs]
+    assert_equal(%w[fr_0 fr_1 de_0], jobs.map { |job| job[:custom_id] })
+    assert_equal %w[1 2], jobs.first[:schema]['required'].sort
+  end
+
+  def test_prepare_batch_manifest_maps_custom_id_to_locale_and_strings
+    by_locale = { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }] }
+    entry = translator(reply: '{}').prepare_batch(by_locale, batch_size: 25)[:manifest]['fr_0']
+    assert_equal 'fr', entry[:locale]
+    # The entry's :numbered values are the chunk's strings, in input order.
+    assert_equal(%w[a b], entry[:numbered].values.map { |string| string[:key] })
+  end
+
+  def test_prepare_batch_custom_ids_match_the_api_pattern
+    # custom_id must satisfy the Batch API's ^[a-zA-Z0-9_-]{1,64}$ rule, even for a hyphenated locale (pt-BR).
+    jobs = translator(reply: '{}').prepare_batch({ 'pt-BR' => [{ key: 'a', source: 'One' }] }, batch_size: 25)[:jobs]
+    jobs.map { |job| job[:custom_id] }.each { |custom_id| assert_match(/\A[a-zA-Z0-9_-]{1,64}\z/, custom_id) }
+  end
+
+  def test_collect_batch_validates_and_groups_by_locale
+    ai = translator(reply: '{}')
+    by_locale = { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }
+    manifest = ai.prepare_batch(by_locale, batch_size: 25)[:manifest]
+    # Reply text keyed by custom_id, the shape collect_batch consumes.
+    replies = { 'fr_0' => '{"1":"Réglages","2":"%1$@ éléments"}' }
+    expected = { 'fr' => { 'settings' => 'Réglages', 'count' => '%1$@ éléments' } }
+    assert_equal(expected, ai.collect_batch(replies, manifest))
+  end
+
+  def test_collect_batch_drops_invalid_and_missing
+    ai = translator(reply: '{}')
+    by_locale = { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }
+    manifest = ai.prepare_batch(by_locale, batch_size: 25)[:manifest]
+    # Item 2 swaps %1$@ for %1$d, so it must be rejected while item 1 is kept.
+    replies = { 'fr_0' => '{"1":"Réglages","2":"%1$d éléments"}' }
+    assert_equal({ 'fr' => { 'settings' => 'Réglages' } }, ai.collect_batch(replies, manifest))
+  end
+
+  def test_collect_batch_handles_a_missing_batch_reply
+    ai = translator(reply: '{}')
+    manifest = ai.prepare_batch({ 'fr' => [{ key: 'a', source: 'One' }] }, batch_size: 25)[:manifest]
+    assert_equal({ 'fr' => {} }, ai.collect_batch({}, manifest)) # no reply at all: the locale is present but empty
+  end
+end
diff --git a/fastlane/lanes/anthropic_batch.rb b/fastlane/lanes/anthropic_batch.rb
new file mode 100644
index 000000000000..89ec02dd62e2
--- /dev/null
+++ b/fastlane/lanes/anthropic_batch.rb
@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+
+require 'json'
+
+# SDK glue for the Anthropic Ruby client: the message create-params shape, response-text extraction, and the
+# Message Batches submit/poll/collect cycle. Isolated here so `AITranslator` stays pure prompt-building +
+# validation, and all knowledge of the SDK's request/response shape lives in ONE place — the synchronous path
+# (`AITranslator.with_anthropic`) and the async batch path share `message_params` / `text_of`, so the request
+# shape can't drift between them.
+#
+# The batch path is the cost/throughput lever for a full backfill: one async job covering many (locale, chunk)
+# requests at ~50% the per-token price. Flow: `AITranslator#prepare_batch` → `submit` → poll `ready?` →
+# `results` → `AITranslator#collect_batch`.
+module AnthropicBatch
+  MAX_TOKENS = 8192 # generous so a batch's JSON object can't truncate (a truncated reply fails the JSON parse)
+
+  module_function
+
+  # `messages.create` params for one request; adds output_config (structured outputs) when a schema is given.
+  def message_params(model:, system:, user:, schema: nil)
+    params = {
+      model: model.to_sym,
+      max_tokens: MAX_TOKENS,
+      system_: [{ type: 'text', text: system, cache_control: { type: 'ephemeral' } }],
+      messages: [{ role: 'user', content: user }]
+    }
+    params[:output_config] = { format: { type: :json_schema, schema: schema } } unless schema.nil?
+    params
+  end
+
+  # Concatenate the text blocks of a Message response.
+  def text_of(message)
+    message.content.select { |block| block.type == :text }.map(&:text).join("\n")
+  end
+
+  # Submit jobs ({ custom_id:, system:, user:, schema: }) as one Message Batch; returns the batch id.
+  def submit(jobs, client:, model:)
+    requests = jobs.map do |job|
+      { custom_id: job[:custom_id], params: message_params(model: model, system: job[:system], user: job[:user], schema: job[:schema]) }
+    end
+    client.messages.batches.create(requests: requests).id
+  end
+
+  # True once the batch has finished processing (results are available to stream).
+  def ready?(batch_id, client:)
+    client.messages.batches.retrieve(batch_id).processing_status.to_s == 'ended'
+  end
+
+  # { custom_id => reply text } for the succeeded requests. `results_streaming` yields raw JSONL lines (one per
+  # request) — the SDK's lenient coercion passes the line through as a String — so each is parsed here.
+  # Errored/expired/canceled entries, and any line that is unparseable or not a JSON object, are skipped, so
+  # the strings they covered fall back to human/English at collect time.
+  def results(batch_id, client:)
+    client.messages.batches.results_streaming(batch_id).each_with_object({}) do |line, out|
+      record = parse_line(line)
+      result = record['result'] || {}
+      out[record['custom_id']] = content_text(result.dig('message', 'content')) if result['type'] == 'succeeded'
+    end
+  end
+
+  # Parse a JSONL result line into a Hash (a Hash passes through); {} if unparseable or not an object (`null`, `[]`).
+  def parse_line(line)
+    parsed = line.is_a?(String) ? JSON.parse(line) : line
+    parsed.is_a?(Hash) ? parsed : {} # valid JSON need not be an object; `results` indexes the record by String key
+  rescue JSON::ParserError
+    {}
+  end
+
+  # Join the text blocks of a parsed message-content array (Hash blocks, not the typed objects `text_of` takes).
+  def content_text(content)
+    Array(content).select { |block| block['type'] == 'text' }.map { |block| block['text'] }.join("\n")
+  end
+
+  # Poll until the batch finishes, then return its results (same shape as `results`); nil if it hasn't finished
+  # within `timeout`. `interval`/`timeout` are seconds; `sleeper` is injected so tests run instantly. Yields the
+  # elapsed seconds after each not-ready check (progress reporting). Timeout is approximate (summed intervals).
+  #
+  # This is the simple synchronous "submit and wait" mechanism. For a huge backfill that may run for a long
+  # time, prefer submitting, persisting the batch id, and collecting in a later step over blocking on this —
+  # `submit` returns the id immediately, and `ready?` / `results` let a separate step pick it up.
+  def await(batch_id, client:, interval: 30, timeout: 3600, sleeper: ->(seconds) { sleep(seconds) })
+    waited = 0
+    loop do
+      return results(batch_id, client: client) if ready?(batch_id, client: client)
+      return nil if waited >= timeout
+
+      yield waited if block_given?
+      sleeper.call(interval)
+      waited += interval
+    end
+  end
+
+  # A raw Anthropic client for the batch calls (needs the `anthropic` gem + ANTHROPIC_API_KEY).
+  def client(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil))
+    require 'anthropic'
+    Anthropic::Client.new(api_key: api_key)
+  end
+end
diff --git a/fastlane/lanes/anthropic_batch_test.rb b/fastlane/lanes/anthropic_batch_test.rb
new file mode 100644
index 000000000000..978aa104203f
--- /dev/null
+++ b/fastlane/lanes/anthropic_batch_test.rb
@@ -0,0 +1,104 @@
+# frozen_string_literal: true
+
+# Pure-Ruby unit suite for AnthropicBatch. Run: `ruby fastlane/lanes/anthropic_batch_test.rb`.
+# Drives the submit / poll / results glue against a fake client that mimics the SDK's shape (no gem, no network).
+require 'minitest/autorun'
+require 'json'
+require_relative 'anthropic_batch'
+
+# Exercises the submit / poll / results glue via a fake client that mimics the SDK shape. `create`/`retrieve`
+# return typed-ish objects (a Batch struct); `results_streaming` yields raw JSONL strings, as the real SDK does.
+class AnthropicBatchTest < Minitest::Test
+  Batch = Struct.new(:id, :processing_status)
+
+  # Stand-in for client.messages.batches: answers create, retrieve and results_streaming.
+  class FakeBatches
+    attr_reader :created_requests
+
+    def initialize(status:, entries:, ready_after: nil)
+      @status = status
+      @entries = entries
+      @ready_after = ready_after # when set, `retrieve` reports :ended from this call number onwards
+      @retrieve_calls = 0
+    end
+
+    def create(requests:)
+      @created_requests = requests
+      Batch.new('batch_1', :in_progress)
+    end
+
+    def retrieve(_id)
+      @retrieve_calls += 1
+      Batch.new('batch_1', effective_status)
+    end
+
+    def results_streaming(_id)
+      @entries
+    end
+
+    private
+
+    def effective_status
+      return @status if @ready_after.nil?
+
+      @retrieve_calls < @ready_after ? :in_progress : :ended
+    end
+  end
+
+  def fake_client(status: :ended, entries: [], ready_after: nil)
+    messages = Struct.new(:batches).new(FakeBatches.new(status: status, entries: entries, ready_after: ready_after))
+    Struct.new(:messages).new(messages)
+  end
+
+  # One raw JSONL line for a succeeded request whose single text block holds `json`.
+  def succeeded_line(custom_id, json)
+    message = { 'content' => [{ 'type' => 'text', 'text' => json }] }
+    JSON.generate('custom_id' => custom_id, 'result' => { 'type' => 'succeeded', 'message' => message })
+  end
+
+  def errored_line(custom_id)
+    JSON.generate({ 'custom_id' => custom_id, 'result' => { 'type' => 'errored' } })
+  end
+
+  def test_message_params_adds_output_config_only_with_a_schema
+    schema = { 'type' => 'object' }
+    plain = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u')
+    structured = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u', schema: schema)
+    refute plain.key?(:output_config)
+    assert_equal :'claude-opus-4-8', plain[:model] # the model name is sent as a Symbol
+    assert_equal({ format: { type: :json_schema, schema: schema } }, structured[:output_config])
+  end
+
+  def test_submit_builds_requests_and_returns_the_id
+    api = fake_client
+    job = { custom_id: 'fr_0', system: 'sys', user: 'usr', schema: { 'type' => 'object' } }
+    assert_equal 'batch_1', AnthropicBatch.submit([job], client: api, model: 'claude-opus-4-8')
+
+    # The fake records what `create` received, so the built request can be inspected.
+    submitted = api.messages.batches.created_requests.first
+    assert_equal 'fr_0', submitted[:custom_id]
+    assert_equal :'claude-opus-4-8', submitted[:params][:model]
+  end
+
+  def test_ready_reflects_processing_status
+    readiness = %i[in_progress ended].map { |status| AnthropicBatch.ready?('b', client: fake_client(status: status)) }
+    assert_equal [false, true], readiness
+  end
+
+  def test_results_returns_text_for_succeeded_requests_only
+    lines = [succeeded_line('fr_0', '{"1":"Bonjour"}'), errored_line('fr_1')] # fr_1 errored → must be absent
+    texts = AnthropicBatch.results('b', client: fake_client(entries: lines))
+    assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, texts)
+  end
+
+  def test_await_polls_until_ready_then_returns_results
+    api = fake_client(ready_after: 3, entries: [succeeded_line('fr_0', '{"1":"Bonjour"}')]) # ends on the 3rd poll
+    texts = AnthropicBatch.await('b', client: api, interval: 1, sleeper: ->(_seconds) {})
+    assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, texts)
+  end
+
+  def test_await_returns_nil_on_timeout
+    no_sleep = ->(_seconds) {} # keeps the test instant; `await` only sums the intervals
+    assert_nil AnthropicBatch.await('b', client: fake_client(status: :in_progress), interval: 30, timeout: 60, sleeper: no_sleep)
+  end
+end
diff --git a/fastlane/lanes/translation_glossary.rb b/fastlane/lanes/translation_glossary.rb
new file mode 100644
index 000000000000..c7ed0d6a5c2e
--- /dev/null
+++ b/fastlane/lanes/translation_glossary.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+# Terminology configuration for the translator: brand/product names kept verbatim, plus per-locale glossary
+# terms (the preferred translation for an English term) and a register/style note. A pure value object —
+# SOURCING this data (the WordPress.org per-locale glossaries + style guides, a committed YAML, …) is
+# pre-processing done elsewhere and handed in here, so this stays I/O-free and unit-testable.
+class Glossary
+  # Proper nouns (brands / products) that stay verbatim in every locale. Kept deliberately narrow: feature
+  # words that locales legitimately translate ("Reader", "Stats") are intentionally left out.
+  DEFAULT_DO_NOT_TRANSLATE = [
+    'WordPress', 'WordPress.com', 'Jetpack', 'WooCommerce', 'Woo',
+    'Akismet', 'Gravatar', 'Gutenberg', 'Tumblr', 'Simplenote', 'Crowdsignal'
+  ].freeze
+
+  attr_reader :do_not_translate
+
+  # @param do_not_translate [Array<String>] names that must be kept verbatim.
+  # @param terms [Hash{String=>Hash{String=>String}}] locale => { English term => preferred translation }.
+  # @param register [Hash{String=>String}] locale => register/style note (e.g. "Use the informal 'du' form.").
+  def initialize(do_not_translate: DEFAULT_DO_NOT_TRANSLATE, terms: {}, register: {})
+    @do_not_translate = do_not_translate
+    @terms = terms
+    @register = register
+  end
+
+  # A glossary holding only the default brand list: no per-locale terms, no register notes.
+  def self.default
+    new
+  end
+
+  # Prompt fragment for `locale`: its preferred terms, then its register note; '' when neither applies.
+  def guidance(locale)
+    fragments = [term_guidance(locale), register_note(locale)]
+    fragments.reject(&:empty?).join("\n")
+  end
+
+  private
+
+  def term_guidance(locale)
+    pairs = @terms[locale]
+    return '' if pairs.nil? || pairs.empty?
+
+    bullets = pairs.map { |english, translation| "  #{english} -> #{translation}" }.join("\n")
+    "Use these exact translations for these terms, consistently:\n#{bullets}"
+  end
+
+  def register_note(locale)
+    stripped = @register[locale].to_s.strip
+    stripped.empty? ? '' : "Register: #{stripped}"
+  end
+end
diff --git a/fastlane/lanes/translation_glossary_test.rb b/fastlane/lanes/translation_glossary_test.rb
new file mode 100644
index 000000000000..4bc29817f3e2
--- /dev/null
+++ b/fastlane/lanes/translation_glossary_test.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+# Pure-Ruby unit suite for Glossary. Run directly: `ruby fastlane/lanes/translation_glossary_test.rb`.
+require 'minitest/autorun'
+require_relative 'translation_glossary'
+
+# Covers the brand list, per-locale term guidance, register note, the combination, and empty cases.
+class GlossaryTest < Minitest::Test
+  def test_default_is_brands_only
+    brands_only = Glossary.default
+    assert_includes brands_only.do_not_translate, 'WordPress'
+    assert_equal '', brands_only.guidance('fr')
+  end
+
+  def test_term_guidance_is_per_locale
+    glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article', 'tag' => 'étiquette' } })
+    french = glossary.guidance('fr')
+    ['post -> article', 'tag -> étiquette'].each { |pair| assert_includes french, pair }
+    assert_equal '', glossary.guidance('de') # `terms` has no entry for de
+  end
+
+  def test_register_note
+    informal = Glossary.new(register: { 'de' => "Use the informal 'du' form." })
+    assert_includes informal.guidance('de'), "Register: Use the informal 'du' form."
+  end
+
+  def test_terms_and_register_combined
+    terms = { 'fr' => { 'post' => 'article' } }
+    register = { 'fr' => 'Use formal vous.' }
+    french = Glossary.new(terms: terms, register: register).guidance('fr')
+    ['post -> article', 'Register: Use formal vous.'].each { |expected| assert_includes french, expected }
+  end
+
+  def test_custom_do_not_translate
+    assert_equal %w[Foo Bar], Glossary.new(do_not_translate: %w[Foo Bar]).do_not_translate # list is returned as given
+  end
+end
diff --git a/fastlane/lanes/translation_validator.rb b/fastlane/lanes/translation_validator.rb
new file mode 100644
index 000000000000..29b9e9c887fa
--- /dev/null
+++ b/fastlane/lanes/translation_validator.rb
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+# Format-specifier safety gate for machine-translated strings.
+#
+# The one correctness invariant for a translated `.strings` / `.xcstrings` value: it must preserve the
+# source's printf / NSString format ARGUMENTS exactly — same count, same types, and (for positional
+# `%1$@` specifiers) the same index→type mapping. The surrounding prose is free to change; the argument
+# contract is not. Break it and the app reads the wrong vararg off the stack — a crash or garbage at
+# runtime, in a locale the author can't read and CI can't catch.
+#
+# This is deliberately plain Ruby with no dependencies, so it can gate EVERY machine translation before it
+# is written and be unit-tested directly. It's the floor under the `human ?? AI ?? English` resolution in
+# `PluralStrings.fold_cell`: an AI cell that fails this check is discarded (the caller falls through to the
+# English source, flagged needs_review) rather than shipped.
+module TranslationValidator
+  module_function
+
+  # Matches one printf / NSString format specifier: optional positional `N$`, flags, width, precision, length
+  # modifier, then the conversion character. The space flag (`% d`) is intentionally NOT accepted — mirroring
+  # `CatalogHelper::FORMAT_SPECIFIER` — since `% <letter>` also occurs in ordinary prose ("100% done" → "% d")
+  # and would make the validator see an argument in plain text, rejecting a perfectly good translation.
+  FORMAT_SPECIFIER = /
+    %                                    # leading percent
+    (?:(?<position>\d+)\$)?              # optional positional index: 1$, 2$, …
+    [\#0\-+']*                           # flags (NOT space — see note above)
+    (?:\d+|\*)?                          # field width
+    (?:\.(?:\d+|\*))?                    # precision
+    (?<length>hh|h|ll|l|L|q|z|t|j)?      # length modifier
+    (?<conv>[@dDiuUxXoOfFeEgGaAcCsSpn%]) # conversion
+  /x
+
+  # Maps a conversion character to a coarse argument type-class. Comparing classes rather than exact letters
+  # lets cosmetic swaps that consume the same argument through (`%x`↔`%X`, `%d`↔`%i`) while still catching a
+  # real type change that WOULD crash (`%@`→`%d`: object vs integer). The length modifier is tracked separately
+  # in the signature because `%d`↔`%ld` is a genuine ABI difference (int vs long) that can crash on mismatch.
+  TYPE_CLASS = {
+    '@' => :object,
+    'd' => :int, 'D' => :int, 'i' => :int, 'u' => :int, 'U' => :int,
+    'x' => :int, 'X' => :int, 'o' => :int, 'O' => :int,
+    'f' => :float, 'F' => :float, 'e' => :float, 'E' => :float,
+    'g' => :float, 'G' => :float, 'a' => :float, 'A' => :float,
+    's' => :cstring, 'S' => :cstring, 'c' => :char, 'C' => :char, 'p' => :pointer
+  }.freeze
+  private_constant :TYPE_CLASS
+
+  # A string's format arguments, kept as two parallel views:
+  #   positional — { index => "length:type-class" }; order-INDEPENDENT, because moving `%1$@`/`%2$@` around to
+  #                fit the target grammar is exactly what positional specifiers are for.
+  #   sequential — [ "length:type-class", … ]; order-DEPENDENT, because a non-positional specifier binds its
+  #                argument by appearance order (`%@ %d` and `%d %@` are NOT interchangeable).
+  # A literal percent (`%%`) consumes no argument and appears in neither view.
+  Signature = Struct.new(:positional, :sequential)
+  private_constant :Signature
+
+  # Whether `candidate` keeps the same format-argument contract as `source`.
+  def placeholders_match?(source, candidate)
+    !mismatch_reason(source, candidate)
+  end
+
+  # Returns nil when the contract holds; otherwise a short human-readable explanation, suitable for logging
+  # which AI cells were rejected and why. Positional differences are reported before sequential ones.
+  def mismatch_reason(source, candidate)
+    expected = signature(source)
+    actual = signature(candidate)
+    unless expected.positional == actual.positional
+      return "positional placeholders differ (source: #{describe_positional(expected.positional)}; " \
+             "translation: #{describe_positional(actual.positional)})"
+    end
+    return if expected.sequential == actual.sequential
+
+    "sequential placeholders differ (source: #{expected.sequential.inspect}; translation: #{actual.sequential.inspect})"
+  end
+
+  # Builds the Signature of `str` (coerced with to_s): one "length:type-class" token per consumed argument.
+  def signature(str)
+    by_index = {}
+    in_order = []
+    each_specifier(str.to_s) do |spec|
+      next if spec[:conv] == '%' # a literal percent consumes no argument
+
+      token = "#{spec[:length]}:#{TYPE_CLASS.fetch(spec[:conv], spec[:conv])}"
+      if spec[:position].nil?
+        in_order << token
+      else
+        by_index[spec[:position].to_i] = token
+      end
+    end
+    Signature.new(by_index, in_order)
+  end
+
+  # Yields the MatchData of every format specifier in `str`, left to right. Each search resumes where the
+  # previous match ended, so back-to-back specifiers (`%d%@`) and ones buried in prose are all visited.
+  def each_specifier(str)
+    found = FORMAT_SPECIFIER.match(str)
+    until found.nil?
+      yield found
+      found = FORMAT_SPECIFIER.match(str, found.end(0))
+    end
+  end
+  private_class_method :each_specifier
+
+  # Renders a positional map for messages, ordered by index: "%1$(:object), %2$(l:int)"; 'none' when empty.
+  def describe_positional(positional)
+    described = positional.keys.sort.map { |index| "%#{index}$(#{positional[index]})" }
+    described.empty? ? 'none' : described.join(', ')
+  end
+  private_class_method :describe_positional
+end
diff --git a/fastlane/lanes/translation_validator_test.rb b/fastlane/lanes/translation_validator_test.rb
new file mode 100644
index 000000000000..8cbd91a2f38f
--- /dev/null
+++ b/fastlane/lanes/translation_validator_test.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+# Pure-Ruby unit suite for TranslationValidator. Run directly: `ruby fastlane/lanes/translation_validator_test.rb`.
+require 'minitest/autorun'
+require_relative 'translation_validator'
+
+# Exercises the format-specifier contract: positional reordering allowed, type/length/count changes rejected.
+class TranslationValidatorTest < Minitest::Test
+  V = TranslationValidator
+
+  def test_no_specifiers_anything_matches
+    assert V.placeholders_match?('Settings', 'Réglages') # prose only: no arguments to preserve
+    assert V.placeholders_match?('', '') # two empty strings trivially agree
+  end
+
+  def test_positional_reordering_is_allowed
+    # The German sentence leads with %2$@; each index still carries the same type, so the swap is fine.
+    assert V.placeholders_match?('%1$@ invited %2$@', '%2$@ wurde von %1$@ eingeladen')
+  end
+
+  def test_positional_type_change_is_rejected
+    # Index 1 is an object (%1$@) in the source; an integer (%1$d) in its place would read the wrong vararg.
+    refute V.placeholders_match?('%1$@ posts', '%1$d posts')
+  end
+
+  def test_sequential_order_must_be_preserved
+    assert V.placeholders_match?('%@: %d', 'Total %@: %d') # prose changed, argument order intact
+    refute V.placeholders_match?('%@: %d', '%d : %@') # non-positional arguments swapped
+  end
+
+  def test_count_mismatch_is_rejected
+    # The source takes one argument; the two translations carry zero and two.
+    ['Bonjour', 'Bonjour %@ %@'].each { |translation| refute V.placeholders_match?('Hello %@', translation) }
+  end
+
+  def test_literal_percent_is_ignored
+    assert V.placeholders_match?('100% done', '100% terminé') # a space follows %, so it is not a specifier
+    assert V.placeholders_match?('%d%% complete', '%d%% terminé') # %d kept; %% is a literal percent
+    refute V.placeholders_match?('%d%% complete', '%% terminé') # the %d argument went missing
+  end
+
+  def test_length_modifier_change_is_rejected
+    # long (%ld) and int (%d) differ at the ABI level, so dropping the `l` must fail the check.
+    assert V.placeholders_match?('%1$ld words', '%1$ld mots')
+    refute V.placeholders_match?('%1$ld words', '%1$d words')
+  end
+
+  def test_case_only_conversion_change_is_allowed
+    assert V.placeholders_match?('%x', '%X') # both letters share the integer type-class
+  end
+
+  def test_mismatch_reason_is_descriptive
+    assert_nil V.mismatch_reason('%1$@ invited %2$@', '%2$@ a invité %1$@') # contract kept → no reason
+
+    rejected = V.mismatch_reason('%1$@ posts', '%1$d posts')
+    refute_nil rejected
+    assert_includes rejected, 'positional'
+  end
+end

From 74128501416124f3961c99d3c6756afd54be5fcd Mon Sep 17 00:00:00 2001
From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com>
Date: Thu, 25 Jun 2026 22:55:21 -0600
Subject: [PATCH 2/4] Localization: run the AI translation tooling unit tests
 in CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pure-Ruby unit suites (TranslationValidator, Glossary, AnthropicBatch,
AITranslator) weren't executed by any pipeline step — the "Unit Tests" jobs are
the Xcode/XCTest suites, and rubocop (via Danger) only lints them. Add a
lightweight Buildkite step that runs each fastlane/lanes/*_test.rb with plain
ruby (stdlib minitest — no Xcode, no app build, no bundle).

Runs unconditionally rather than behind should-skip-job.sh --job-type validation,
which skips on tooling-only changes — i.e. exactly the PRs that touch these files.
---
 .../commands/test-localization-tooling.sh     | 22 +++++++++++++++++++
 .buildkite/pipeline.yml                       |  7 ++++++
 2 files changed, 29 insertions(+)
 create mode 100755 .buildkite/commands/test-localization-tooling.sh

diff --git a/.buildkite/commands/test-localization-tooling.sh b/.buildkite/commands/test-localization-tooling.sh
new file mode 100755
index 000000000000..2e129cd95470
--- /dev/null
+++ b/.buildkite/commands/test-localization-tooling.sh
@@ -0,0 +1,22 @@
+#!/bin/bash -eu
+
+# Executes every pure-Ruby unit suite of the localization tooling with plain `ruby` (stdlib minitest).
+# There is deliberately no should-skip-job guard: the `validation` skip rule skips tooling-only changes,
+# which are exactly the changes these suites exist to check.
+
+echo "--- :test_tube: Localization tooling unit tests"
+
+shopt -s nullglob
+suites=(fastlane/lanes/*_test.rb)
+if (( ${#suites[@]} == 0 )); then
+  echo "No *_test.rb files found under fastlane/lanes/."
+  exit 0
+fi
+
+failed=0
+for suite in "${suites[@]}"; do
+  echo "+++ :ruby: ${suite}"
+  ruby "${suite}" || failed=1
+done
+
+exit "${failed}"
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 6cb0910aa3fe..9ef4cd1b7e58 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -163,6 +163,13 @@ steps:
           - github_commit_status:
               context: "Verify String Catalog Coverage"
 
+      - label: ":test_tube: Localization Tooling Unit Tests"
+        command: .buildkite/commands/test-localization-tooling.sh
+        plugins: [$CI_TOOLKIT_PLUGIN]
+        notify:
+          - github_commit_status:
+              context: "Localization Tooling Unit Tests"
+
   #################
   # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait
   #################

From bbd70ac795bf10d0898d0188fbdabceb535aca3b Mon Sep 17 00:00:00 2001
From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com>
Date: Fri, 26 Jun 2026 14:16:36 -0600
Subject: [PATCH 3/4] Wire AI plural translation into the reverse fold
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the ai_translate_plural -> nil stub with the AI tier. download_localized_plurals
now builds AITranslator.with_anthropic once (gated on ANTHROPIC_API_KEY; absent => the
prior English-fallback behavior, unchanged) and PluralStrings.fold_translations! invokes
it once per (key, locale) with the whole form-set via translate_plural — keeping one
consistent stem across forms — passing the human-translated forms as anchors.

The fold's ai_translator contract changes from per-cell to per-form-set; a per-set API
failure degrades to English (needs_review) instead of aborting the fold. Adds
plural_strings_helper_test.rb covering provenance and the form-set/anchors contract; the
nokogiri require is made lazy so that pure suite needs no gems (matches the no-bundle CI job).
---
 fastlane/lanes/localization_plurals.rb       |  38 +++--
 fastlane/lanes/plural_strings_helper.rb      |  60 +++++--
 fastlane/lanes/plural_strings_helper_test.rb | 162 +++++++++++++++++++
 3 files changed, 236 insertions(+), 24 deletions(-)
 create mode 100644 fastlane/lanes/plural_strings_helper_test.rb

diff --git a/fastlane/lanes/localization_plurals.rb b/fastlane/lanes/localization_plurals.rb
index 9cc9692fab89..7a8e1d0288a4 100644
--- a/fastlane/lanes/localization_plurals.rb
+++ b/fastlane/lanes/localization_plurals.rb
@@ -81,7 +81,7 @@
       catalog,
       categories_by_locale: categories_by_locale,
       translations_by_locale: plural_translations_by_locale(File.join(PROJECT_ROOT_FOLDER, 'WordPress', 'Resources')),
-      ai_translator: method(:ai_translate_plural)
+      ai_translator: plural_ai_translator
     )
     File.write(PLURALS_CATALOG, "#{JSON.pretty_generate(catalog)}\n")
     UI.message("Folded plural translations from Localizable.strings into #{File.basename(PLURALS_CATALOG)} (#{written} locale variations).")
@@ -138,13 +138,33 @@ def plural_translations_by_locale(dir)
     end
   end
 
-  # Machine-translation floor for the reverse fold: invoked for every plural slot with no human translation.
-  # Returns nil until wired to a translation service, leaving such slots to fall back to the English source
-  # (flagged needs_review). The named `category` + dev `note` let the prompt request the correct grammatical
-  # form (e.g. "give me the Polish *few* form of …").
-  # rubocop:disable Lint/UnusedMethodArgument -- keyword names are the documented call contract
-  def ai_translate_plural(id:, source:, category:, note:, locale:)
-    nil # TODO: call the translation service.
+  # The machine-translation tier for the reverse fold (the AI rung of the `human ?? AI ?? English` floor), or
+  # nil when ANTHROPIC_API_KEY isn't configured — in which case untranslated plural cells keep falling back to
+  # the English source (flagged needs_review), exactly as before this was wired. Built once and reused for
+  # every (key, locale) form-set.
+  #
+  # The returned callable matches PluralStrings.fold_translations!'s form-set contract and is wrapped to
+  # DEGRADE, not crash: a per-set API failure logs and returns {} so that one set falls back to English while
+  # the rest of the fold proceeds and commits (the whole reverse step is also guarded by `run_plural_step`).
+  # Going through `AITranslator#translate_plural` — the whole form-set in one request — keeps one consistent
+  # word/stem across the forms (a per-cell call lets the model drift between synonyms, e.g. Polish słowo →
+  # wyrazy → słów).
+  def plural_ai_translator
+    if ENV['ANTHROPIC_API_KEY'].to_s.empty?
+      UI.important('ANTHROPIC_API_KEY not set — skipping AI plural translation; untranslated plurals fall back to English (needs_review).')
+      return nil
+    end
+
+    require_relative 'ai_translator'
+    translator = AITranslator.with_anthropic
+    lambda do |english_forms:, categories:, locale:, note:, anchors:|
+      translator.translate_plural(english_forms: english_forms, categories: categories, locale: locale, note: note, anchors: anchors)
+    rescue StandardError => e
+      UI.error("AI plural translation failed for #{locale} (#{e.message}); falling back to English for this form-set.")
+      {}
+    end
+  rescue LoadError => e
+    UI.important("AI translation tier unavailable (#{e.message}); untranslated plurals fall back to English.")
+    nil
   end
-  # rubocop:enable Lint/UnusedMethodArgument
 end
diff --git a/fastlane/lanes/plural_strings_helper.rb b/fastlane/lanes/plural_strings_helper.rb
index 01d9ee8ee73f..2c17b1029646 100644
--- a/fastlane/lanes/plural_strings_helper.rb
+++ b/fastlane/lanes/plural_strings_helper.rb
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require 'json'
-require 'nokogiri'
 
 # Logic for the String Catalog ⇄ GlotPress plural pipeline. Plain Ruby with no fastlane dependencies, so it's
 # unit-testable directly — the lanes in `localization_plurals.rb` call into it.
@@ -11,7 +10,7 @@
 # `<catalog-key>|==|plural.<cldr-category>` — the same id Apple's `xcodebuild -exportLocalizations`
 # uses. Translations fold back into the catalog JSON using a per-locale CLDR category map that the reverse
 # derives from Apple's exporter at fold time (a throwaway one-plural project — categories are a locale property).
-module PluralStrings
+module PluralStrings # rubocop:disable Metrics/ModuleLength -- one cohesive pipeline of small, single-purpose, individually-documented helpers
   XLIFF_NS = { 'x' => 'urn:oasis:names:tc:xliff:document:1.2' }.freeze
   INFIX = '|==|plural.'
   CLDR_ORDER = %w[zero one two few many other].freeze
@@ -88,6 +87,7 @@ def serialize_legacy_strings(entries)
   # Apple owns the truth; the reverse derives this at fold time from a throwaway-fixture export.
   # @return [Hash{String=>Array<String>}] locale => categories (CLDR order).
   def categories_by_locale_from_skeletons(xliff_paths)
+    require 'nokogiri' # only the exporter-skeleton path needs it; kept lazy so the pure fold has no gem dependency
     xliff_paths.each_with_object({}) do |path, acc|
       cats = Nokogiri::XML(File.read(path)).xpath('//x:trans-unit', XLIFF_NS).filter_map do |tu|
         id = tu['id'].to_s
@@ -101,8 +101,16 @@ def categories_by_locale_from_skeletons(xliff_paths)
   # variations — the inverse of `flat_originals`. For each plural key and target locale, emit exactly the
   # categories that locale needs (per `categories_by_locale`), filling each with `human ?? AI ?? English`.
   # Human cells are `translated`; AI / English-fallback cells are `needs_review` (machine output to re-check).
-  # `ai_translator` is optional and may return nil (the floor falls through to English). Mutates `catalog`;
-  # returns the count of (key, locale) variations written.
+  # Mutates `catalog`; returns the count of (key, locale) variations written.
+  #
+  # `ai_translator` (optional) is invoked ONCE per (key, locale) with the whole form-set — not per cell — so
+  # the model keeps one consistent stem across the forms; a per-category call lets it drift between synonyms
+  # (e.g. Polish słowo -> wyrazy -> słów). It is called as:
+  #   ai_translator.call(english_forms:, categories:, locale:, note:, anchors:) => { <cat> => translation }
+  # where `anchors` are the forms a human already translated (passed as fixed context to stay consistent with,
+  # and excluded from what's asked for). It may return nil / {} or omit any category — those cells fall through
+  # to English. `AITranslator#translate_plural` implements this contract directly; the lane wraps it (see
+  # `plural_ai_translator` in localization_plurals.rb) so a failed request returns {} instead of raising.
   #
   # @param categories_by_locale [Hash{String=>Array<String>}] locale => CLDR categories it needs
   # @param translations_by_locale [Hash{String=>Hash{String=>String}}] locale => { "<key>|==|plural.<cat>" => value }
@@ -134,23 +142,45 @@ def cldr_sort(categories)
   end
   private_class_method :cldr_sort
 
-  # One locale's plural variation hash: { 'variations' => { 'plural' => { <cat> => stringUnit } } }.
+  # One locale's plural variation hash: { 'variations' => { 'plural' => { <cat> => stringUnit } } }. Resolve the
+  # English and human forms first, ask the AI tier (once, whole form-set) for whatever's still missing, then
+  # write each cell as human ?? AI ?? English.
   def plural_variation(entry, cats, human, ai_translator, locale)
-    forms = cats.to_h { |cat| [cat, fold_cell(entry, cat, human, ai_translator, locale)] }
+    english_forms = english_forms_for(entry.plural, cats)
+    human_forms = human_forms_for(entry.key, cats, human)
+    ai_forms = ai_translator.nil? ? {} : ai_translator.call(english_forms: english_forms, categories: cats, locale: locale, note: entry.comment, anchors: human_forms) || {}
+
+    forms = cats.to_h { |cat| [cat, fold_cell(cat, human_forms, ai_forms, english_forms)] }
     { 'variations' => { 'plural' => forms } }
   end
   private_class_method :plural_variation
 
-  # One target stringUnit for (entry, cat, locale): human ?? AI ?? English source; state reflects provenance
-  # (human => translated; AI / English fallback => needs_review).
-  def fold_cell(entry, cat, human, ai_translator, locale)
-    id = "#{entry.key}#{INFIX}#{cat}"
-    human_value = human[id]
-    return cell('translated', human_value) unless human_value.to_s.empty?
+  # English value per needed category — the form's own English, or the `other` value for categories English
+  # doesn't itself distinguish (zero/two/few/many). CLDR guarantees `other`, so it's always present.
+  def english_forms_for(plural, cats)
+    other = plural.dig('other', 'stringUnit', 'value')
+    cats.to_h { |cat| [cat, plural.dig(cat, 'stringUnit', 'value') || other] }
+  end
+  private_class_method :english_forms_for
+
+  # Human (GlotPress) translations present for this key, keyed by CLDR category. These ship as `translated` and
+  # double as the AI request's anchors so the machine-filled forms stay consistent with the human's word choice.
+  def human_forms_for(key, cats, human)
+    cats.each_with_object({}) do |cat, acc|
+      value = human["#{key}#{INFIX}#{cat}"]
+      acc[cat] = value unless value.to_s.empty?
+    end
+  end
+  private_class_method :human_forms_for
+
+  # One target stringUnit for a category: human ?? AI ?? English; state reflects provenance (human =>
+  # translated; AI / English fallback => needs_review, i.e. machine output to re-check).
+  def fold_cell(cat, human_forms, ai_forms, english_forms)
+    human = human_forms[cat]
+    return cell('translated', human) unless human.to_s.empty?
 
-    english = entry.plural.dig(cat, 'stringUnit', 'value') || entry.plural.dig('other', 'stringUnit', 'value')
-    ai = ai_translator&.call(id: id, source: english, category: cat, note: entry.comment, locale: locale)
-    cell('needs_review', ai.to_s.empty? ? english : ai)
+    ai = ai_forms[cat]
+    cell('needs_review', ai.to_s.empty? ? english_forms[cat] : ai)
   end
   private_class_method :fold_cell
 
diff --git a/fastlane/lanes/plural_strings_helper_test.rb b/fastlane/lanes/plural_strings_helper_test.rb
new file mode 100644
index 000000000000..25911eec026a
--- /dev/null
+++ b/fastlane/lanes/plural_strings_helper_test.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+# Pure-Ruby unit suite for PluralStrings.fold_translations! — the reverse fold that folds downloaded plural
+# translations back into the String Catalog with the `human ?? AI ?? English` floor. Run directly:
+# `ruby fastlane/lanes/plural_strings_helper_test.rb`. No bundle / network (the AI tier is a stub lambda).
+require 'minitest/autorun'
+require_relative 'plural_strings_helper'
+
+# Exercises provenance (human => translated; AI / English fallback => needs_review) and the form-set contract:
+# the AI tier is called ONCE per (key, locale) with the whole set of needed categories and the human forms as
+# anchors — never per cell.
+class PluralStringsFoldTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage
+  KEY = 'posts.count'
+  INFIX = PluralStrings::INFIX
+
+  def unit(state, value)
+    { 'stringUnit' => { 'state' => state, 'value' => value } }
+  end
+
+  # A catalog with one English plural (one/other). `extra` adds sibling entries (e.g. a non-plural string).
+  def catalog(extra: {})
+    {
+      'sourceLanguage' => 'en',
+      'version' => '1.0',
+      'strings' => {
+        KEY => {
+          'comment' => 'Number of posts.',
+          'localizations' => { 'en' => { 'variations' => { 'plural' => {
+            'one' => unit('translated', '%lld post'),
+            'other' => unit('translated', '%lld posts')
+          } } } }
+        }
+      }.merge(extra)
+    }
+  end
+
+  # The full stringUnit wrapper a fold wrote for (locale, category) of the plural key under test.
+  def cell(cat, catalog:, locale:)
+    catalog.dig('strings', KEY, 'localizations', locale, 'variations', 'plural', cat)
+  end
+
+  # An AI stub returning `reply`, recording every call's kwargs so the form-set contract can be asserted.
+  def recording_translator(reply:, calls:)
+    lambda do |english_forms:, categories:, locale:, note:, anchors:|
+      calls << { english_forms: english_forms, categories: categories, locale: locale, note: note, anchors: anchors }
+      reply
+    end
+  end
+
+  def fold(cat, categories_by_locale:, translations_by_locale: {}, ai_translator: nil)
+    PluralStrings.fold_translations!(cat, categories_by_locale: categories_by_locale, translations_by_locale: translations_by_locale, ai_translator: ai_translator)
+  end
+
+  # Polish needs four categories but only `one` is human-translated — the setup the form-set contract is about.
+  # Folds with the supplied AI reply and returns [catalog, recorded_calls].
+  def fold_polish(reply:)
+    cat = catalog
+    calls = []
+    fold(cat,
+         categories_by_locale: { 'pl' => %w[one few many other] },
+         translations_by_locale: { 'pl' => { "#{KEY}#{INFIX}one" => '%lld wpis' } },
+         ai_translator: recording_translator(reply: reply, calls: calls))
+    [cat, calls]
+  end
+
+  POLISH_AI = { 'few' => '%lld wpisy', 'many' => '%lld wpisów', 'other' => '%lld wpisu' }.freeze
+
+  def test_human_translation_wins_and_is_marked_translated
+    cat = catalog
+    written = fold(cat, categories_by_locale: { 'fr' => %w[one other] }, translations_by_locale: {
+                     'fr' => { "#{KEY}#{INFIX}one" => '%lld article', "#{KEY}#{INFIX}other" => '%lld articles' }
+                   })
+
+    assert_equal 1, written
+    assert_equal unit('translated', '%lld article'), cell('one', catalog: cat, locale: 'fr')
+    assert_equal unit('translated', '%lld articles'), cell('other', catalog: cat, locale: 'fr')
+  end
+
+  def test_english_fallback_when_no_human_and_no_ai
+    cat = catalog
+    fold(cat, categories_by_locale: { 'fr' => %w[one other] })
+
+    # No human, no AI tier wired: each cell falls through to the English source, flagged for review.
+    assert_equal unit('needs_review', '%lld post'), cell('one', catalog: cat, locale: 'fr')
+    assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'fr')
+  end
+
+  def test_ai_fills_missing_cells_and_marks_needs_review
+    cat = catalog
+    ai = recording_translator(reply: { 'one' => '%lld article', 'other' => '%lld articles' }, calls: [])
+    fold(cat, categories_by_locale: { 'fr' => %w[one other] }, ai_translator: ai)
+
+    assert_equal unit('needs_review', '%lld article'), cell('one', catalog: cat, locale: 'fr')
+    assert_equal unit('needs_review', '%lld articles'), cell('other', catalog: cat, locale: 'fr')
+  end
+
+  def test_formset_call_carries_english_forms_anchors_and_note
+    _cat, calls = fold_polish(reply: POLISH_AI)
+
+    assert_equal 1, calls.size, 'expected a single form-set call, not one per category'
+    call = calls.first
+    assert_equal %w[one few many other], call[:categories]
+    assert_equal 'pl', call[:locale]
+    assert_equal 'Number of posts.', call[:note]
+    assert_equal({ 'one' => '%lld wpis' }, call[:anchors])
+    # few/many/other have no English form of their own, so they fall back to the English `other` value.
+    assert_equal({ 'one' => '%lld post', 'few' => '%lld posts', 'many' => '%lld posts', 'other' => '%lld posts' }, call[:english_forms])
+  end
+
+  def test_formset_result_merges_human_and_ai_by_provenance
+    cat, = fold_polish(reply: POLISH_AI)
+
+    assert_equal unit('translated', '%lld wpis'), cell('one', catalog: cat, locale: 'pl') # human
+    assert_equal unit('needs_review', '%lld wpisy'), cell('few', catalog: cat, locale: 'pl') # AI
+    assert_equal unit('needs_review', '%lld wpisów'), cell('many', catalog: cat, locale: 'pl')
+    assert_equal unit('needs_review', '%lld wpisu'), cell('other', catalog: cat, locale: 'pl')
+  end
+
+  def test_ai_omitted_category_falls_back_to_english
+    cat = catalog
+    ai = recording_translator(reply: { 'one' => '%lld Beitrag' }, calls: []) # 'other' omitted
+    fold(cat, categories_by_locale: { 'de' => %w[one other] }, ai_translator: ai)
+
+    assert_equal unit('needs_review', '%lld Beitrag'), cell('one', catalog: cat, locale: 'de')
+    assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'de') # English fallback
+  end
+
+  def test_ai_nil_return_falls_back_to_english
+    cat = catalog
+    fold(cat, categories_by_locale: { 'de' => %w[one other] }, ai_translator: ->(**) {}) # declines entirely (nil)
+
+    assert_equal unit('needs_review', '%lld post'), cell('one', catalog: cat, locale: 'de')
+    assert_equal unit('needs_review', '%lld posts'), cell('other', catalog: cat, locale: 'de')
+  end
+
+  def test_source_locale_is_not_folded
+    cat = catalog
+    original_en = cat.dig('strings', KEY, 'localizations', 'en')
+    written = fold(cat, categories_by_locale: { 'en' => %w[one other], 'fr' => %w[one other] })
+
+    assert_equal 1, written, 'the source locale must be excluded from the fold'
+    assert_same original_en, cat.dig('strings', KEY, 'localizations', 'en'), 'source localization left untouched'
+    refute_nil cell('one', catalog: cat, locale: 'fr')
+  end
+
+  def test_non_plural_entries_are_skipped
+    extra = { 'app.title' => { 'localizations' => { 'en' => unit('translated', 'WordPress') } } }
+    cat = catalog(extra: extra)
+    written = fold(cat, categories_by_locale: { 'fr' => %w[one other] })
+
+    assert_equal 1, written, 'only the plural entry is counted'
+    # The non-plural entry is left exactly as it was — no `fr` localization invented for it.
+    assert_equal({ 'en' => unit('translated', 'WordPress') }, cat.dig('strings', 'app.title', 'localizations'))
+  end
+
+  def test_counts_variations_across_locales
+    cat = catalog
+    written = fold(cat, categories_by_locale: { 'fr' => %w[one other], 'de' => %w[one other] })
+
+    assert_equal 2, written
+  end
+end

From 068fe82000dc0fd5fbe6f4a9d442a8a4bc6202d0 Mon Sep 17 00:00:00 2001
From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com>
Date: Fri, 26 Jun 2026 15:10:02 -0600
Subject: [PATCH 4/4] Document the translation pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds docs/localization-pipeline.md: the GlotPress + AI round trip, the human ?? AI ?? English
floor, the AI tier (gating, placeholder gate, form-set plurals), and why regular-string MT is
deferred to the String Catalog cutover — machine translations only ship from a state-bearing
store (the catalog's needs_review), never from the live legacy .strings. Linked from AGENTS.md.
---
 AGENTS.md                     |  2 +-
 docs/localization-pipeline.md | 81 +++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 docs/localization-pipeline.md

diff --git a/AGENTS.md b/AGENTS.md
index 698aeb6be1cb..688642d6f44e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -46,7 +46,7 @@ WordPress-iOS uses a modular architecture with the main app and separate Swift p
 ### Important Considerations
 - **Multi-site Support**: Code must handle both WordPress.com and self-hosted sites
 - **Accessibility**: Use proper accessibility labels and traits
-- **Localization**: follow best practices from @docs/localization.md
+- **Localization**: follow best practices from @docs/localization.md. For how strings flow through GlotPress and the AI translation tier (the `human ?? AI ?? English` floor), see @docs/localization-pipeline.md.
 
 ## Xcode Schemes
 - `WordPress` builds the WordPress iOS app and runs `WordPressUnitTests.xctestplan` — default for builds and the full unit test suite. Use this scheme to run unit tests.
diff --git a/docs/localization-pipeline.md b/docs/localization-pipeline.md
new file mode 100644
index 000000000000..691f0c9e2204
--- /dev/null
+++ b/docs/localization-pipeline.md
@@ -0,0 +1,81 @@
+# Localization translation pipeline
+
+How user-facing strings get from English source into every shipped locale. This is the **release/tooling** view (the fastlane lanes under `fastlane/lanes/`); for how to *write* localizable strings in app code, see [localization.md](./localization.md).
+
+> The contract for every shipped string is **`human ?? AI ?? English`**: a human (GlotPress) translation if one exists, otherwise a machine translation, otherwise the English source. Nothing ships a broken placeholder — machine output that fails the format-specifier gate falls back to English.
+
+## The round trip
+
+Strings make two trips, both driven from fastlane.
+
+### Forward (code freeze) — English → GlotPress
+
+Run as part of `complete_code_freeze` (`generate_strings_file_for_glotpress`):
+
+- **Regular strings** are extracted from source (`ios_generate_strings_file_from_code`, i.e. `genstrings` over `NSLocalizedString` / `AppLocalizedString`) into `WordPress/Resources/en.lproj/Localizable.strings`, then the manually-maintained `.strings` files are merged in. These English originals are uploaded to the [apps/ios GlotPress project](https://translate.wordpress.org/projects/apps/ios/dev/).
+- **Plurals** are authored in `WordPress/Classes/Plurals.xcstrings` (English `one`/`other`). The forward lane (`generate_plural_strings_for_glotpress`) flattens each plural form into an independent string keyed `<key>|==|plural.<cldr-category>` and merges those originals into the same `Localizable.strings`, so they ride the same GlotPress project as everything else.
+
+Translators then do their work in GlotPress.
+
+### Reverse (release prep) — GlotPress → app
+
+`download_localized_strings` (called by `complete_code_freeze` / `finalize_release`) runs, in order:
+
+1. **Download** each locale's `Localizable.strings` from GlotPress (`ios_download_strings_files_from_glotpress`) into `WordPress/Resources/<locale>.lproj/`, and commit. The export filter is `status: current`, so **only translated strings come back** — untranslated ones are *omitted entirely* (not emitted as empty values; the action even errors if it finds an empty value). This is why `pl` ships ~1,650 of ~4,280 keys while `fr` ships ~all of them.
+2. **Re-dispatch** the relevant subset back to the manually-maintained `.strings` files (`ios_extract_keys_from_strings_files`), and commit.
+3. **Plural fold** (`download_localized_plurals`): pull the flat plural translations back out of the downloaded `Localizable.strings`, fold them into `Plurals.xcstrings`, and fill the gaps with the AI tier (below).
+
+Step 3 runs via `run_plural_step`, which logs and continues on failure — the AI tier can never break a release.
+
+## The AI tier
+
+The machine-translation rung of the floor. It is **injected and gated**, never mandatory:
+
+- **Gate**: `ANTHROPIC_API_KEY`. Absent ⇒ the AI tier is skipped entirely and untranslated cells keep their English fallback — i.e. exactly the pre-AI behavior. Providing the key (e.g. in the release environment) is what turns it on.
+- **Placeholder gate**: every machine cell must preserve the source's `printf`/`NSString` format specifiers exactly (count + type; positional `%1$@` may reorder). A mismatch is rejected and the cell falls back to English. So the AI tier can only ever produce a *safe* translation or nothing.
+- **Model**: `claude-opus-4-8` by default (see `AITranslator::DEFAULT_MODEL`).
+
+The reusable primitives live in `fastlane/lanes/`: `AITranslator` (prompt building + validation; `translate` / `translate_plural` / `translate_all` / the async Message-Batches path), `TranslationValidator` (the placeholder gate), `Glossary` (brand do-not-translate list + per-locale terms), and `AnthropicBatch` (SDK glue). All the logic is pure and unit-tested with a canned-reply lambda; only `AITranslator.with_anthropic` touches the network.
+
+## What's wired today: plurals
+
+The plural reverse-fold (`PluralStrings.fold_translations!`) fills each `(key, locale, category)` cell of `Plurals.xcstrings` as `human ?? AI ?? English` — human ⇒ `translated`; AI / English ⇒ `needs_review`. The AI tier is called **once per `(key, locale)` form-set** (`AITranslator#translate_plural`), not per cell, with the already-human-translated forms passed as **anchors**. Translating the whole set in one request keeps a single consistent stem across the forms — a per-category call lets the model drift between synonyms (Polish `słowo` → `wyrazy` → `słów`), and nothing in a per-category request can prevent that.
+
+**`Plurals.xcstrings` is a String Catalog, which is why this works**: the catalog carries a real `needs_review` state, so a machine cell is recorded as machine output and a human translation supersedes it on the next download.
+
+> **This does not ship machine translations yet.** `Plurals.xcstrings` is built into the app but **not consumed at runtime** — no code reads from it; the app still renders plurals the legacy way. The fold *pre-populates* the catalog so it's ready when plurals cut over to it. Until that cutover, the AI plural translations sit in the catalog unused.
+
+## What's deferred: regular strings
+
+Regular (non-plural) strings are **not** machine-translated, by design. The app still ships the legacy `WordPress/Resources/<locale>.lproj/Localizable.strings` for them — `Localizable.xcstrings` (`generate_strings_catalog`) is generated as the future backing store but isn't the runtime store yet. A machine translation written into the legacy `.strings` would be **live immediately**, and we don't want machine-translated regular strings shipping before the catalog cutover.
+
+So regular-string MT waits for the same shape as plurals: once `Localizable.xcstrings` becomes the runtime store, a regular-string **catalog reverse-fold** folds the human translations in and AI-fills the `needs_review` gaps, staged in the catalog (not shipped) until cutover — exactly as the plural fold does today.
+
+When that's built, two facts established here will carry over:
+
+- **"Undefined by GlotPress" = absent**, not empty. The export omits untranslated strings (`status: current`; verified no empty-valued entries), so absence is the untranslated signal.
+- **Humans always supersede MT**, and machine output never returns to GlotPress — so there's no translation-memory pollution and no manual reconciliation, as long as MT lives in a state-bearing store (the catalog's `needs_review`).
+
+## Why these choices
+
+- **Why translate whole plural form-sets at once?** Per-category calls let the model pick different synonyms for different forms of the same word. One request for the whole set, with human forms as anchors, keeps one stem.
+- **Why is the AI tier gated and non-fatal?** Cost and safety: it runs only where a key is configured, and a failure logs and continues rather than breaking a release.
+- **Why does regular-string MT need the catalog, not legacy `.strings`?** The catalog's `needs_review` state lets a machine translation be *staged* (built but not shipped until cutover) and lets humans supersede it automatically. Legacy `.strings` has no state and is live, so anything written there ships immediately — which is exactly what we don't want before cutover.
+
+## Operational notes
+
+- **Eyeball one string against the live model** (needs `ANTHROPIC_API_KEY` + `bundle install`):
+  `ruby fastlane/lanes/ai_translator.rb fr "You have %1$d new posts" "Notification text. %1$d is the count."`
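+- **Tests** are pure stdlib minitest and run in CI (`.buildkite/commands/test-localization-tooling.sh`). To run them locally, invoke each file separately — `ruby a_test.rb b_test.rb` runs only the first file and passes the rest as arguments: `for f in fastlane/lanes/*_test.rb; do ruby "$f" || break; done`.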
+
+## Code map
+
+| Concern | File |
+| --- | --- |
+| Translation tier (prompts, validation, `translate*`) | `fastlane/lanes/ai_translator.rb` |
+| Placeholder safety gate | `fastlane/lanes/translation_validator.rb` |
+| Brand do-not-translate + per-locale terms | `fastlane/lanes/translation_glossary.rb` |
+| Anthropic SDK glue + Message Batches | `fastlane/lanes/anthropic_batch.rb` |
+| Plural fold (`Localizable.strings` ⇄ `Plurals.xcstrings`) + AI wiring | `fastlane/lanes/plural_strings_helper.rb`, `fastlane/lanes/localization_plurals.rb` |
+| Catalog generation (future regular-string backing store) | `fastlane/lanes/localization_catalog.rb` |
+| Download/upload orchestration | `fastlane/lanes/localization.rb` |