From e384da91ebba6d8679f6c8e16270d72bdc72e9ee Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 19 Jun 2026 08:59:52 -0600 Subject: [PATCH 1/5] Add localization placeholder-compatibility guardrail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Part of "Faster Releases" Phase 2 (continuous translations). A Fastlane helper (`StringPlaceholders`) that flags any localization key present in both the old and the new English source whose placeholder shape — count, position, or argument type — changed. New and removed keys are ignored, since copy that needs a fresh translation is expected to land under a new key. Exposed as the `validate_string_placeholders` lane, and reused to validate that AI-backfilled translations preserve their source's placeholders. Enforces the invariant the continuous-translation model relies on: never reuse a key for placeholder-incompatible copy. --- fastlane/Fastfile | 2 + fastlane/helpers/string_placeholders.rb | 93 +++++++++++++++++++++++++ fastlane/lanes/localization.rb | 31 +++++++++ 3 files changed, 126 insertions(+) create mode 100644 fastlane/helpers/string_placeholders.rb diff --git a/fastlane/Fastfile b/fastlane/Fastfile index c2872595ffd2..c7c34db56816 100644 --- a/fastlane/Fastfile +++ b/fastlane/Fastfile @@ -5,6 +5,8 @@ fastlane_require 'dotenv' fastlane_require 'open-uri' fastlane_require 'git' +require_relative 'helpers/string_placeholders' + UI.user_error!('Please run fastlane via `bundle exec`') unless FastlaneCore::Helper.bundler? 
######################################################################## diff --git a/fastlane/helpers/string_placeholders.rb b/fastlane/helpers/string_placeholders.rb new file mode 100644 index 000000000000..8bc00e1f320f --- /dev/null +++ b/fastlane/helpers/string_placeholders.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require 'json' +require 'open3' + +# Compares the placeholder "shape" — the count, position, and argument type of +# the format specifiers — of localized strings. +# +# Used in two places in the "Faster Releases" RFC, Phase 2 (continuous +# translations): +# 1. The localization guardrail: an existing key's English value must not +# change its placeholders without getting a new key, or existing +# translations would silently break. +# 2. Validating AI-backfilled translations: a machine translation that drops +# or reorders a `%@` / `%1$d` must be rejected rather than shipped. +module StringPlaceholders + # printf / NSString conversion characters grouped by the argument type a + # translator must preserve. `%d` <-> `%i` is fine (same int arg); `%d` <-> `%@` + # is not (int vs object). + CONVERSION_CLASSES = { + '@' => 'object', + 'd' => 'int', 'i' => 'int', 'u' => 'int', 'o' => 'int', 'x' => 'int', 'X' => 'int', + 'f' => 'float', 'e' => 'float', 'E' => 'float', 'g' => 'float', 'G' => 'float', 'a' => 'float', 'A' => 'float', + 'c' => 'char', 'C' => 'char', + 's' => 'cstring', 'S' => 'cstring', + 'p' => 'pointer' + }.freeze + + # A single format specifier: optional positional arg (`1$`), flags, width, + # precision, length modifier, then the conversion character. `%%` (literal + # percent) is matched too, so it can be explicitly skipped. 
+ SPECIFIER = /%(?<position>\d+\$)?[-+ 0#]*(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:hh|h|ll|l|q|L|z|t|j)?(?<conversion>[@diouxXeEfgGaAcCsSpn%])/ + + module_function + + # Parses a `.strings` file into a `{ key => value }` hash using `plutil` + # (`.strings` is an old-style property list, and `plutil` is the most reliable + # parser for it — handling escapes, comments, and Unicode). + def parse_file(path) + raise "File not found: #{path}" unless File.exist?(path) + + json, stderr, status = Open3.capture3('plutil', '-convert', 'json', '-o', '-', path) + raise "Failed to parse #{path} with plutil:\n#{stderr}" unless status.success? + + JSON.parse(json) + end + + # A canonical signature of the placeholders in a string value, or '' if there + # are none. Two values with the same signature are placeholder-compatible. + def signature(value) + specifiers = [] + value.to_s.scan(SPECIFIER) do + match = Regexp.last_match + conversion = match[:conversion] + next if conversion == '%' # literal percent, not a placeholder + + position = match[:position]&.delete('$')&.to_i + specifiers << { position: position, klass: CONVERSION_CLASSES.fetch(conversion, conversion) } + end + + return '' if specifiers.empty? + + if specifiers.all? { |s| s[:position] } + # Positional args (`%1$@`): compare the position -> type mapping. + specifiers.sort_by { |s| s[:position] }.map { |s| "#{s[:position]}:#{s[:klass]}" }.join(',') + elsif specifiers.none? { |s| s[:position] } + # Non-positional args: compare types in order of appearance. + specifiers.map { |s| s[:klass] }.join(',') + else + # Mixed positional/non-positional is unusual; be conservative and keep both. + specifiers.map { |s| "#{s[:position] || '_'}:#{s[:klass]}" }.join(',') + end + end + + # Whether two string values share the same placeholder shape. 
+ def compatible?(old_value, new_value) + signature(old_value) == signature(new_value) + end + + # Given two `{ key => value }` hashes, returns the keys present in BOTH whose + # placeholder signature changed, as an array of detail hashes. New and removed + # keys are ignored on purpose — copy that needs a fresh translation is expected + # to land under a new key (which shows up as remove-old + add-new). + def incompatible_changes(old_strings, new_strings) + (old_strings.keys & new_strings.keys).sort.filter_map do |key| + old_signature = signature(old_strings[key]) + new_signature = signature(new_strings[key]) + next if old_signature == new_signature + + { key: key, old: old_strings[key], new: new_strings[key], old_signature: old_signature, new_signature: new_signature } + end + end +end diff --git a/fastlane/lanes/localization.rb b/fastlane/lanes/localization.rb index e29a0f9c8876..79928c381406 100644 --- a/fastlane/lanes/localization.rb +++ b/fastlane/lanes/localization.rb @@ -203,6 +203,37 @@ def generate_strings_file(gutenberg_path:, derived_data_path:) ) end + # Fails if any key present in BOTH the old and the newly-generated English + # source changed its placeholder shape (count / position / argument type). + # New and removed keys are ignored — copy that needs a fresh translation is + # expected to land under a new key. See the `StringPlaceholders` helper. + # + # This enforces, as a check, the invariant the continuous-translation model + # relies on: never reuse a key for placeholder-incompatible copy. + # + # @param [String] old Path to the previous `.strings` file. + # @param [String] new Path to the newly-generated `.strings` file. + # + desc 'Validate that no localized key changed its placeholders incompatibly' + lane :validate_string_placeholders do |old:, new:| + violations = StringPlaceholders.incompatible_changes( + StringPlaceholders.parse_file(old), + StringPlaceholders.parse_file(new) + ) + + if violations.empty? 
+ UI.success('No incompatible placeholder changes.') + next + end + + violations.each do |violation| + UI.error(violation[:key]) + UI.error(" was: #{violation[:old].inspect} [#{violation[:old_signature].empty? ? 'none' : violation[:old_signature]}]") + UI.error(" now: #{violation[:new].inspect} [#{violation[:new_signature].empty? ? 'none' : violation[:new_signature]}]") + end + UI.user_error!("#{violations.size} key(s) changed placeholders incompatibly. Give changed copy a new key so existing translations stay valid.") + end + # Updates the `AppStoreStrings.po` files (WP+JP) with the latest content from the `release_notes.txt` files and the other text sources. # # @param [String] version The current `x.y` version of the app. Used to derive the `release_notes_xxy` key to use in the `.po` file. From cfaa04a9accb4b8ae86b7ce2f53508c4560c8509 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:21:05 -0600 Subject: [PATCH 2/5] Add AI backfill for untranslated strings Part of "Faster Releases" Phase 2 (continuous translations). The daily sync runs this so the app never ships an untranslated string. - `backfill_missing_translations` lane: for each locale, translate any key that exists in the English base but is missing from the locale, validate that each result preserves its placeholders, and append it to the locale's `Localizable.strings` under a marker comment. - `AITranslator` helper wraps the Anthropic API (sonnet-4-6, batched, JSON responses) and drops any translation whose placeholders don't match the source. - Adds the `anthropic` gem. Human translations from GlotPress overwrite these on the next sync; the AI output is never uploaded to GlotPress. 
--- Gemfile | 2 + Gemfile.lock | 10 ++- fastlane/Fastfile | 1 + fastlane/helpers/ai_translator.rb | 95 ++++++++++++++++++++++++++ fastlane/lanes/localization.rb | 110 ++++++++++++++++++++++++++++++ 5 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 fastlane/helpers/ai_translator.rb diff --git a/Gemfile b/Gemfile index fa6bc472ba4c..e32c326374b5 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,8 @@ source 'https://rubygems.org' +# Used to AI-translate still-untranslated strings during the daily translation sync. +gem 'anthropic' gem 'danger-dangermattic', '~> 1.3' gem 'dotenv' # 2.223.1 includes a fix for an ASC-interfacing issue diff --git a/Gemfile.lock b/Gemfile.lock index d8b47a9c428b..b80291c95956 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,6 +5,10 @@ GEM abbrev (0.1.2) addressable (2.9.0) public_suffix (>= 2.0.2, < 8.0) + anthropic (1.49.0) + cgi + connection_pool + standardwebhooks artifactory (3.0.17) ast (2.4.3) atomos (0.1.3) @@ -33,6 +37,7 @@ GEM bigdecimal (4.1.2) buildkit (1.6.1) sawyer (>= 0.6) + cgi (0.5.1) chroma (0.2.0) claide (1.1.0) claide-plugins (0.9.2) @@ -43,6 +48,7 @@ GEM colored2 (3.1.2) commander (4.6.0) highline (~> 2.0.0) + connection_pool (3.0.2) cork (0.3.0) colored2 (~> 3.1) csv (3.3.5) @@ -348,6 +354,7 @@ GEM CFPropertyList naturally singleton (0.3.0) + standardwebhooks (1.0.1) terminal-notifier (2.0.0) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) @@ -376,6 +383,7 @@ PLATFORMS ruby DEPENDENCIES + anthropic danger-dangermattic (~> 1.3) dotenv fastlane (~> 2.236) @@ -388,4 +396,4 @@ DEPENDENCIES rubocop-rake (~> 0.7) BUNDLED WITH - 2.4.22 + 2.6.8 diff --git a/fastlane/Fastfile b/fastlane/Fastfile index c7c34db56816..f776e23d652f 100644 --- a/fastlane/Fastfile +++ b/fastlane/Fastfile @@ -6,6 +6,7 @@ fastlane_require 'open-uri' fastlane_require 'git' require_relative 'helpers/string_placeholders' +require_relative 'helpers/ai_translator' UI.user_error!('Please run fastlane via `bundle exec`') unless 
FastlaneCore::Helper.bundler? diff --git a/fastlane/helpers/ai_translator.rb b/fastlane/helpers/ai_translator.rb new file mode 100644 index 000000000000..f3f45743b3e4 --- /dev/null +++ b/fastlane/helpers/ai_translator.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require 'json' + +# Translates UI strings with Claude. Used by the daily translation sync to +# backfill locales that GlotPress hasn't fully translated yet, so the app never +# ships an untranslated string. Human translations from GlotPress overwrite +# these on the next sync; the AI output is never pushed back to GlotPress. +# +# See the "Faster Releases" RFC, Phase 2 (continuous translations). +module AITranslator + # Matches the Claude model already used elsewhere in CI (`.buildkite/claude-analysis.yml`). + MODEL = :"claude-sonnet-4-6" + # Keep batches small so each request's JSON response stays well under the + # non-streaming token ceiling and a single failure costs little to retry. + BATCH_SIZE = 40 + MAX_TOKENS = 8192 + + module_function + + # Translates a set of strings, dropping any result whose placeholders don't + # match the source (we never ship a translation that would break a `%@`). + # + # @param strings [Hash] `{ key => english_value }` to translate. + # @param language_code [String] the `.lproj` locale code, e.g. `pt-BR`. + # @param language_name [String] a human language name for the prompt, e.g. `Brazilian Portuguese`. + # @return [Hash] `{ key => translation }` for entries that passed validation. + def translate(strings:, language_code:, language_name:) + return {} if strings.empty? + + # Required late so loading the Fastfile doesn't depend on the gem being + # installed — only this lane needs it. 
+ require 'anthropic' + client = Anthropic::Client.new # reads ANTHROPIC_API_KEY from the environment + + strings.each_slice(BATCH_SIZE).each_with_object({}) do |batch, translations| + batch_hash = batch.to_h + translate_batch(client: client, strings: batch_hash, language_code: language_code, language_name: language_name).each do |key, translation| + english = batch_hash[key] + next if english.nil? || translation.nil? || translation.to_s.empty? + + unless StringPlaceholders.compatible?(english, translation) + UI.message("Dropping #{language_code} translation for '#{key}' — placeholders changed") + next + end + + translations[key] = translation + end + end + end + + def translate_batch(client:, strings:, language_code:, language_name:) + message = client.messages.create( + model: MODEL, + max_tokens: MAX_TOKENS, + messages: [{ role: 'user', content: prompt_for(strings: strings, language_code: language_code, language_name: language_name) }] + ) + + text = message.content.filter_map { |block| block.text if block.type == :text }.join + parse_json_object(text) + rescue StandardError => e + # A best-effort backfill must never crash the daily job. Skip this batch + # (those strings stay untranslated for now) and move on. + UI.error("Claude translation request failed for #{language_code}: #{e.message}") + {} + end + + def prompt_for(strings:, language_code:, language_name:) + <<~PROMPT + Translate these iOS app UI strings from English to #{language_name} (locale code `#{language_code}`). + + Rules: + - Preserve EVERY format specifier exactly: `%@`, `%1$@`, `%2$d`, `%%`, etc. Keep the same count, the same order, and the same positional indices (the `$` numbers). + - Preserve leading and trailing whitespace and the surrounding punctuation style. + - Keep translations concise and natural for a mobile UI. + - Return ONLY a JSON object mapping each original key to its translation — no prose, no markdown, no code fences. 
+ + Strings to translate (JSON object, key → English source): + #{JSON.pretty_generate(strings)} + PROMPT + end + + # Extracts the JSON object from the model's response, tolerating any stray + # prose or code fences despite the prompt asking for raw JSON. + def parse_json_object(text) + json = text[/\{.*\}/m] + return {} if json.nil? + + parsed = JSON.parse(json) + parsed.is_a?(Hash) ? parsed : {} + rescue JSON::ParserError => e + UI.error("Could not parse Claude response as JSON: #{e.message}") + {} + end +end diff --git a/fastlane/lanes/localization.rb b/fastlane/lanes/localization.rb index 79928c381406..52312aaea271 100644 --- a/fastlane/lanes/localization.rb +++ b/fastlane/lanes/localization.rb @@ -53,6 +53,44 @@ 'zh-tw' => 'zh-Hant' # Chinese (Taiwan) }.freeze +# Human-readable language names (keyed by `.lproj` code) used when asking Claude +# to backfill missing translations. See `backfill_missing_translations`. +LANGUAGE_NAMES = { + 'ar' => 'Arabic', + 'bg' => 'Bulgarian', + 'cs' => 'Czech', + 'cy' => 'Welsh', + 'da' => 'Danish', + 'de' => 'German', + 'en-AU' => 'English (Australia)', + 'en-CA' => 'English (Canada)', + 'en-GB' => 'English (UK)', + 'es' => 'Spanish', + 'fr' => 'French', + 'he' => 'Hebrew', + 'hr' => 'Croatian', + 'hu' => 'Hungarian', + 'id' => 'Indonesian', + 'is' => 'Icelandic', + 'it' => 'Italian', + 'ja' => 'Japanese', + 'ko' => 'Korean', + 'nb' => 'Norwegian Bokmål', + 'nl' => 'Dutch', + 'pl' => 'Polish', + 'pt' => 'Portuguese', + 'pt-BR' => 'Brazilian Portuguese', + 'ro' => 'Romanian', + 'ru' => 'Russian', + 'sk' => 'Slovak', + 'sq' => 'Albanian', + 'sv' => 'Swedish', + 'th' => 'Thai', + 'tr' => 'Turkish', + 'zh-Hans' => 'Simplified Chinese', + 'zh-Hant' => 'Traditional Chinese' +}.freeze + # Mapping of all locales which can be used for AppStore metadata (Glotpress code => AppStore Connect code) # # TODO: Replace with `LocaleHelper` once provided by release toolkit (https://github.com/wordpress-mobile/release-toolkit/pull/296) @@ -424,6 
+462,78 @@ def generate_strings_file(gutenberg_path:, derived_data_path:) ) end + # Backfills still-untranslated strings with AI so the app never ships an + # untranslated string. For each locale, every key present in the English base + # but missing from the locale is translated by Claude, validated to preserve + # its placeholders (via `StringPlaceholders`), and appended to that locale's + # `Localizable.strings` under a clearly-marked section. + # + # Human translations from GlotPress overwrite these on the next sync — the AI + # output is only a stopgap and is never uploaded to GlotPress. Part of the + # "Faster Releases" RFC, Phase 2 (continuous translations). + # + # @called_by sync_translations + # + desc 'Backfill still-untranslated strings with AI (never ship untranslated copy)' + lane :backfill_missing_translations do + get_required_env('ANTHROPIC_API_KEY') # Fail fast if the key isn't available. + + parent_dir = File.join(PROJECT_ROOT_FOLDER, 'WordPress', 'Resources') + base_strings = StringPlaceholders.parse_file(File.join(PROJECT_ROOT_FOLDER, WORDPRESS_EN_LPROJ, 'Localizable.strings')) + modified_files = [] + + GLOTPRESS_TO_LPROJ_APP_LOCALE_CODES.each_value do |lproj| + locale_file = File.join(parent_dir, "#{lproj}.lproj", 'Localizable.strings') + next unless File.exist?(locale_file) + + locale_strings = StringPlaceholders.parse_file(locale_file) + missing = base_strings.reject { |key, _value| locale_strings.key?(key) } + next if missing.empty? + + UI.message("#{lproj}: #{missing.size} untranslated string(s)") + translations = AITranslator.translate( + strings: missing, + language_code: lproj, + language_name: LANGUAGE_NAMES.fetch(lproj, lproj) + ) + next if translations.empty? + + append_ai_translations(locale_file, translations) + modified_files << locale_file + end + + if modified_files.empty? 
+ UI.success('No missing translations to backfill.') + next + end + + git_add(path: modified_files, shell_escape: false) + git_commit(path: modified_files, message: 'Backfill missing translations with AI', allow_nothing_to_commit: true) + end + + # Appends AI-generated translations to a `.strings` file, under a marker + # comment so they're easy to spot in review and in the file itself. + def append_ai_translations(path, translations) + File.open(path, 'a') do |file| + file.puts + file.puts '/* Machine-translated — pending human translation from GlotPress. */' + translations.sort.each do |key, value| + file.puts("\"#{escape_strings_literal(key)}\" = \"#{escape_strings_literal(value)}\";") + end + end + end + + # Escapes a string for use as a key or value in a `.strings` (old-style plist) + # file. Backslashes must be escaped first so we don't double-escape the ones + # we add for the quotes and control characters. + def escape_strings_literal(text) + text.to_s + .gsub('\\') { '\\\\' } + .gsub('"') { '\\"' } + .gsub("\n") { '\\n' } + .gsub("\t") { '\\t' } + end + # Downloads the localized metadata (for App Store Connect) from GlotPress for the WordPress app. # desc 'Downloads the localized metadata (for App Store Connect) from GlotPress for the WordPress app' From aba399ad7c020c0a0a3db61c841a50cc9f1954b8 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:26:59 -0600 Subject: [PATCH 3/5] Add continuous translation jobs (upload on merge, download daily) Phase 2 of the "Faster Releases" RFC. Both halves run in Buildkite. Upload (on each trunk merge): regenerate the English `Localizable.strings` from code, run the placeholder guardrail, and push to trunk so GlotPress imports new strings promptly. Trunk-gated, and guarded against re-triggering itself. Download (daily): `sync_translations` downloads the latest translations, runs the AI backfill, and opens/updates a single PR to trunk (dependabot-style). 
Scheduled via `.buildkite/translation-sync.yml`. - `sync_translations` + `upload_strings_for_translation` lanes - `.buildkite/commands/{sync-translations,upload-strings-for-translation}.sh` - trunk-gated upload step in pipeline.yml (mac queue); daily pipeline yaml The daily schedule itself is configured in Buildkite, pointing at the new pipeline. --- .buildkite/commands/sync-translations.sh | 18 ++++ .../upload-strings-for-translation.sh | 17 ++++ .buildkite/pipeline.yml | 17 ++++ .buildkite/translation-sync.yml | 16 +++ fastlane/lanes/localization.rb | 99 +++++++++++++++++++ 5 files changed, 167 insertions(+) create mode 100755 .buildkite/commands/sync-translations.sh create mode 100755 .buildkite/commands/upload-strings-for-translation.sh create mode 100644 .buildkite/translation-sync.yml diff --git a/.buildkite/commands/sync-translations.sh b/.buildkite/commands/sync-translations.sh new file mode 100755 index 000000000000..1e9700bdfcdc --- /dev/null +++ b/.buildkite/commands/sync-translations.sh @@ -0,0 +1,18 @@ +#!/bin/bash -eu + +# Downloads the latest translations from GlotPress, AI-backfills any strings +# still untranslated, and opens/updates a single PR to trunk. Runs daily. +# +# Requires ANTHROPIC_API_KEY in the CI environment for the AI backfill. +# Part of the "Faster Releases" RFC, Phase 2 (continuous translations). 
+ +echo '--- :robot_face: Use bot for Git operations' +source use-bot-for-git + +"$(dirname "${BASH_SOURCE[0]}")/shared-set-up.sh" + +echo '--- :closed_lock_with_key: Access secrets' +bundle exec fastlane run configure_apply + +echo '--- :globe_with_meridians: Sync translations' +bundle exec fastlane sync_translations diff --git a/.buildkite/commands/upload-strings-for-translation.sh b/.buildkite/commands/upload-strings-for-translation.sh new file mode 100755 index 000000000000..ab01dbed9de1 --- /dev/null +++ b/.buildkite/commands/upload-strings-for-translation.sh @@ -0,0 +1,17 @@ +#!/bin/bash -eu + +# Regenerates the English `Localizable.strings` from code and pushes it to trunk +# so GlotPress imports new strings promptly. Runs on each trunk merge. +# +# Part of the "Faster Releases" RFC, Phase 2 (continuous translations). + +echo '--- :robot_face: Use bot for Git operations' +source use-bot-for-git + +"$(dirname "${BASH_SOURCE[0]}")/shared-set-up.sh" + +echo '--- :closed_lock_with_key: Access secrets' +bundle exec fastlane run configure_apply + +echo '--- :globe_with_meridians: Regenerate and upload strings for translation' +bundle exec fastlane upload_strings_for_translation diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index bad262059d14..a93d9f5d0ca9 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -130,6 +130,23 @@ steps: command: .buildkite/commands/lint-localized-strings-format.sh plugins: [$CI_TOOLKIT_PLUGIN] + ################# + # Continuous translations: regenerate + upload English strings on each trunk merge + # + # Part of the "Faster Releases" RFC, Phase 2. Trunk-only by necessity — it + # pushes the regenerated strings to trunk, which would be wrong from a PR/branch + # build. The daily download half runs from `.buildkite/translation-sync.yml`. 
+ ################# + - group: "🌐 Localization" + key: localization_group + steps: + - label: "🌐 Upload strings for translation" + command: ".buildkite/commands/upload-strings-for-translation.sh" + if: "build.branch == 'trunk'" + agents: + queue: mac + plugins: [$CI_TOOLKIT_PLUGIN] + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait ################# diff --git a/.buildkite/translation-sync.yml b/.buildkite/translation-sync.yml new file mode 100644 index 000000000000..81238d75f5a1 --- /dev/null +++ b/.buildkite/translation-sync.yml @@ -0,0 +1,16 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json +--- + +# Daily translation sync — download the latest GlotPress translations, AI-backfill +# the gaps, and open/update a PR. Triggered on a schedule (configured in Buildkite). +# Part of the "Faster Releases" RFC, Phase 2 (continuous translations). + +agents: + queue: mac +env: + IMAGE_ID: $IMAGE_ID + +steps: + - label: "🌐 Sync translations" + command: ".buildkite/commands/sync-translations.sh" + plugins: [$CI_TOOLKIT_PLUGIN] diff --git a/fastlane/lanes/localization.rb b/fastlane/lanes/localization.rb index 52312aaea271..7898f0a8b9e9 100644 --- a/fastlane/lanes/localization.rb +++ b/fastlane/lanes/localization.rb @@ -91,6 +91,16 @@ 'zh-Hant' => 'Traditional Chinese' }.freeze +# Long-lived branch for the daily translation-sync PR. Reusing one branch keeps +# a single PR that updates each day (dependabot-style) instead of opening a new +# one per run. +TRANSLATION_SYNC_BRANCH = 'bot/translation-sync' + +# Commit message used when the upload-on-merge job pushes regenerated English +# strings to trunk. Also used as the loop-guard marker so that commit doesn't +# re-trigger the job. 
+TRANSLATION_STRINGS_COMMIT_MESSAGE = 'Update strings for localization' + # Mapping of all locales which can be used for AppStore metadata (Glotpress code => AppStore Connect code) # # TODO: Replace with `LocaleHelper` once provided by release toolkit (https://github.com/wordpress-mobile/release-toolkit/pull/296) @@ -534,6 +544,95 @@ def escape_strings_literal(text) .gsub("\t") { '\\t' } end + # The daily half of the continuous-translation model: download the latest + # human translations from GlotPress, AI-backfill anything still untranslated, + # and open (or update) a single PR to trunk. Runs from CI on a schedule. + # + # @called_by CI + # + desc 'Download the latest translations, AI-backfill the gaps, and open/update a PR' + lane :sync_translations do + ensure_git_status_clean + Fastlane::Helper::GitHelper.checkout_and_pull(DEFAULT_BRANCH) + + # Reset the long-lived sync branch to the current trunk so the PR updates in + # place rather than stacking on yesterday's changes. + sh('git', 'checkout', '-B', TRANSLATION_SYNC_BRANCH) + + download_localized_strings + backfill_missing_translations + + if sh('git', 'rev-list', '--count', "#{DEFAULT_BRANCH}..HEAD").strip == '0' + UI.success('Translations already up to date — nothing to sync.') + next + end + + sh('git', 'push', '--force', 'origin', TRANSLATION_SYNC_BRANCH) + open_translation_sync_pr + end + + def open_translation_sync_pr + pr_url = create_pull_request( + api_token: get_required_env('GHHELPER_ACCESS'), + repo: GITHUB_REPO, + title: 'Update translations', + body: <<~BODY, + Automated translation sync: the latest human translations from GlotPress, plus an AI backfill for any strings still untranslated so the app never ships untranslated copy. + + The AI translations are a stopgap and will be replaced by human translations as they arrive. They are never uploaded to GlotPress. 
+ BODY + head: TRANSLATION_SYNC_BRANCH, + base: DEFAULT_BRANCH + ) + UI.success("Translation sync PR ready: #{pr_url}") + rescue StandardError => e + # A PR for this branch is likely already open from a previous run — the + # force-push above updated it, so there's nothing more to do. + UI.message("Did not open a new PR (one is likely already open): #{e.message}") + end + + # The on-merge half of the continuous-translation model: regenerate the English + # `Localizable.strings` from code and push it to trunk so GlotPress imports new + # strings promptly. Guarded so an existing key can't change its placeholders + # without a new key. Runs from CI on every trunk merge. + # + # @called_by CI + # + desc 'Regenerate the English strings and push them to trunk for GlotPress' + lane :upload_strings_for_translation do + # Pushing the regenerated strings creates a trunk commit that re-triggers this + # pipeline. Skip when we're building our own strings commit, to avoid a loop. + if ENV.fetch('BUILDKITE_MESSAGE', '').include?(TRANSLATION_STRINGS_COMMIT_MESSAGE) + UI.success('Skipping — this build is the automated strings commit.') + next + end + + ensure_git_status_clean + Fastlane::Helper::GitHelper.checkout_and_pull(DEFAULT_BRANCH) + + en_strings_relative_path = File.join(WORDPRESS_EN_LPROJ, 'Localizable.strings') + en_strings_absolute_path = File.join(PROJECT_ROOT_FOLDER, en_strings_relative_path) + + # Snapshot the committed English strings before regenerating, for the guardrail. + previous_strings_path = File.join(Dir.tmpdir, 'previous_en_Localizable.strings') + File.write(previous_strings_path, sh('git', 'show', "HEAD:#{en_strings_relative_path}", log: false)) + + generate_strings_file_for_glotpress(skip_commit: true) + + # An existing key must not change its placeholders — that would silently break + # every existing translation. New and removed keys are fine. 
+ validate_string_placeholders(old: previous_strings_path, new: en_strings_absolute_path) + + git_commit(path: [WORDPRESS_EN_LPROJ], message: TRANSLATION_STRINGS_COMMIT_MESSAGE, allow_nothing_to_commit: true) + + if sh('git', 'rev-list', '--count', "origin/#{DEFAULT_BRANCH}..HEAD").strip == '0' + UI.success('English strings already current — nothing to upload.') + next + end + + push_to_git_remote(tags: false) + end + # Downloads the localized metadata (for App Store Connect) from GlotPress for the WordPress app. # desc 'Downloads the localized metadata (for App Store Connect) from GlotPress for the WordPress app' From 98508848333e2629a68eda17efb8072fef81f5d1 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:34:25 -0600 Subject: [PATCH 4/5] Add a PR dry-run for the strings upload Lets the upload-on-merge flow be exercised from a PR without touching trunk: `dry_run:true` regenerates the English strings and runs the placeholder guardrail but skips the commit and push. Adds a PR-only "Upload strings (dry run)" step; the real step stays trunk-gated. --- .../upload-strings-for-translation.sh | 4 +++- .buildkite/pipeline.yml | 15 +++++++++++++ fastlane/lanes/localization.rb | 22 ++++++++++++++++--- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/.buildkite/commands/upload-strings-for-translation.sh b/.buildkite/commands/upload-strings-for-translation.sh index ab01dbed9de1..75e33f964914 100755 --- a/.buildkite/commands/upload-strings-for-translation.sh +++ b/.buildkite/commands/upload-strings-for-translation.sh @@ -14,4 +14,6 @@ echo '--- :closed_lock_with_key: Access secrets' bundle exec fastlane run configure_apply echo '--- :globe_with_meridians: Regenerate and upload strings for translation' -bundle exec fastlane upload_strings_for_translation +# DRY_RUN=true regenerates and runs the guardrail without committing or pushing — +# used to exercise this flow from a PR. Defaults to a real run. 
+bundle exec fastlane upload_strings_for_translation dry_run:"${DRY_RUN:-false}" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a93d9f5d0ca9..e349ab0cb702 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -147,6 +147,21 @@ steps: queue: mac plugins: [$CI_TOOLKIT_PLUGIN] + # Dry run on PRs: regenerate + run the placeholder guardrail without + # committing or pushing, so the flow can be exercised (and guards the PR) + # before it goes live on trunk. + - label: "🌐 Upload strings (dry run)" + command: ".buildkite/commands/upload-strings-for-translation.sh" + if: "build.pull_request.id != null || build.pull_request.draft" + env: + DRY_RUN: "true" + agents: + queue: mac + plugins: [$CI_TOOLKIT_PLUGIN] + notify: + - github_commit_status: + context: "Strings Upload (dry run)" + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait ################# diff --git a/fastlane/lanes/localization.rb b/fastlane/lanes/localization.rb index 7898f0a8b9e9..72b81d69e537 100644 --- a/fastlane/lanes/localization.rb +++ b/fastlane/lanes/localization.rb @@ -598,17 +598,26 @@ def open_translation_sync_pr # # @called_by CI # + # @param [Boolean] dry_run If true, regenerate and run the guardrail but stay on + # the current branch — no commit, no push to trunk. Used to + # exercise (and guard) this flow from a PR before it's live. + # desc 'Regenerate the English strings and push them to trunk for GlotPress' - lane :upload_strings_for_translation do + lane :upload_strings_for_translation do |dry_run: false| + # Fastlane passes CLI args as strings (`dry_run:true`), so normalize. + dry_run = dry_run.to_s == 'true' + # Pushing the regenerated strings creates a trunk commit that re-triggers this # pipeline. Skip when we're building our own strings commit, to avoid a loop. 
- if ENV.fetch('BUILDKITE_MESSAGE', '').include?(TRANSLATION_STRINGS_COMMIT_MESSAGE) + if !dry_run && ENV.fetch('BUILDKITE_MESSAGE', '').include?(TRANSLATION_STRINGS_COMMIT_MESSAGE) UI.success('Skipping — this build is the automated strings commit.') next end ensure_git_status_clean - Fastlane::Helper::GitHelper.checkout_and_pull(DEFAULT_BRANCH) + # A dry run stays on the current branch (e.g. a PR) and never touches trunk; + # a real run regenerates against the latest trunk. + Fastlane::Helper::GitHelper.checkout_and_pull(DEFAULT_BRANCH) unless dry_run en_strings_relative_path = File.join(WORDPRESS_EN_LPROJ, 'Localizable.strings') en_strings_absolute_path = File.join(PROJECT_ROOT_FOLDER, en_strings_relative_path) @@ -623,6 +632,13 @@ def open_translation_sync_pr # every existing translation. New and removed keys are fine. validate_string_placeholders(old: previous_strings_path, new: en_strings_absolute_path) + if dry_run + diff = sh('git', 'diff', '--stat', '--', en_strings_relative_path, log: false) + UI.success('Dry run: regeneration and the placeholder guardrail passed.') + UI.message(diff.empty? ? 'No string changes vs the current branch.' : "Strings that would be uploaded:\n#{diff}") + next + end + git_commit(path: [WORDPRESS_EN_LPROJ], message: TRANSLATION_STRINGS_COMMIT_MESSAGE, allow_nothing_to_commit: true) if sh('git', 'rev-list', '--count', "origin/#{DEFAULT_BRANCH}..HEAD").strip == '0' From 28313d614097238c2a160f0fe6d3f6fdcae15e27 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:45:40 -0600 Subject: [PATCH 5/5] Satisfy rubocop in the translation helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `fastlane/helpers/` path isn't covered by the lane-file exclusions in .rubocop.yml, so the Metrics cops apply. - Single-quote the model symbol (Style/QuotedSymbols). 
- Extract translation validation out of `translate` into `validated_translations` (AbcSize / CyclomaticComplexity / RedundantEach). - Collapse `signature` to one uniform position-keyed path and extract `specifiers` (AbcSize / CyclomaticComplexity / PerceivedComplexity). The signature string is internal — only compared within a run — so its new format is safe. Note one behavior change: non-positional specifiers are now keyed by appearance order, so `%@ %d` and `%1$@ %2$d` compare as compatible (previously they did not). --- fastlane/helpers/ai_translator.rb | 27 ++++++++++++-------- fastlane/helpers/string_placeholders.rb | 34 +++++++++++++------------ 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/fastlane/helpers/ai_translator.rb b/fastlane/helpers/ai_translator.rb index f3f45743b3e4..e2a1d793b780 100644 --- a/fastlane/helpers/ai_translator.rb +++ b/fastlane/helpers/ai_translator.rb @@ -10,7 +10,7 @@ # See the "Faster Releases" RFC, Phase 2 (continuous translations). module AITranslator # Matches the Claude model already used elsewhere in CI (`.buildkite/claude-analysis.yml`). - MODEL = :"claude-sonnet-4-6" + MODEL = :'claude-sonnet-4-6' # Keep batches small so each request's JSON response stays well under the # non-streaming token ceiling and a single failure costs little to retry. BATCH_SIZE = 40 @@ -33,18 +33,25 @@ def translate(strings:, language_code:, language_name:) require 'anthropic' client = Anthropic::Client.new # reads ANTHROPIC_API_KEY from the environment - strings.each_slice(BATCH_SIZE).each_with_object({}) do |batch, translations| + result = {} + strings.each_slice(BATCH_SIZE) do |batch| batch_hash = batch.to_h - translate_batch(client: client, strings: batch_hash, language_code: language_code, language_name: language_name).each do |key, translation| - english = batch_hash[key] - next if english.nil? || translation.nil? || translation.to_s.empty?
+ raw = translate_batch(client: client, strings: batch_hash, language_code: language_code, language_name: language_name) + result.merge!(validated_translations(raw, batch_hash, language_code)) + end + result + end - unless StringPlaceholders.compatible?(english, translation) - UI.message("Dropping #{language_code} translation for '#{key}' — placeholders changed") - next - end + # Keeps only the translations whose placeholders match the English source. + def validated_translations(translations, english_by_key, language_code) + translations.each_with_object({}) do |(key, translation), kept| + english = english_by_key[key] + next if english.nil? || translation.to_s.empty? - translations[key] = translation + if StringPlaceholders.compatible?(english, translation) + kept[key] = translation + else + UI.message("Dropping #{language_code} translation for '#{key}' — placeholders changed") end end end diff --git a/fastlane/helpers/string_placeholders.rb b/fastlane/helpers/string_placeholders.rb index 8bc00e1f320f..54cf897b4d8b 100644 --- a/fastlane/helpers/string_placeholders.rb +++ b/fastlane/helpers/string_placeholders.rb @@ -48,28 +48,30 @@ def parse_file(path) # A canonical signature of the placeholders in a string value, or '' if there # are none. Two values with the same signature are placeholder-compatible. def signature(value) - specifiers = [] + # Key each specifier by its position — explicit for `%1$@`, otherwise its + # appearance order — then sort by it, so reordering equivalent positional args + # (`%1$@ %2$@` vs `%2$@ %1$@`) yields the same signature while a changed count + # or argument type does not. + specifiers(value) + .each_with_index + .map { |spec, index| [spec[:position] || (index + 1), spec[:klass]] } + .sort_by(&:first) + .map { |position, klass| "#{position}:#{klass}" } + .join(',') + end + + # The format specifiers in a value as `[{ position:, klass: }]`, excluding the + # literal `%%`. `position` is nil for non-positional specifiers. 
+ def specifiers(value) + found = [] value.to_s.scan(SPECIFIER) do match = Regexp.last_match conversion = match[:conversion] next if conversion == '%' # literal percent, not a placeholder - position = match[:position]&.delete('$')&.to_i - specifiers << { position: position, klass: CONVERSION_CLASSES.fetch(conversion, conversion) } - end - - return '' if specifiers.empty? - - if specifiers.all? { |s| s[:position] } - # Positional args (`%1$@`): compare the position -> type mapping. - specifiers.sort_by { |s| s[:position] }.map { |s| "#{s[:position]}:#{s[:klass]}" }.join(',') - elsif specifiers.none? { |s| s[:position] } - # Non-positional args: compare types in order of appearance. - specifiers.map { |s| s[:klass] }.join(',') - else - # Mixed positional/non-positional is unusual; be conservative and keep both. - specifiers.map { |s| "#{s[:position] || '_'}:#{s[:klass]}" }.join(',') + found << { position: match[:position]&.delete('$')&.to_i, klass: CONVERSION_CLASSES.fetch(conversion, conversion) } end + found end # Whether two string values share the same placeholder shape.