From eaca17d8fd1df469e0061d30a198432d7c2ae418 Mon Sep 17 00:00:00 2001 From: Radek Stankiewicz Date: Thu, 25 Jun 2026 17:14:04 +0200 Subject: [PATCH 1/3] Fix bq load for clashing copy job names --- .github/trigger_files/beam_PostCommit_Python.json | 4 ++-- sdks/python/apache_beam/io/gcp/bigquery_file_loads.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 11064375d62e..690f231aa801 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", "pr": "37345", - "modification": 49 -} + "modification": 50 +} diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py index 4ef6c392254b..aacdce914203 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py @@ -170,6 +170,13 @@ def _bq_uuid(seed=None): else: return str(hashlib.md5(seed.encode('utf8')).hexdigest()) +def _bq_uuid_list(list): + checksum = hashlib.sha256() + for item in list: + checksum.update(item.encode('utf-8')) + # separator + checksum.update(b'\x00') + return checksum.hexdigest() class _ShardDestinations(beam.DoFn): """Adds a shard number to the key of the KV element. @@ -589,9 +596,7 @@ def process( write_disposition = 'WRITE_APPEND' wait_for_job = False - chunk_job_name = copy_job_name_base - if len(chunks) > 1: - chunk_job_name = f"{copy_job_name_base}_{i}" + chunk_job_name = '%s_%s' % (copy_job_name_base, _bq_uuid_list(chunk)) _LOGGER.info( "Triggering copy job %s from %s to %s (write_disposition: %s)", From 7fbd13b964d5fe5f4a8af9e83af8bb8280ffb4c4 Mon Sep 17 00:00:00 2001 From: Radek Stankiewicz Date: Thu, 25 Jun 2026 15:48:34 +0000 Subject: [PATCH 2/3] spotless --- sdks/python/apache_beam/io/gcp/bigquery_file_loads.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py index aacdce914203..0d468114ff47 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py @@ -170,6 +170,7 @@ def _bq_uuid(seed=None): else: return str(hashlib.md5(seed.encode('utf8')).hexdigest()) + def _bq_uuid_list(list): checksum = hashlib.sha256() for item in list: @@ -178,6 +179,7 @@ def _bq_uuid_list(list): checksum.update(b'\x00') return checksum.hexdigest() + class _ShardDestinations(beam.DoFn): """Adds a shard number to the key of the KV element. From a7e6f849e9782e27790b19c5183126a2caa4c2ef Mon Sep 17 00:00:00 2001 From: Radek Stankiewicz Date: Thu, 25 Jun 2026 19:04:55 +0200 Subject: [PATCH 3/3] fix --- sdks/python/apache_beam/io/gcp/bigquery_file_loads.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py index 0d468114ff47..7bad85b5cb6f 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py @@ -172,12 +172,12 @@ def _bq_uuid(seed=None): def _bq_uuid_list(list): - checksum = hashlib.sha256() + cs = hashlib.sha256() for item in list: - checksum.update(item.encode('utf-8')) + cs.update(bigquery_tools.get_hashable_destination(item).encode('utf-8')) # separator - checksum.update(b'\x00') - return checksum.hexdigest() + cs.update(b'\x00') + return cs.hexdigest() class _ShardDestinations(beam.DoFn):