From 66244990f86de1616eb76f30915177df9a650449 Mon Sep 17 00:00:00 2001 From: lifulong Date: Fri, 26 Jun 2026 16:40:48 +0800 Subject: [PATCH] feat: Vectorized hash re-aggregation for HashAggregation spill recovery --- cpp/velox/compute/WholeStageResultIterator.cc | 2 ++ cpp/velox/config/VeloxConfig.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index b3d9f480c1c..5a64dfb9d6b 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -678,6 +678,8 @@ std::unordered_map WholeStageResultIterator::getQueryC } configs[velox::core::QueryConfig::kAggregationSpillEnabled] = std::to_string(veloxCfg_->get(kAggregationSpillEnabled, true)); + configs[velox::core::QueryConfig::kAggregationSpillHashRecoveryEnabled] = + std::to_string(veloxCfg_->get(kAggregationSpillHashRecoveryEnabled, false)); configs[velox::core::QueryConfig::kJoinSpillEnabled] = std::to_string(veloxCfg_->get(kJoinSpillEnabled, true)); configs[velox::core::QueryConfig::kOrderBySpillEnabled] = diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index d88c4361938..386d3f62cd8 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -26,6 +26,10 @@ const std::string kSpillStrategyDefaultValue = "auto"; const std::string kSpillThreadNum = "spark.gluten.sql.columnar.backend.velox.spillThreadNum"; const uint32_t kSpillThreadNumDefaultValue = 0; const std::string kAggregationSpillEnabled = "spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled"; +// When enabled, aggregation spilling skips the spill-time sort and re-aggregates each spill partition through a hash +// table (vectorized) on read, instead of the row-by-row ordered merge. Its lookup passes false as the fallback.
+const std::string kAggregationSpillHashRecoveryEnabled = + "spark.gluten.sql.columnar.backend.velox.aggregationSpillHashRecoveryEnabled"; const std::string kJoinSpillEnabled = "spark.gluten.sql.columnar.backend.velox.joinSpillEnabled"; const std::string kOrderBySpillEnabled = "spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled"; const std::string kWindowSpillEnabled = "spark.gluten.sql.columnar.backend.velox.windowSpillEnabled";