diff --git a/.gitignore b/.gitignore
index 7a61c598c99f75..d3976f46132e8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -153,3 +153,4 @@ compile_commands.json
 .github
 
 .worktrees/
+.worktree_initialized
diff --git a/be/src/core/data_type/data_type_timestamptz.h b/be/src/core/data_type/data_type_timestamptz.h
index 4a3fba0616cc45..b386402cb49696 100644
--- a/be/src/core/data_type/data_type_timestamptz.h
+++ b/be/src/core/data_type/data_type_timestamptz.h
@@ -56,6 +56,10 @@ class DataTypeTimeStampTz final : public DataTypeNumberBase<PrimitiveType::TYPE_
         return "TimeStampTz(" + std::to_string(_scale) + ")";
     }
 
+    void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType* scalar_type) const override {
+        scalar_type->set_scale(_scale);
+    }
+
     void to_pb_column_meta(PColumnMeta* col_meta) const override {
         DataTypeNumberBase<PrimitiveType::TYPE_TIMESTAMPTZ>::to_pb_column_meta(col_meta);
         col_meta->mutable_decimal_param()->set_scale(_scale);
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index ca84996ea45306..0eb5e4d44a39a1 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -28,6 +28,7 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/vdatetime_value.h"
 #include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
@@ -43,6 +44,95 @@ enum {
 namespace doris {
 static const int64_t micro_to_nano_second = 1000;
 
+namespace {
+
+#pragma pack(1)
+struct DecodedInt96Timestamp {
+    int64_t nanos_of_day;
+    int32_t julian_day;
+
+    int64_t to_timestamp_micros() const {
+        static constexpr int32_t JULIAN_EPOCH_OFFSET_DAYS = 2440588;
+        static constexpr int64_t MICROS_IN_DAY = 86400000000;
+        static constexpr int64_t NANOS_PER_MICROSECOND = 1000;
+        return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY +
+               nanos_of_day / NANOS_PER_MICROSECOND;
+    }
+};
+#pragma pack()
+static_assert(sizeof(DecodedInt96Timestamp) == 12);
+
+Status append_datetimev2_from_epoch_micros(ColumnDateTimeV2::Container& data,
+                                           int64_t timestamp_micros) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+    static constexpr int64_t MICROS_PER_MINUTE = MICROS_PER_SECOND * 60;
+    static constexpr int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * 60;
+    static constexpr int64_t MICROS_PER_DAY = MICROS_PER_HOUR * 24;
+    static const int64_t EPOCH_DAYNR = calc_daynr(1970, 1, 1);
+
+    int64_t days_since_epoch = timestamp_micros / MICROS_PER_DAY;
+    int64_t micros_of_day = timestamp_micros % MICROS_PER_DAY;
+    if (micros_of_day < 0) {
+        micros_of_day += MICROS_PER_DAY;
+        --days_since_epoch;
+    }
+
+    const int64_t daynr = EPOCH_DAYNR + days_since_epoch;
+    if (daynr <= 0) {
+        return Status::DataQualityError(
+                "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}",
+                timestamp_micros, daynr);
+    }
+
+    DateV2Value<DateTimeV2ValueType> datetime_value;
+    if (!datetime_value.get_date_from_daynr(static_cast<uint64_t>(daynr))) {
+        return Status::DataQualityError(
+                "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}",
+                timestamp_micros, daynr);
+    }
+
+    const auto hour = static_cast<uint8_t>(micros_of_day / MICROS_PER_HOUR);
+    micros_of_day %= MICROS_PER_HOUR;
+    const auto minute = static_cast<uint8_t>(micros_of_day / MICROS_PER_MINUTE);
+    micros_of_day %= MICROS_PER_MINUTE;
+    const auto second = static_cast<uint16_t>(micros_of_day / MICROS_PER_SECOND);
+    const auto microsecond = static_cast<uint32_t>(micros_of_day % MICROS_PER_SECOND);
+    datetime_value.unchecked_set_time(datetime_value.year(), datetime_value.month(),
+                                      datetime_value.day(), hour, minute, second, microsecond);
+    data.push_back(datetime_value);
+    return Status::OK();
+}
+
+void append_datetimev2_from_utc_epoch_micros(ColumnDateTimeV2::Container& data,
+                                             int64_t timestamp_micros,
+                                             const cctz::time_zone& timezone) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+
+    int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND;
+    int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND;
+    if (micros_of_second < 0) {
+        micros_of_second += MICROS_PER_SECOND;
+        --epoch_seconds;
+    }
+
+    DateV2Value<DateTimeV2ValueType> datetime_value;
+    datetime_value.from_unixtime(epoch_seconds, timezone);
+    datetime_value.set_microsecond(static_cast<uint32_t>(micros_of_second));
+    data.push_back(datetime_value);
+}
+
+int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) {
+    if (view.time_unit == DecodedTimeUnit::MILLIS) {
+        return value * 1000;
+    }
+    if (view.time_unit == DecodedTimeUnit::NANOS) {
+        return value / 1000;
+    }
+    return value;
+}
+
+} // namespace
+
 // NOLINTBEGIN(readability-function-size)
 // NOLINTBEGIN(readability-function-cognitive-complexity)
 Status DataTypeDateTimeV2SerDe::from_string_batch(const ColumnString& col_str,
@@ -451,6 +541,59 @@ Status DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
     return Status::OK();
 }
 
+Status DataTypeDateTimeV2SerDe::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT64 && view.value_kind != DecodedValueKind::INT96) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("DATETIMEV2 decoded reader expects INT64 or INT96 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDateTimeV2&>(column).get_data();
+    const auto old_size = data.size();
+    if (view.value_kind == DecodedValueKind::INT96) {
+        const auto* values = reinterpret_cast<const DecodedInt96Timestamp*>(view.values);
+        static const auto utc_timezone = cctz::utc_time_zone();
+        const auto& timezone = view.timezone == nullptr ? utc_timezone : *view.timezone;
+        for (int64_t row = 0; row < view.row_count; ++row) {
+            if (decoded_column_view_row_is_null(view, row)) {
+                data.push_back(DateV2Value<DateTimeV2ValueType>());
+                continue;
+            }
+            append_datetimev2_from_utc_epoch_micros(data, values[row].to_timestamp_micros(),
+                                                    timezone);
+        }
+        return Status::OK();
+    }
+
+    const auto* values = reinterpret_cast<const int64_t*>(view.values);
+    static const auto utc_timezone = cctz::utc_time_zone();
+    const auto& timezone = view.timezone == nullptr ? utc_timezone : *view.timezone;
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DateV2Value<DateTimeV2ValueType>());
+            continue;
+        }
+        const int64_t timestamp_micros = decoded_timestamp_micros(view, values[row]);
+        if (view.timestamp_is_adjusted_to_utc) {
+            append_datetimev2_from_utc_epoch_micros(data, timestamp_micros, timezone);
+        } else {
+            auto st = append_datetimev2_from_epoch_micros(data, timestamp_micros);
+            if (!st.ok()) {
+                if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                    decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                    continue;
+                }
+                data.resize(old_size);
+                return st;
+            }
+        }
+    }
+    return Status::OK();
+}
+
 Status DataTypeDateTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                              MysqlRowBinaryBuffer& result,
                                                              int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.h b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
index 0389432a621730..34d0373eba1c34 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
@@ -88,6 +88,8 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_D
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
 
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 1a503af38ef4ce..33e484ef946c01 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -27,6 +27,7 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/vdatetime_value.h"
 #include "exprs/function/cast/cast_to_datev2_impl.hpp"
@@ -124,6 +125,29 @@ Status DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow:
     return Status::OK();
 }
 
+Status DataTypeDateV2SerDe::read_column_from_decoded_values(IColumn& column,
+                                                            const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT32) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view, Status::NotSupported("DATEV2 decoded reader expects INT32 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDateV2&>(column).get_data();
+    const auto* values = reinterpret_cast<const int32_t*>(view.values);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DateV2Value<DateV2ValueType>());
+            continue;
+        }
+        DateV2Value<DateV2ValueType> date_v2;
+        date_v2.get_date_from_daynr(values[row] + date_threshold);
+        data.push_back(date_v2);
+    }
+    return Status::OK();
+}
+
 Status DataTypeDateV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                          MysqlRowBinaryBuffer& result,
                                                          int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.h b/be/src/core/data_type_serde/data_type_datev2_serde.h
index 0375f9be4b4b23..ff985d61345d5a 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.h
@@ -86,6 +86,8 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_DATEV
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.cpp b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
index fa21498d9feaf2..c977b3e31251c6 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
@@ -31,6 +31,7 @@
 #include "core/column/column_decimal.h"
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "exec/common/arithmetic_overflow.h"
 #include "exprs/function/cast/cast_to_decimal.h"
@@ -43,6 +44,139 @@
 #include "util/string_parser.hpp"
 
 namespace doris {
+namespace {
+
+template <typename NativeType>
+NativeType decode_big_endian_signed_integer(const uint8_t* data, int length) {
+    if constexpr (std::is_same_v<NativeType, wide::Int256>) {
+        NativeType value = data != nullptr && length > 0 && (data[0] & 0x80) != 0 ? NativeType(-1)
+                                                                                  : NativeType(0);
+        for (int i = 0; i < length; ++i) {
+            value = (value << 8) + NativeType(data[i]);
+        }
+        return value;
+    } else {
+        using UnsignedNativeType =
+                std::conditional_t<std::is_same_v<NativeType, Int128>, unsigned __int128,
+                                   std::make_unsigned_t<NativeType>>;
+        UnsignedNativeType value = data != nullptr && length > 0 && (data[0] & 0x80) != 0
+                                           ? static_cast<UnsignedNativeType>(-1)
+                                           : 0;
+        for (int i = 0; i < length; ++i) {
+            value = static_cast<UnsignedNativeType>((value << 8) | data[i]);
+        }
+        return static_cast<NativeType>(value);
+    }
+}
+
+template <PrimitiveType T>
+bool decoded_decimal_value_fits(const typename PrimitiveTypeTraits<T>::CppType::NativeType& value,
+                                UInt32 precision) {
+    return value >= min_decimal_value<T>(precision).value &&
+           value <= max_decimal_value<T>(precision).value;
+}
+
+template <PrimitiveType T>
+bool decoded_decimal_int_value_fits(Int128 value, UInt32 precision) {
+    using NativeType = typename PrimitiveTypeTraits<T>::CppType::NativeType;
+    if constexpr (std::is_same_v<NativeType, wide::Int256>) {
+        const auto wide_value = wide::Int256(value);
+        return decoded_decimal_value_fits<T>(wide_value, precision);
+    } else {
+        return value >= static_cast<Int128>(min_decimal_value<T>(precision).value) &&
+               value <= static_cast<Int128>(max_decimal_value<T>(precision).value);
+    }
+}
+
+template <PrimitiveType T>
+Status read_decimal_decoded_value(const DecodedColumnView& view, UInt32 precision, int64_t row,
+                                  typename PrimitiveTypeTraits<T>::CppType* result) {
+    using FieldType = typename PrimitiveTypeTraits<T>::CppType;
+    using NativeType = typename FieldType::NativeType;
+    NativeType native_value;
+    if (view.value_kind == DecodedValueKind::INT32) {
+        const auto* values = reinterpret_cast<const int32_t*>(view.values);
+        const auto value = static_cast<Int128>(values[row]);
+        if (!decoded_decimal_int_value_fits<T>(value, precision)) {
+            return Status::DataQualityError("Decoded decimal value is out of range");
+        }
+        native_value = NativeType(value);
+    } else if (view.value_kind == DecodedValueKind::INT64) {
+        const auto* values = reinterpret_cast<const int64_t*>(view.values);
+        const auto value = static_cast<Int128>(values[row]);
+        if (!decoded_decimal_int_value_fits<T>(value, precision)) {
+            return Status::DataQualityError("Decoded decimal value is out of range");
+        }
+        native_value = NativeType(value);
+    } else {
+        const auto& value = (*view.binary_values)[row];
+        const auto length = view.value_kind == DecodedValueKind::FIXED_BINARY
+                                    ? view.fixed_length
+                                    : cast_set<int, size_t, false>(value.size);
+        if (length > static_cast<int>(sizeof(NativeType))) {
+            return Status::DataQualityError("Decoded decimal binary value is too wide: length={}",
+                                            length);
+        }
+        native_value = decode_big_endian_signed_integer<NativeType>(
+                reinterpret_cast<const uint8_t*>(value.data), length);
+    }
+    if (!decoded_decimal_value_fits<T>(native_value, precision)) {
+        return Status::DataQualityError("Decoded decimal value is out of range");
+    }
+    *result = FieldType {native_value};
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status read_decimal_decoded_values(IColumn& column, const DecodedColumnView& view,
+                                   UInt32 precision) {
+    if (view.value_kind == DecodedValueKind::INT32 || view.value_kind == DecodedValueKind::INT64) {
+        if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+            return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+        }
+    } else if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded binary values are null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDecimal<T>&>(column).get_data();
+    const auto old_size = data.size();
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(typename PrimitiveTypeTraits<T>::CppType());
+            continue;
+        }
+        if (view.value_kind == DecodedValueKind::BINARY ||
+            view.value_kind == DecodedValueKind::FIXED_BINARY) {
+            const auto& value = (*view.binary_values)[row];
+            const auto length = view.value_kind == DecodedValueKind::FIXED_BINARY
+                                        ? view.fixed_length
+                                        : cast_set<int, size_t, false>(value.size);
+            if (value.data == nullptr && length > 0) {
+                if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                    decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                    continue;
+                }
+                return Status::Corruption("Decoded decimal binary value is null for {} at row {}",
+                                          column.get_name(), row);
+            }
+        }
+        typename PrimitiveTypeTraits<T>::CppType value;
+        auto st = read_decimal_decoded_value<T>(view, precision, row, &value);
+        if (!st.ok()) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            data.resize(old_size);
+            st.prepend(fmt::format(
+                    "Failed to decode decimal value for {} at row {}: ", column.get_name(), row));
+            return st;
+        }
+        data.push_back(value);
+    }
+    return Status::OK();
+}
+
+} // namespace
 
 template <PrimitiveType T>
 Status DataTypeDecimalSerDe<T>::from_string_batch(const ColumnString& str, ColumnNullable& column,
@@ -371,6 +505,24 @@ Status DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
     return Status::OK();
 }
 
+template <PrimitiveType T>
+Status DataTypeDecimalSerDe<T>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if constexpr (T == TYPE_DECIMAL32 || T == TYPE_DECIMAL64 || T == TYPE_DECIMAL128I ||
+                  T == TYPE_DECIMAL256) {
+        if (view.value_kind == DecodedValueKind::INT32 ||
+            view.value_kind == DecodedValueKind::INT64 ||
+            view.value_kind == DecodedValueKind::BINARY ||
+            view.value_kind == DecodedValueKind::FIXED_BINARY) {
+            return read_decimal_decoded_values<T>(column, view, precision);
+        }
+    }
+    return decoded_column_view_handle_conversion_failure(
+            column, view,
+            Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                 get_name(), static_cast<int>(view.value_kind)));
+}
+
 template <PrimitiveType T>
 Status DataTypeDecimalSerDe<T>::write_column_to_mysql_binary(const IColumn& column,
                                                              MysqlRowBinaryBuffer& result,
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.h b/be/src/core/data_type_serde/data_type_decimal_serde.h
index 0185672e024718..089835a21be955 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.h
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.h
@@ -107,6 +107,8 @@ class DataTypeDecimalSerDe : public DataTypeSerDe {
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.cpp b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
index a93f8d6126c7d5..7c6ce46e1cd960 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
@@ -22,7 +22,7 @@
 
 #include <algorithm>
 #include <boost/iterator/iterator_facade.hpp>
-#include <memory>
+#include <vector>
 
 #include "core/assert_cast.h"
 #include "core/column/column.h"
@@ -31,10 +31,12 @@
 #include "core/column/column_vector.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/data_type_serde/data_type_string_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "exprs/function/cast/cast_base.h"
 #include "format/transformer/vcsv_transformer.h"
 #include "util/jsonb_document.h"
 #include "util/jsonb_writer.h"
+#include "util/simd/bits.h"
 
 namespace doris {
 class Arena;
@@ -350,6 +352,39 @@ Status DataTypeNullableSerDe::read_column_from_arrow(IColumn& column,
                                                 ctz);
 }
 
+Status DataTypeNullableSerDe::read_column_from_decoded_values(IColumn& column,
+                                                              const DecodedColumnView& view) const {
+    auto& nullable_column = assert_cast<ColumnNullable&>(column);
+    auto& null_map = nullable_column.get_null_map_data();
+    const auto old_size = null_map.size();
+    auto& nested_column = nullable_column.get_nested_column();
+    const auto old_nested_size = nested_column.size();
+    null_map.resize(null_map.size() + view.row_count);
+    if (view.null_map == nullptr) {
+        // No null value
+        memset(null_map.data() + old_size, 0, view.row_count);
+    } else {
+        // TODO: skip if no null in map
+        auto* dst = null_map.data() + old_size;
+        memcpy(dst, view.null_map, view.row_count);
+        // If there are all null values, we can skip reading nested column and just insert defaults.
+        if (simd::count_zero_num(reinterpret_cast<const int8_t*>(view.null_map), view.row_count) ==
+            0) {
+            nested_column.insert_many_defaults(view.row_count);
+            return Status::OK();
+        }
+    }
+    DecodedColumnView nested_view = view;
+    nested_view.conversion_failure_null_map = &null_map;
+    nested_view.conversion_failure_null_map_offset = old_size;
+    auto st = nested_serde->read_column_from_decoded_values(nested_column, nested_view);
+    if (!st.ok()) {
+        null_map.resize(old_size);
+        nested_column.resize(old_nested_size);
+    }
+    return st;
+}
+
 bool DataTypeNullableSerDe::write_column_to_mysql_text(const IColumn& column, BufferWritable& bw,
                                                        int64_t row_idx,
                                                        const FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.h b/be/src/core/data_type_serde/data_type_nullable_serde.h
index 6e069444483b87..ee1eab51941ecb 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.h
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.h
@@ -86,6 +86,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp b/be/src/core/data_type_serde/data_type_number_serde.cpp
index 2124547c2f89f1..3c99a53b5b07bf 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_number_serde.cpp
@@ -20,6 +20,8 @@
 #include <arrow/builder.h>
 
 #include <cstdint>
+#include <limits>
+#include <type_traits>
 
 #include "common/exception.h"
 #include "common/status.h"
@@ -27,6 +29,7 @@
 #include "core/data_type/define_primitive_type.h"
 #include "core/data_type/primitive_type.h"
 #include "core/data_type_serde/data_type_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/packed_int128.h"
 #include "core/types.h"
 #include "core/value/timestamptz_value.h"
@@ -43,6 +46,137 @@
 #include "util/unaligned.h"
 
 namespace doris {
+namespace {
+
+template <typename NativeType>
+const NativeType* decoded_values_as(const DecodedColumnView& view) {
+    return reinterpret_cast<const NativeType*>(view.values);
+}
+
+template <typename DorisCppType, typename SourceType>
+bool decoded_number_value_fits(SourceType value) {
+    if constexpr (std::is_floating_point_v<DorisCppType>) {
+        return true;
+    } else if constexpr (std::is_same_v<DorisCppType, UInt8>) {
+        return value == SourceType(0) || value == SourceType(1);
+    } else if constexpr (std::is_signed_v<SourceType>) {
+        const auto int128_value = static_cast<Int128>(value);
+        return int128_value >= static_cast<Int128>(std::numeric_limits<DorisCppType>::lowest()) &&
+               int128_value <= static_cast<Int128>(std::numeric_limits<DorisCppType>::max());
+    } else {
+        const auto uint128_value = static_cast<unsigned __int128>(value);
+        if constexpr (std::is_signed_v<DorisCppType>) {
+            return uint128_value <=
+                   static_cast<unsigned __int128>(std::numeric_limits<DorisCppType>::max());
+        } else {
+            return uint128_value <=
+                   static_cast<unsigned __int128>(std::numeric_limits<DorisCppType>::max());
+        }
+    }
+}
+
+template <PrimitiveType DorisType, typename SourceType>
+Status read_number_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data =
+            assert_cast<typename PrimitiveTypeTraits<DorisType>::ColumnType&>(column).get_data();
+    const auto old_size = data.size();
+    const auto* values = decoded_values_as<SourceType>(view);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        using DorisCppType = typename PrimitiveTypeTraits<DorisType>::CppType;
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DorisCppType());
+            continue;
+        }
+        if (!decoded_number_value_fits<DorisCppType>(values[row])) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            data.resize(old_size);
+            return Status::DataQualityError("Decoded value is out of range for {} at row {}",
+                                            column.get_name(), row);
+        }
+        data.push_back(static_cast<DorisCppType>(values[row]));
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType DorisType, typename SourceType, typename LogicalType>
+Status read_logical_integer_decoded_values_as(IColumn& column, const DecodedColumnView& view) {
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data =
+            assert_cast<typename PrimitiveTypeTraits<DorisType>::ColumnType&>(column).get_data();
+    const auto old_size = data.size();
+    const auto* values = decoded_values_as<SourceType>(view);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        using DorisCppType = typename PrimitiveTypeTraits<DorisType>::CppType;
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DorisCppType());
+            continue;
+        }
+        const auto logical_value = static_cast<LogicalType>(values[row]);
+        if (!decoded_number_value_fits<DorisCppType>(logical_value)) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            data.resize(old_size);
+            return Status::DataQualityError(
+                    "Decoded logical integer value is out of range for {} at row {}",
+                    column.get_name(), row);
+        }
+        data.push_back(static_cast<DorisCppType>(logical_value));
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType DorisType, typename SourceType>
+Status read_integer_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    if (view.logical_integer_bit_width <= 0) {
+        return read_number_decoded_values<DorisType, SourceType>(column, view);
+    }
+
+    if (view.logical_integer_is_signed) {
+        switch (view.logical_integer_bit_width) {
+        case 8:
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int8>(column,
+                                                                                       view);
+        case 16:
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int16>(column,
+                                                                                        view);
+        case 32:
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int32>(column,
+                                                                                        view);
+        case 64:
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int64>(column,
+                                                                                        view);
+        default:
+            return Status::NotSupported("Unsupported decoded logical integer bit width {} for {}",
+                                        view.logical_integer_bit_width, column.get_name());
+        }
+    }
+
+    switch (view.logical_integer_bit_width) {
+    case 8:
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt8>(column, view);
+    case 16:
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt16>(column, view);
+    case 32:
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt32>(column, view);
+    case 64:
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt64>(column, view);
+    default:
+        return Status::NotSupported("Unsupported decoded logical integer bit width {} for {}",
+                                    view.logical_integer_bit_width, column.get_name());
+    }
+}
+
+} // namespace
 // Type map的基本结构
 template <typename Key, typename Value, typename... Rest>
 struct TypeMap {
@@ -157,6 +291,42 @@ Status DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, cons
     return Status::OK();
 }
 
+template <PrimitiveType T>
+Status DataTypeNumberSerDe<T>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if constexpr (T == TYPE_BOOLEAN) {
+        if (view.value_kind == DecodedValueKind::BOOL) {
+            return read_number_decoded_values<TYPE_BOOLEAN, bool>(column, view);
+        }
+    } else if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT ||
+                         T == TYPE_BIGINT || T == TYPE_LARGEINT) {
+        if (view.value_kind == DecodedValueKind::INT32) {
+            return read_integer_decoded_values<T, int32_t>(column, view);
+        }
+        if (view.value_kind == DecodedValueKind::UINT32) {
+            return read_integer_decoded_values<T, uint32_t>(column, view);
+        }
+        if (view.value_kind == DecodedValueKind::INT64) {
+            return read_integer_decoded_values<T, int64_t>(column, view);
+        }
+        if (view.value_kind == DecodedValueKind::UINT64) {
+            return read_integer_decoded_values<T, uint64_t>(column, view);
+        }
+    } else if constexpr (T == TYPE_FLOAT) {
+        if (view.value_kind == DecodedValueKind::FLOAT) {
+            return read_number_decoded_values<TYPE_FLOAT, float>(column, view);
+        }
+    } else if constexpr (T == TYPE_DOUBLE) {
+        if (view.value_kind == DecodedValueKind::DOUBLE) {
+            return read_number_decoded_values<TYPE_DOUBLE, double>(column, view);
+        }
+    }
+    return decoded_column_view_handle_conversion_failure(
+            column, view,
+            Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                 get_name(), static_cast<int>(view.value_kind)));
+}
+
 template <PrimitiveType T>
 Status DataTypeNumberSerDe<T>::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                               const FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_number_serde.h b/be/src/core/data_type_serde/data_type_number_serde.h
index b57f9f9d21298d..0e0a3acfc1aed7 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.h
+++ b/be/src/core/data_type_serde/data_type_number_serde.h
@@ -117,6 +117,9 @@ class DataTypeNumberSerDe : public DataTypeSerDe {
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_serde.cpp b/be/src/core/data_type_serde/data_type_serde.cpp
index ac688ae6c307a3..728cafab3469fd 100644
--- a/be/src/core/data_type_serde/data_type_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_serde.cpp
@@ -34,6 +34,54 @@
 namespace doris {
 DataTypeSerDe::~DataTypeSerDe() = default;
 
+bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view) {
+    return !view.enable_strict_mode && view.conversion_failure_null_map != nullptr;
+}
+
+void decoded_column_view_insert_null_on_conversion_failure(IColumn& column,
+                                                           const DecodedColumnView& view,
+                                                           int64_t row) {
+    DORIS_CHECK(decoded_column_view_can_null_on_conversion_failure(view));
+    DORIS_CHECK(row >= 0);
+    DORIS_CHECK(row < view.row_count);
+    DORIS_CHECK(view.conversion_failure_null_map_offset >= 0);
+    const auto null_map_row = view.conversion_failure_null_map_offset + row;
+    DORIS_CHECK(null_map_row >= 0);
+    DORIS_CHECK(static_cast<size_t>(null_map_row) < view.conversion_failure_null_map->size());
+    column.insert_default();
+    (*view.conversion_failure_null_map)[null_map_row] = 1;
+}
+
+Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view,
+                                                     const Status& status) {
+    if (!decoded_column_view_can_null_on_conversion_failure(view)) {
+        return status;
+    }
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+    }
+    return Status::OK();
+}
+
+Status DataTypeSerDe::read_column_from_decoded_values(IColumn& column,
+                                                      const DecodedColumnView& view) const {
+    return decoded_column_view_handle_conversion_failure(
+            column, view,
+            Status::NotSupported("read_column_from_decoded_values is not supported for {}",
+                                 get_name()));
+}
+
+Status DataTypeSerDe::read_field_from_decoded_value(const IDataType& data_type, Field* field,
+                                                    const DecodedColumnView& view) const {
+    DORIS_CHECK(field != nullptr);
+    DORIS_CHECK(view.row_count == 1);
+    auto column = data_type.create_column();
+    RETURN_IF_ERROR(read_column_from_decoded_values(*column, view));
+    DORIS_CHECK(column->size() == 1);
+    column->get(0, *field);
+    return Status::OK();
+}
+
 DataTypeSerDeSPtrs create_data_type_serdes(const DataTypes& types) {
     DataTypeSerDeSPtrs serdes;
     serdes.reserve(types.size());
diff --git a/be/src/core/data_type_serde/data_type_serde.h b/be/src/core/data_type_serde/data_type_serde.h
index eb7ce74fbe7e9c..baab90ea8d2a82 100644
--- a/be/src/core/data_type_serde/data_type_serde.h
+++ b/be/src/core/data_type_serde/data_type_serde.h
@@ -27,6 +27,7 @@
 #include "common/cast_set.h"
 #include "common/status.h"
 #include "core/column/column_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/field.h"
 #include "core/string_buffer.hpp"
 #include "core/types.h"
@@ -485,6 +486,14 @@ class DataTypeSerDe {
                                           int64_t start, int64_t end,
                                           const cctz::time_zone& ctz) const = 0;
 
+    // Read already decoded column values into a Doris column. The input view is format-neutral:
+    // file readers translate their decoder output into DecodedColumnView, while SerDe owns
+    // the Doris-type-specific materialization into IColumn.
+    virtual Status read_column_from_decoded_values(IColumn& column,
+                                                   const DecodedColumnView& view) const;
+    virtual Status read_field_from_decoded_value(const IDataType& data_type, Field* field,
+                                                 const DecodedColumnView& view) const;
+
     // ORC serializer
     virtual Status write_column_to_orc(const std::string& timezone, const IColumn& column,
                                        const NullMap* null_map,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp
index 8c5bf1664da19c..bba973fe755779 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -19,11 +19,40 @@
 
 #include "core/column/column_string.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "util/jsonb_document_cast.h"
 #include "util/jsonb_utils.h"
 #include "util/jsonb_writer.h"
 
 namespace doris {
+namespace {
+
+template <typename ColumnType>
+Status read_string_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded binary values are null for {}", column.get_name());
+    }
+    auto& string_column = assert_cast<ColumnType&>(column);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            string_column.insert_default();
+            continue;
+        }
+        const auto& value = (*view.binary_values)[row];
+        if (value.data == nullptr && value.size > 0) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            return Status::Corruption("Decoded string binary value is null for {} at row {}",
+                                      column.get_name(), row);
+        }
+        string_column.insert_data(value.data, value.size);
+    }
+    return Status::OK();
+}
+
+} // namespace
 
 template <typename ColumnType>
 Status DataTypeStringSerDeBase<ColumnType>::serialize_column_to_json(const IColumn& column,
@@ -312,6 +341,19 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
     return Status::OK();
 }
 
+template <typename ColumnType>
+Status DataTypeStringSerDeBase<ColumnType>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::BINARY &&
+        view.value_kind != DecodedValueKind::FIXED_BINARY) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                     get_name(), static_cast<int>(view.value_kind)));
+    }
+    return read_string_decoded_values<ColumnType>(column, view);
+}
+
 template <typename ColumnType>
 Status DataTypeStringSerDeBase<ColumnType>::write_column_to_orc(
         const std::string& timezone, const IColumn& column, const NullMap* null_map,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.h b/be/src/core/data_type_serde/data_type_string_serde.h
index 79c8450835d39c..81b80eab4a5cbf 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.h
+++ b/be/src/core/data_type_serde/data_type_string_serde.h
@@ -203,6 +203,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override {
diff --git a/be/src/core/data_type_serde/data_type_time_serde.cpp b/be/src/core/data_type_serde/data_type_time_serde.cpp
index e57fd08a271339..c40e671793c848 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_time_serde.cpp
@@ -20,11 +20,38 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/value/time_value.h"
 #include "exprs/function/cast/cast_base.h"
 #include "exprs/function/cast/cast_to_time_impl.hpp"
 
 namespace doris {
+namespace {
+
+TimeValue::TimeType read_time_decoded_value(const DecodedColumnView& view, int64_t row) {
+    int64_t micros = 0;
+    if (view.value_kind == DecodedValueKind::INT32) {
+        const auto* values = reinterpret_cast<const int32_t*>(view.values);
+        micros = static_cast<int64_t>(values[row]) * 1000;
+    } else {
+        const auto* values = reinterpret_cast<const int64_t*>(view.values);
+        micros = values[row];
+        if (view.time_unit == DecodedTimeUnit::MILLIS) {
+            micros *= 1000;
+        } else if (view.time_unit == DecodedTimeUnit::NANOS) {
+            micros /= 1000;
+        }
+    }
+    const bool negative = micros < 0;
+    const int64_t abs_micros = std::abs(micros);
+    return TimeValue::make_time(
+            abs_micros / TimeValue::ONE_HOUR_MICROSECONDS,
+            (abs_micros % TimeValue::ONE_HOUR_MICROSECONDS) / TimeValue::ONE_MINUTE_MICROSECONDS,
+            (abs_micros % TimeValue::ONE_MINUTE_MICROSECONDS) / TimeValue::ONE_SECOND_MICROSECONDS,
+            abs_micros % TimeValue::ONE_SECOND_MICROSECONDS, negative);
+}
+
+} // namespace
 
 Status DataTypeTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                          MysqlRowBinaryBuffer& result,
@@ -145,6 +172,27 @@ Status DataTypeTimeV2SerDe::from_string_strict_mode(StringRef& str, IColumn& col
     return Status::OK();
 }
 
+Status DataTypeTimeV2SerDe::read_column_from_decoded_values(IColumn& column,
+                                                            const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT32 && view.value_kind != DecodedValueKind::INT64) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("TIMEV2 decoded reader expects INT32 or INT64 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnTimeV2&>(column).get_data();
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(TimeValue::TimeType());
+            continue;
+        }
+        data.push_back(read_time_decoded_value(view, row));
+    }
+    return Status::OK();
+}
+
 template <typename IntDataType>
 Status DataTypeTimeV2SerDe::from_int_batch(const typename IntDataType::ColumnType& int_col,
                                            ColumnNullable& target_col) const {
diff --git a/be/src/core/data_type_serde/data_type_time_serde.h b/be/src/core/data_type_serde/data_type_time_serde.h
index db703616b497cf..e3fccf379c913a 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.h
+++ b/be/src/core/data_type_serde/data_type_time_serde.h
@@ -67,6 +67,8 @@ class DataTypeTimeV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_TIMEV
     template <typename DecimalDataType>
     Status from_decimal_strict_mode_batch(const typename DecimalDataType::ColumnType& decimal_col,
                                           IColumn& target_col) const;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     int get_scale() const override { return _scale; }
 
 protected:
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
index e8c26f6db68e75..abc8b86700023a 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
@@ -18,14 +18,64 @@
 #include "core/data_type_serde/data_type_timestamptz_serde.h"
 
 #include <arrow/builder.h>
+#include <cctz/time_zone.h>
 
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/value/timestamptz_value.h"
 #include "exprs/function/cast/cast_parameters.h"
 #include "exprs/function/cast/cast_to_string.h"
 #include "exprs/function/cast/cast_to_timestamptz.h"
 namespace doris {
 
+namespace {
+
+#pragma pack(1)
+struct DecodedInt96Timestamp {
+    int64_t nanos_of_day;
+    int32_t julian_day;
+
+    int64_t to_timestamp_micros() const {
+        static constexpr int32_t JULIAN_EPOCH_OFFSET_DAYS = 2440588;
+        static constexpr int64_t MICROS_IN_DAY = 86400000000;
+        static constexpr int64_t NANOS_PER_MICROSECOND = 1000;
+        return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY +
+               nanos_of_day / NANOS_PER_MICROSECOND;
+    }
+};
+#pragma pack()
+static_assert(sizeof(DecodedInt96Timestamp) == 12);
+
+void append_timestamptz_from_utc_epoch_micros(ColumnTimeStampTz::Container& data,
+                                              int64_t timestamp_micros) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+    static const auto UTC = cctz::utc_time_zone();
+
+    int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND;
+    int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND;
+    if (micros_of_second < 0) {
+        micros_of_second += MICROS_PER_SECOND;
+        --epoch_seconds;
+    }
+
+    TimestampTzValue timestamp_tz;
+    timestamp_tz.from_unixtime(epoch_seconds, UTC);
+    timestamp_tz.set_microsecond(static_cast<uint32_t>(micros_of_second));
+    data.push_back(timestamp_tz);
+}
+
+int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) {
+    if (view.time_unit == DecodedTimeUnit::MILLIS) {
+        return value * 1000;
+    }
+    if (view.time_unit == DecodedTimeUnit::NANOS) {
+        return value / 1000;
+    }
+    return value;
+}
+
+} // namespace
+
 // The implementation of these functions mainly refers to data_type_datetimev2_serde.cpp
 
 Status DataTypeTimeStampTzSerDe::from_string(StringRef& str, IColumn& column,
@@ -246,6 +296,41 @@ Status DataTypeTimeStampTzSerDe::write_column_to_orc(const std::string& timezone
     return Status::OK();
 }
 
+Status DataTypeTimeStampTzSerDe::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT64 && view.value_kind != DecodedValueKind::INT96) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("TIMESTAMPTZ decoded reader expects INT64 or INT96 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+
+    auto& data = assert_cast<ColumnTimeStampTz&>(column).get_data();
+    if (view.value_kind == DecodedValueKind::INT96) {
+        const auto* values = reinterpret_cast<const DecodedInt96Timestamp*>(view.values);
+        for (int64_t row = 0; row < view.row_count; ++row) {
+            if (decoded_column_view_row_is_null(view, row)) {
+                data.push_back(TimestampTzValue());
+                continue;
+            }
+            append_timestamptz_from_utc_epoch_micros(data, values[row].to_timestamp_micros());
+        }
+        return Status::OK();
+    }
+
+    const auto* values = reinterpret_cast<const int64_t*>(view.values);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(TimestampTzValue());
+            continue;
+        }
+        append_timestamptz_from_utc_epoch_micros(data, decoded_timestamp_micros(view, values[row]));
+    }
+    return Status::OK();
+}
+
 std::string DataTypeTimeStampTzSerDe::to_olap_string(const Field& field) const {
     return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
 }
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 0a595935d8fdd6..133e37fed33b03 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -22,6 +22,7 @@
 #include <cstdint>
 
 #include "core/data_type_serde/data_type_number_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/time_value.h"
 
@@ -72,6 +73,9 @@ class DataTypeTimeStampTzSerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_
                                int64_t start, int64_t end, Arena& arena,
                                const FormatOptions& options) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     // Override needed: paired reader skips a scale byte; the inherited number-serde writer omits it.
     void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
                                   int64_t row_num) const override;
diff --git a/be/src/core/data_type_serde/decoded_column_view.h b/be/src/core/data_type_serde/decoded_column_view.h
new file mode 100644
index 00000000000000..9f40f1513dbf93
--- /dev/null
+++ b/be/src/core/data_type_serde/decoded_column_view.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column_nullable.h"
+#include "core/string_ref.h"
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+
+class IColumn;
+
+// 已解码 column batch 的物理值来源类型。
+// 该枚举只描述通用内存布局，不包含 Parquet/ORC/Arrow 等格式专有类型。
+enum class DecodedValueKind {
+    BOOL,
+    INT32,
+    UINT32,
+    INT64,
+    UINT64,
+    INT96,
+    FLOAT,
+    DOUBLE,
+    BINARY,
+    FIXED_BINARY,
+};
+
+enum class DecodedTimeUnit {
+    UNKNOWN,
+    MILLIS,
+    MICROS,
+    NANOS,
+};
+
+struct DecodedColumnView {
+    DecodedValueKind value_kind = DecodedValueKind::INT32;
+    DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN;
+    int64_t row_count = 0;
+    // Optional logical integer annotation. value_kind still describes the physical buffer layout.
+    int logical_integer_bit_width = -1;
+    int decimal_precision = -1;
+    int decimal_scale = -1;
+    int fixed_length = -1;
+    bool logical_integer_is_signed = true;
+    bool timestamp_is_adjusted_to_utc = false;
+    const uint8_t* values = nullptr;
+    const uint8_t* null_map = nullptr;
+    const std::vector<StringRef>* binary_values = nullptr;
+    const cctz::time_zone* timezone = nullptr;
+    bool enable_strict_mode = false;
+    NullMap* conversion_failure_null_map = nullptr;
+    int64_t conversion_failure_null_map_offset = 0;
+};
+
+inline bool decoded_column_view_row_is_null(const DecodedColumnView& view, int64_t row) {
+    return view.null_map != nullptr && view.null_map[row] != 0;
+}
+
+inline bool decoded_column_view_has_non_null_value(const DecodedColumnView& view) {
+    if (view.null_map == nullptr) {
+        return view.row_count > 0;
+    }
+
+    // TODO(gabriel): optimize null map check with SIMD or bitset if needed.
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (view.null_map[row] == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view);
+
+void decoded_column_view_insert_null_on_conversion_failure(IColumn& column,
+                                                           const DecodedColumnView& view,
+                                                           int64_t row);
+
+Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view,
+                                                     const Status& status);
+
+} // namespace doris
diff --git a/be/src/exec/operator/file_scan_operator.cpp b/be/src/exec/operator/file_scan_operator.cpp
index 2a87f413a15bd6..d4035d37e27106 100644
--- a/be/src/exec/operator/file_scan_operator.cpp
+++ b/be/src/exec/operator/file_scan_operator.cpp
@@ -24,6 +24,7 @@
 #include "exec/operator/olap_scan_operator.h"
 #include "exec/operator/scan_operator.h"
 #include "exec/scan/file_scanner.h"
+#include "exec/scan/file_scanner_v2.h"
 #include "exec/scan/scanner_context.h"
 #include "format/format_common.h"
 #include "storage/storage_engine.h"
@@ -119,10 +120,32 @@ Status FileScanLocalState::_init_scanners(std::list<ScannerSPtr>* scanners) {
                      _max_scanners);
     shard_num = std::max(shard_num, 1U);
     _kv_cache = std::make_unique<ShardedKVCache>(shard_num);
+    const TFileScanRangeParams* scan_params = nullptr;
+    if (state()->get_query_ctx() != nullptr &&
+        state()->get_query_ctx()->file_scan_range_params_map.count(parent_id()) > 0) {
+        scan_params = &state()->get_query_ctx()->file_scan_range_params_map[parent_id()];
+    } else {
+        scan_params = _split_source->get_params();
+    }
+    const bool is_load =
+            state()->desc_tbl().get_tuple_descriptor(scan_params->src_tuple_id) != nullptr;
+    // TODO: Use scanner v2 for all queries.
+    const bool use_file_scanner_v2 =
+            state()->query_options().__isset.enable_file_scanner_v2 &&
+            state()->query_options().enable_file_scanner_v2 && !is_load &&
+            _split_source->all_scan_ranges_match(*scan_params, FileScannerV2::is_supported);
+    _operator_profile->add_info_string("UseScannerV2", use_file_scanner_v2 ? "true" : "false");
     for (int i = 0; i < _max_scanners; ++i) {
-        std::unique_ptr<FileScanner> scanner = FileScanner::create_unique(
-                state(), this, p._limit, _split_source, _scanner_profile.get(), _kv_cache.get(),
-                &p._colname_to_slot_id);
+        ScannerSPtr scanner;
+        if (use_file_scanner_v2) {
+            scanner = FileScannerV2::create_shared(state(), this, p._limit, _split_source,
+                                                   _scanner_profile.get(), _kv_cache.get(),
+                                                   &p._colname_to_slot_id);
+        } else {
+            scanner = FileScanner::create_shared(state(), this, p._limit, _split_source,
+                                                 _scanner_profile.get(), _kv_cache.get(),
+                                                 &p._colname_to_slot_id);
+        }
         RETURN_IF_ERROR(scanner->init(state(), _conjuncts));
         scanners->push_back(std::move(scanner));
     }
diff --git a/be/src/exec/operator/file_scan_operator.h b/be/src/exec/operator/file_scan_operator.h
index d4e31195a4459a..c47488fa357c77 100644
--- a/be/src/exec/operator/file_scan_operator.h
+++ b/be/src/exec/operator/file_scan_operator.h
@@ -29,6 +29,7 @@
 
 namespace doris {
 class FileScanner;
+class FileScannerV2;
 } // namespace doris
 
 namespace doris {
@@ -56,6 +57,7 @@ class FileScanLocalState final : public ScanLocalState<FileScanLocalState> {
 
 private:
     friend class FileScanner;
+    friend class FileScannerV2;
     PushDownType _should_push_down_bloom_filter() const override {
         return PushDownType::UNACCEPTABLE;
     }
diff --git a/be/src/exec/operator/result_sink_operator.h b/be/src/exec/operator/result_sink_operator.h
index 4ead2985d85162..7ff4b18c9b2817 100644
--- a/be/src/exec/operator/result_sink_operator.h
+++ b/be/src/exec/operator/result_sink_operator.h
@@ -45,7 +45,7 @@ struct ResultFileOptions {
     TParquetCompressionType::type parquet_commpression_type;
     TParquetVersion::type parquet_version;
     bool parquert_disable_dictionary = false;
-    bool enable_int96_timestamps = false;
+    bool enable_int96_timestamps = true;
     //note: use outfile with parquet format, have deprecated 9:schema and 10:file_properties
     //But in order to consider the compatibility when upgrading, so add a bool to check
     //Now the code version is 1.1.2, so when the version is after 1.2, could remove this code.
diff --git a/be/src/exec/scan/access_path_parser.cpp b/be/src/exec/scan/access_path_parser.cpp
new file mode 100644
index 00000000000000..b215212b6d861b
--- /dev/null
+++ b/be/src/exec/scan/access_path_parser.cpp
@@ -0,0 +1,479 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/access_path_parser.h"
+
+#include <fmt/format.h>
+
+#include <algorithm>
+#include <charconv>
+#include <map>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "runtime/descriptors.h"
+#include "util/string_util.h"
+
+namespace doris {
+namespace {
+
+bool is_scanner_materialized_virtual_column(const std::string& column_name) {
+    return column_name == BeConsts::ICEBERG_ROWID_COL;
+}
+
+bool parse_non_negative_int(std::string_view value, int32_t* result) {
+    DORIS_CHECK(result != nullptr);
+    int32_t parsed = -1;
+    const auto* begin = value.data();
+    const auto* end = begin + value.size();
+    const auto [ptr, ec] = std::from_chars(begin, end, parsed);
+    if (ec != std::errc() || ptr != end || parsed < 0) {
+        return false;
+    }
+    *result = parsed;
+    return true;
+}
+
+std::string access_path_to_string(const std::vector<std::string>& path) {
+    return fmt::format("{}", fmt::join(path, "."));
+}
+
+format::ColumnDefinition* find_or_add_child(format::ColumnDefinition* parent, int32_t id,
+                                            std::string name, DataTypePtr type) {
+    DORIS_CHECK(parent != nullptr);
+    for (auto& child : parent->children) {
+        if ((child.has_identifier_field_id() && child.get_identifier_field_id() == id) ||
+            child.name == name) {
+            return &child;
+        }
+    }
+    parent->children.push_back({
+            .identifier = Field::create_field<TYPE_INT>(id),
+            .name = std::move(name),
+            .type = std::move(type),
+            .children = {},
+            .default_expr = nullptr,
+            .is_partition_key = false,
+    });
+    return &parent->children.back();
+}
+
+void inherit_schema_metadata(format::ColumnDefinition* column,
+                             const format::ColumnDefinition* schema_column) {
+    if (column == nullptr || schema_column == nullptr) {
+        return;
+    }
+    column->name_mapping = schema_column->name_mapping;
+}
+
+const format::ColumnDefinition* find_schema_child_by_path(
+        const format::ColumnDefinition* schema_column, const std::string& child_path) {
+    if (schema_column == nullptr) {
+        return nullptr;
+    }
+    int32_t parsed_field_id = -1;
+    if (parse_non_negative_int(child_path, &parsed_field_id)) {
+        const auto child_it = std::ranges::find_if(
+                schema_column->children, [&](const format::ColumnDefinition& child) {
+                    return child.has_identifier_field_id() &&
+                           child.get_identifier_field_id() == parsed_field_id;
+                });
+        return child_it == schema_column->children.end() ? nullptr : &*child_it;
+    }
+    const auto child_it = std::ranges::find_if(schema_column->children, [&](const auto& child) {
+        if (to_lower(child.name) == to_lower(child_path)) {
+            return true;
+        }
+        return std::ranges::any_of(child.name_mapping, [&](const std::string& alias) {
+            return to_lower(alias) == to_lower(child_path);
+        });
+    });
+    return child_it == schema_column->children.end() ? nullptr : &*child_it;
+}
+
+int32_t schema_field_id(const format::ColumnDefinition* schema_column) {
+    if (schema_column == nullptr || !schema_column->has_identifier_field_id()) {
+        return -1;
+    }
+    return schema_column->get_identifier_field_id();
+}
+
+int32_t schema_field_id_or(const format::ColumnDefinition* schema_column, int32_t fallback) {
+    const auto field_id = schema_field_id(schema_column);
+    return field_id >= 0 ? field_id : fallback;
+}
+
+std::string schema_field_name_or(const format::ColumnDefinition* schema_column,
+                                 std::string fallback) {
+    return schema_column == nullptr || schema_column->name.empty() ? std::move(fallback)
+                                                                   : schema_column->name;
+}
+
+struct AccessPathNode {
+    bool project_all = false;
+    std::map<std::string, AccessPathNode> children;
+};
+
+void merge_access_path_node(AccessPathNode* dst, const AccessPathNode& src) {
+    DORIS_CHECK(dst != nullptr);
+    if (dst->project_all) {
+        return;
+    }
+    if (src.project_all) {
+        dst->project_all = true;
+        dst->children.clear();
+        return;
+    }
+    for (const auto& [path, child] : src.children) {
+        merge_access_path_node(&dst->children[path], child);
+    }
+}
+
+void insert_access_path(AccessPathNode* root, const std::vector<std::string>& path,
+                        size_t path_idx) {
+    DORIS_CHECK(root != nullptr);
+    if (root->project_all) {
+        return;
+    }
+    if (path_idx >= path.size()) {
+        root->project_all = true;
+        root->children.clear();
+        return;
+    }
+    insert_access_path(&root->children[path[path_idx]], path, path_idx + 1);
+}
+
+Status build_nested_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypePtr& type, const AccessPathNode& node,
+                                              const std::string& path,
+                                              const format::ColumnDefinition* schema_column);
+
+// Expand a full complex-column projection into table-schema children when the table format provides
+// an external/current schema. Without this, `SELECT complex_col` or `SELECT *` leaves
+// ColumnDefinition::children empty, so ColumnMapper treats the root complex column as a scalar
+// mapping and later tries to cast the old file shape to the current table shape directly.
+//
+// Examples:
+//   - STRUCT country/city projected from an old file STRUCT country/population/location should
+//     create children country and city, so city can be materialized as missing/default.
+//   - ARRAY<STRUCT<item, quantity>> should create the array element wrapper and then the element
+//     struct children item and quantity.
+//   - MAP<STRING, STRUCT<full_name, age>> should create semantic children key/value directly, then
+//     expand the value struct children full_name and age. Do not introduce a physical entries
+//     wrapper here: ColumnMapper and TableReader treat MAP children as [key, value].
+Status build_all_nested_children_from_schema(format::ColumnDefinition* column,
+                                             const DataTypePtr& type, const std::string& path,
+                                             const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+
+    const auto nested_type = remove_nullable(type);
+    AccessPathNode project_all;
+    project_all.project_all = true;
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_STRUCT: {
+        const auto& struct_type = assert_cast<const DataTypeStruct&>(*nested_type);
+        for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) {
+            const auto field_name = struct_type.get_element_name(field_idx);
+            const auto* schema_child = find_schema_child_by_path(schema_column, field_name);
+            auto* child = find_or_add_child(
+                    column, schema_field_id_or(schema_child, cast_set<int32_t>(field_idx)),
+                    schema_field_name_or(schema_child, field_name),
+                    struct_type.get_element(field_idx));
+            inherit_schema_metadata(child, schema_child);
+            RETURN_IF_ERROR(build_nested_children_from_access_node(
+                    child, child->type, project_all, path + "." + child->name, schema_child));
+        }
+        return Status::OK();
+    }
+    case TYPE_ARRAY: {
+        const auto& array_type = assert_cast<const DataTypeArray&>(*nested_type);
+        const auto* element_schema = schema_column != nullptr && !schema_column->children.empty()
+                                             ? &schema_column->children[0]
+                                             : nullptr;
+        auto* child = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element",
+                                        array_type.get_nested_type());
+        inherit_schema_metadata(child, element_schema);
+        return build_nested_children_from_access_node(child, child->type, project_all, path + ".*",
+                                                      element_schema);
+    }
+    case TYPE_MAP: {
+        const auto& map_type = assert_cast<const DataTypeMap&>(*nested_type);
+        const auto* key_schema = schema_column != nullptr && !schema_column->children.empty()
+                                         ? &schema_column->children[0]
+                                         : nullptr;
+        const auto* value_schema = schema_column != nullptr && schema_column->children.size() > 1
+                                           ? &schema_column->children[1]
+                                           : nullptr;
+        auto* key_child = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key",
+                                            map_type.get_key_type());
+        inherit_schema_metadata(key_child, key_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                key_child, key_child->type, project_all, path + ".KEYS", key_schema));
+        auto* value_child = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value",
+                                              map_type.get_value_type());
+        inherit_schema_metadata(value_child, value_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                value_child, value_child->type, project_all, path + ".VALUES", value_schema));
+        return Status::OK();
+    }
+    default:
+        return Status::OK();
+    }
+}
+
+Status build_struct_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypeStruct& struct_type,
+                                              const AccessPathNode& node, const std::string& path,
+                                              const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    for (const auto& [child_path, child_node] : node.children) {
+        // Struct children are resolved by name or schema field id. We do not treat a numeric
+        // child token as a struct ordinal, because `col.0` becomes ambiguous once the struct
+        // evolves. Position-based access needs a separate design if it is required later.
+        if (child_path == "OFFSET" || child_path == "*" || child_path == "KEYS" ||
+            child_path == "VALUES") {
+            return Status::NotSupported(
+                    "AccessPathParser does not support access path {} for slot {}",
+                    path + "." + child_path, column->name);
+        }
+
+        // Prefer the table/schema ColumnDefinition because it carries field ids and aliases.
+        // Fallback to the struct type name only for formats without external schema metadata.
+        const auto* schema_child = find_schema_child_by_path(schema_column, child_path);
+        int32_t field_id = schema_field_id(schema_child);
+        std::string field_name = schema_child == nullptr ? child_path : schema_child->name;
+        DataTypePtr field_type = schema_child == nullptr ? nullptr : schema_child->type;
+        if (field_id < 0 || field_type == nullptr) {
+            for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) {
+                if (to_lower(struct_type.get_element_name(field_idx)) == to_lower(field_name)) {
+                    field_id = cast_set<int32_t>(field_idx);
+                    field_name = struct_type.get_element_name(field_idx);
+                    field_type = struct_type.get_element(field_idx);
+                    break;
+                }
+            }
+        }
+
+        if (field_id < 0 || field_type == nullptr) {
+            return Status::NotSupported(
+                    "AccessPathParser does not support access path {} for slot {}",
+                    path + "." + child_path, column->name);
+        }
+        // TODO: For TVF Parquet files without field ids, this fallback uses the struct ordinal as
+        // the table child identifier. BY_NAME mapping should instead keep a string identifier and
+        // let TableColumnMapper resolve the file-local child id from the Parquet schema.
+        auto* child = find_or_add_child(column, field_id, field_name, field_type);
+        inherit_schema_metadata(child, schema_child);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                child, child->type, child_node, path + "." + child_path, schema_child));
+    }
+    return Status::OK();
+}
+
+Status build_map_children_from_access_node(format::ColumnDefinition* column,
+                                           const DataTypeMap& map_type, const AccessPathNode& node,
+                                           const std::string& path,
+                                           const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    AccessPathNode key_node;
+    AccessPathNode value_node;
+    bool need_key = false;
+    bool need_value = false;
+
+    for (const auto& [child_path, child_node] : node.children) {
+        if (child_path == "OFFSET") {
+            return Status::NotSupported(
+                    "AccessPathParser does not support access path {} for slot {}",
+                    path + "." + child_path, column->name);
+        }
+        if (child_path == "KEYS") {
+            need_key = true;
+            merge_access_path_node(&key_node, child_node);
+            continue;
+        }
+        if (child_path == "VALUES") {
+            need_key = true;
+            key_node.project_all = true;
+            key_node.children.clear();
+            need_value = true;
+            merge_access_path_node(&value_node, child_node);
+            continue;
+        }
+        if (child_path == "*") {
+            need_key = true;
+            key_node.project_all = true;
+            key_node.children.clear();
+            need_value = true;
+            merge_access_path_node(&value_node, child_node);
+            continue;
+        }
+        return Status::NotSupported("AccessPathParser does not support access path {} for slot {}",
+                                    path + "." + child_path, column->name);
+    }
+    if (need_key && !need_value) {
+        // A key-only MAP projection is not independently materializable yet. FileScannerV2 can
+        // describe a projection such as `m.KEYS`, but the downstream file block -> table block path
+        // still builds a ColumnMap from key column + value column + offsets. If the value child is
+        // omitted here, TableReader/ColumnMapper cannot reconstruct a valid table MAP column even
+        // though the query only needs keys.
+        //
+        // Example:
+        //   SELECT map_keys(m) FROM t;
+        // or
+        //   SELECT * FROM t WHERE array_contains(map_keys(m), 'k1');
+        //
+        // The access path only asks for `m.KEYS`, but the scan still has to read `m.VALUES` as a
+        // temporary full projection until map materialization supports constructing a table MAP
+        // from keys only.
+        need_value = true;
+        value_node.project_all = true;
+        value_node.children.clear();
+    }
+
+    if (!need_key && !need_value) {
+        return Status::OK();
+    }
+
+    const auto* key_schema = schema_column != nullptr && !schema_column->children.empty()
+                                     ? &schema_column->children[0]
+                                     : nullptr;
+    const auto* value_schema = schema_column != nullptr && schema_column->children.size() > 1
+                                       ? &schema_column->children[1]
+                                       : nullptr;
+    if (need_key) {
+        auto* key_child = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key",
+                                            map_type.get_key_type());
+        inherit_schema_metadata(key_child, key_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(key_child, key_child->type, key_node,
+                                                               path + ".KEYS", key_schema));
+    }
+    if (need_value) {
+        auto* value_child = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value",
+                                              map_type.get_value_type());
+        inherit_schema_metadata(value_child, value_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                value_child, value_child->type, value_node, path + ".VALUES", value_schema));
+    }
+    return Status::OK();
+}
+
+Status build_nested_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypePtr& type, const AccessPathNode& node,
+                                              const std::string& path,
+                                              const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    if (node.project_all || node.children.empty()) {
+        return build_all_nested_children_from_schema(column, type, path, schema_column);
+    }
+
+    const auto nested_type = remove_nullable(type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_STRUCT:
+        return build_struct_children_from_access_node(
+                column, assert_cast<const DataTypeStruct&>(*nested_type), node, path,
+                schema_column);
+    case TYPE_ARRAY: {
+        if (node.children.size() != 1 || !node.children.contains("*")) {
+            return Status::NotSupported(
+                    "AccessPathParser does not support access path {} for slot {}", path,
+                    column->name);
+        }
+        const auto& array_type = assert_cast<const DataTypeArray&>(*nested_type);
+        const auto* element_schema = schema_column != nullptr && !schema_column->children.empty()
+                                             ? &schema_column->children[0]
+                                             : nullptr;
+        auto* child = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element",
+                                        array_type.get_nested_type());
+        inherit_schema_metadata(child, element_schema);
+        return build_nested_children_from_access_node(child, child->type, node.children.at("*"),
+                                                      path + ".*", element_schema);
+    }
+    case TYPE_MAP:
+        return build_map_children_from_access_node(
+                column, assert_cast<const DataTypeMap&>(*nested_type), node, path, schema_column);
+    default:
+        return Status::NotSupported("AccessPathParser does not support access path {} for slot {}",
+                                    path, column->name);
+    }
+}
+
+} // namespace
+
+Status AccessPathParser::build_nested_children(format::ColumnDefinition* column,
+                                               const std::vector<TColumnAccessPath>& access_paths,
+                                               const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    if (is_scanner_materialized_virtual_column(column->name)) {
+        return Status::OK();
+    }
+    if (!is_complex_type(remove_nullable(column->type)->get_primitive_type())) {
+        return Status::OK();
+    }
+
+    AccessPathNode root;
+    // Build tree for AccessPathNode.
+    // For example, for access paths ["a.b", "a.c", "d"], the tree will be:
+    // root
+    // ├── a
+    // │   ├── b
+    // │   └── c
+    // └── d
+    for (const auto& access_path : access_paths) {
+        // TODO: Support META access paths if needed. Currently AccessPathParser only supports
+        // DATA access paths.
+        if (access_path.type != TAccessPathType::DATA || !access_path.__isset.data_access_path) {
+            return Status::NotSupported(
+                    "AccessPathParser only supports DATA access paths for slot {}", column->name);
+        }
+        const auto& path = access_path.data_access_path.path;
+        if (path.empty()) {
+            insert_access_path(&root, path, 0);
+            continue;
+        }
+        int32_t top_level_id = -1;
+        if (to_lower(path.front()) != to_lower(column->name) &&
+            (!parse_non_negative_int(path.front(), &top_level_id) ||
+             !column->has_identifier_field_id() ||
+             top_level_id != column->get_identifier_field_id())) {
+            return Status::NotSupported("AccessPathParser access path {} does not match slot {}",
+                                        access_path_to_string(path), column->name);
+        }
+        insert_access_path(&root, path, 1);
+    }
+    // Recursively build nested children for the column based on the AccessPathNode tree.
+    return build_nested_children_from_access_node(column, column->type, root, column->name,
+                                                  schema_column);
+}
+
+Status AccessPathParser::build_nested_children(format::ColumnDefinition* column,
+                                               const SlotDescriptor* slot_desc,
+                                               const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    DORIS_CHECK(slot_desc != nullptr);
+    return build_nested_children(column, slot_desc->all_access_paths(), schema_column);
+}
+
+} // namespace doris
diff --git a/be/src/exec/scan/access_path_parser.h b/be/src/exec/scan/access_path_parser.h
new file mode 100644
index 00000000000000..1aa4c5b89d492a
--- /dev/null
+++ b/be/src/exec/scan/access_path_parser.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+
+class SlotDescriptor;
+
+class AccessPathParser {
+public:
+    static Status build_nested_children(format::ColumnDefinition* column,
+                                        const SlotDescriptor* slot_desc,
+                                        const format::ColumnDefinition* schema_column);
+
+    static Status build_nested_children(format::ColumnDefinition* column,
+                                        const std::vector<TColumnAccessPath>& access_paths,
+                                        const format::ColumnDefinition* schema_column);
+};
+
+} // namespace doris
diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp
index 6419ce4f65c5e2..6811efcdd5da6e 100644
--- a/be/src/exec/scan/file_scanner.cpp
+++ b/be/src/exec/scan/file_scanner.cpp
@@ -1078,8 +1078,31 @@ Status FileScanner::_get_next_reader() {
                 _cur_reader = std::move(mc_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "paimon") {
-                if (_state->query_options().__isset.enable_paimon_cpp_reader &&
-                    _state->query_options().enable_paimon_cpp_reader) {
+                const auto& paimon_params = range.table_format_params.paimon_params;
+                bool use_paimon_cpp_reader = false;
+                if (paimon_params.__isset.reader_type) {
+                    switch (paimon_params.reader_type) {
+                    case TPaimonReaderType::PAIMON_CPP:
+                        use_paimon_cpp_reader = true;
+                        break;
+                    case TPaimonReaderType::PAIMON_JNI:
+                        break;
+                    case TPaimonReaderType::PAIMON_NATIVE:
+                        return Status::InternalError(
+                                "invalid PAIMON_NATIVE reader_type for paimon FORMAT_JNI split, "
+                                "possibly caused by FE/BE protocol mismatch");
+                    default:
+                        return Status::InternalError(
+                                "unknown paimon reader_type for paimon FORMAT_JNI split, possibly "
+                                "caused by FE/BE protocol mismatch");
+                    }
+                } else {
+                    // TODO: Remove this fallback after all FE versions set TPaimonReaderType.
+                    use_paimon_cpp_reader =
+                            _state->query_options().__isset.enable_paimon_cpp_reader &&
+                            _state->query_options().enable_paimon_cpp_reader;
+                }
+                if (use_paimon_cpp_reader) {
                     auto cpp_reader = PaimonCppReader::create_unique(_file_slot_descs, _state,
                                                                      _profile, range, _params);
                     if (!_is_load && !_push_down_conjuncts.empty()) {
@@ -1771,7 +1794,6 @@ Status FileScanner::_init_expr_ctxes() {
         if (is_file_slot) {
             _is_file_slot.emplace(slot_id);
             _file_slot_descs.emplace_back(it->second);
-            _file_col_names.push_back(it->second->col_name());
         }
 
         _column_descs.push_back(col_desc);
diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h
index fbcbca464a5546..3675fd2449711e 100644
--- a/be/src/exec/scan/file_scanner.h
+++ b/be/src/exec/scan/file_scanner.h
@@ -135,8 +135,6 @@ class FileScanner : public Scanner {
     bool _cur_reader_eof = false;
     // File source slot descriptors
     std::vector<SlotDescriptor*> _file_slot_descs;
-    // col names from _file_slot_descs
-    std::vector<std::string> _file_col_names;
     // Unified column descriptors for init_reader (includes file, partition, missing, synthesized cols)
     std::vector<ColumnDescriptor> _column_descs;
 
@@ -149,6 +147,7 @@ class FileScanner : public Scanner {
     // dest slot name to index in _dest_vexpr_ctx;
     std::unordered_map<std::string, int> _dest_slot_name_to_idx;
     // col name to default value expr
+    // TODO: only used by json reader. Could we delete this?
     std::unordered_map<std::string, VExprContextSPtr> _col_default_value_ctx;
     // the map values of dest slot id to src slot desc
     // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr
@@ -195,7 +194,6 @@ class FileScanner : public Scanner {
     std::shared_ptr<io::IOContext> _io_ctx;
 
     // Whether to fill partition columns from path, default is true.
-    bool _fill_partition_from_path = true;
     std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
             _partition_col_descs;
     std::unordered_map<std::string, bool> _partition_value_is_null;
diff --git a/be/src/exec/scan/file_scanner_v2.cpp b/be/src/exec/scan/file_scanner_v2.cpp
new file mode 100644
index 00000000000000..92398fe0bf5d88
--- /dev/null
+++ b/be/src/exec/scan/file_scanner_v2.cpp
@@ -0,0 +1,715 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/file_scanner_v2.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <gen_cpp/PlanNodes_types.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/config.h"
+#include "common/consts.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/column/column.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/string_ref.h"
+#include "exec/common/util.hpp"
+#include "exec/operator/scan_operator.h"
+#include "exec/scan/access_path_parser.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format/format_common.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/iceberg_sys_table_reader.h"
+#include "format_v2/jni/jdbc_reader.h"
+#include "format_v2/jni/max_compute_jni_reader.h"
+#include "format_v2/jni/trino_connector_jni_reader.h"
+#include "format_v2/table/hive_reader.h"
+#include "format_v2/table/hudi_reader.h"
+#include "format_v2/table/iceberg_reader.h"
+#include "format_v2/table/paimon_reader.h"
+#include "format_v2/table_reader.h"
+#include "io/fs/file_meta_cache.h"
+#include "io/io_common.h"
+#include "runtime/descriptors.h"
+#include "runtime/exec_env.h"
+#include "runtime/runtime_state.h"
+#include "service/backend_options.h"
+#include "storage/id_manager.h"
+
+namespace doris {
+namespace {
+
+std::string table_format_name(const TFileRangeDesc& range) {
+    return range.__isset.table_format_params ? range.table_format_params.table_format_type
+                                             : "NotSet";
+}
+
+TFileFormatType::type get_range_format_type(const TFileScanRangeParams& params,
+                                            const TFileRangeDesc& range) {
+    return range.__isset.format_type ? range.format_type : params.format_type;
+}
+
+bool is_supported_table_format(const TFileRangeDesc& range) {
+    const auto table_format = table_format_name(range);
+    if (table_format == "hudi" && range.__isset.table_format_params &&
+        range.table_format_params.__isset.hudi_params &&
+        range.table_format_params.hudi_params.__isset.delta_logs &&
+        !range.table_format_params.hudi_params.delta_logs.empty()) {
+        // Hudi MOR splits need log-file merge semantics and must stay on the existing JNI path.
+        // FileScannerV2 currently supports native Parquet data files only.
+        return false;
+    }
+    return table_format == "NotSet" || table_format == "tvf" || table_format == "hive" ||
+           table_format == "iceberg" || table_format == "paimon" || table_format == "hudi";
+}
+
+bool is_supported_jni_table_format(const TFileRangeDesc& range) {
+    const auto table_format = table_format_name(range);
+    if (table_format == "paimon") {
+        return range.__isset.table_format_params &&
+               range.table_format_params.__isset.paimon_params &&
+               range.table_format_params.paimon_params.__isset.reader_type &&
+               range.table_format_params.paimon_params.reader_type == TPaimonReaderType::PAIMON_JNI;
+    }
+    return table_format == "jdbc" || table_format == "iceberg" || table_format == "hudi" ||
+           table_format == "max_compute" || table_format == "trino_connector";
+}
+
+bool is_csv_format(TFileFormatType::type format_type) {
+    switch (format_type) {
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_PROTO:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool is_text_format(TFileFormatType::type format_type) {
+    return format_type == TFileFormatType::FORMAT_TEXT;
+}
+
+bool is_json_format(TFileFormatType::type format_type) {
+    return format_type == TFileFormatType::FORMAT_JSON;
+}
+
+bool is_partition_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) {
+    if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) ||
+        column_name == BeConsts::ICEBERG_ROWID_COL) {
+        return false;
+    }
+    return slot_info.__isset.category ? slot_info.category == TColumnCategory::PARTITION_KEY
+                                      : !slot_info.is_file_slot;
+}
+
+bool is_data_file_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) {
+    if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) ||
+        column_name == BeConsts::ICEBERG_ROWID_COL) {
+        return false;
+    }
+    // CSV and other non-self-describing formats need FE slot descriptors for only the columns that
+    // are physically read from the file. Partition/default/virtual columns stay in TableReader's
+    // mapping layer and are materialized after the file-local block is read. New FE provides an
+    // explicit category; old FE falls back to `is_file_slot`.
+    if (slot_info.__isset.category) {
+        return slot_info.category == TColumnCategory::REGULAR ||
+               slot_info.category == TColumnCategory::GENERATED;
+    }
+    return slot_info.is_file_slot;
+}
+
+Status rewrite_slot_refs_to_global_index(
+        VExprSPtr* expr,
+        const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index) {
+    DORIS_CHECK(expr != nullptr);
+    if (*expr == nullptr) {
+        return Status::OK();
+    }
+    if ((*expr)->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr->get());
+        const auto global_index_it = slot_id_to_global_index.find(slot_ref->slot_id());
+        if (global_index_it == slot_id_to_global_index.end()) {
+            DORIS_CHECK(slot_ref->slot_id() >= 0);
+            const auto global_index = format::GlobalIndex(cast_set<size_t>(slot_ref->slot_id()));
+            *expr = VSlotRef::create_shared(cast_set<int>(global_index.value()),
+                                            cast_set<int>(global_index.value()), -1,
+                                            slot_ref->data_type(), slot_ref->column_name());
+            RETURN_IF_ERROR(expr->get()->prepare(nullptr, RowDescriptor(), nullptr));
+            return Status::OK();
+        }
+        const auto global_index = global_index_it->second;
+        *expr = VSlotRef::create_shared(cast_set<int>(global_index.value()),
+                                        cast_set<int>(global_index.value()), -1,
+                                        slot_ref->data_type(), slot_ref->column_name());
+        RETURN_IF_ERROR(expr->get()->prepare(nullptr, RowDescriptor(), nullptr));
+        return Status::OK();
+    }
+    auto children = (*expr)->children();
+    for (auto& child : children) {
+        if (child == nullptr) {
+            continue;
+        }
+        RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&child, slot_id_to_global_index));
+    }
+    (*expr)->set_children(std::move(children));
+    return Status::OK();
+}
+
+} // namespace
+
+#ifdef BE_TEST
+Status FileScannerV2::TEST_to_file_format(TFileFormatType::type format_type,
+                                          format::FileFormat* file_format) {
+    return _to_file_format(format_type, file_format);
+}
+
+bool FileScannerV2::TEST_is_partition_slot(const TFileScanSlotInfo& slot_info,
+                                           const std::string& column_name) {
+    return is_partition_slot(slot_info, column_name);
+}
+
+bool FileScannerV2::TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info,
+                                           const std::string& column_name) {
+    return is_data_file_slot(slot_info, column_name);
+}
+
+Status FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+        VExprSPtr* expr,
+        const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index) {
+    return rewrite_slot_refs_to_global_index(expr, slot_id_to_global_index);
+}
+#endif
+
+bool FileScannerV2::is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range) {
+    const auto format_type = get_range_format_type(params, range);
+    if (format_type == TFileFormatType::FORMAT_PARQUET) {
+        return is_supported_table_format(range);
+    } else if (format_type == TFileFormatType::FORMAT_JNI) {
+        return is_supported_jni_table_format(range);
+    } else if (is_csv_format(format_type) || is_text_format(format_type) ||
+               is_json_format(format_type)) {
+        return is_supported_table_format(range);
+    } else {
+        LOG(WARNING) << "Unsupported file format type " << format_type << " for file scanner v2";
+        return false;
+    }
+}
+
+FileScannerV2::FileScannerV2(RuntimeState* state, FileScanLocalState* local_state, int64_t limit,
+                             std::shared_ptr<SplitSourceConnector> split_source,
+                             RuntimeProfile* profile, ShardedKVCache* kv_cache,
+                             const std::unordered_map<std::string, int>* colname_to_slot_id)
+        : Scanner(state, local_state, limit, profile),
+          _split_source(std::move(split_source)),
+          _kv_cache(kv_cache) {
+    (void)colname_to_slot_id;
+    if (state->get_query_ctx() != nullptr &&
+        state->get_query_ctx()->file_scan_range_params_map.count(local_state->parent_id()) > 0) {
+        _params = &(state->get_query_ctx()->file_scan_range_params_map[local_state->parent_id()]);
+    } else {
+        _params = _split_source->get_params();
+    }
+}
+
+Status FileScannerV2::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) {
+    RETURN_IF_ERROR(Scanner::init(state, conjuncts));
+    _get_block_timer =
+            ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerV2GetBlockTime", 1);
+    _file_counter =
+            ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "FileNumber", TUnit::UNIT, 1);
+    _file_read_bytes_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(),
+                                                      "FileReadBytes", TUnit::BYTES, 1);
+    _file_read_calls_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(),
+                                                      "FileReadCalls", TUnit::UNIT, 1);
+    _file_read_time_counter =
+            ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileReadTime", 1);
+    _file_cache_statistics = std::make_unique<io::FileCacheStatistics>();
+    _file_reader_stats = std::make_unique<io::FileReaderStats>();
+    RETURN_IF_ERROR(_init_io_ctx());
+    _io_ctx->file_cache_stats = _file_cache_statistics.get();
+    _io_ctx->file_reader_stats = _file_reader_stats.get();
+    _io_ctx->is_disposable = _state->query_options().disable_file_cache;
+    return Status::OK();
+}
+
+Status FileScannerV2::_open_impl(RuntimeState* state) {
+    RETURN_IF_CANCELLED(state);
+    RETURN_IF_ERROR(Scanner::_open_impl(state));
+    RETURN_IF_ERROR(_split_source->get_next(&_first_scan_range, &_current_range));
+    if (_first_scan_range) {
+        RETURN_IF_ERROR(_create_table_reader_for_format(_current_range, &_table_reader));
+        DORIS_CHECK(_table_reader != nullptr);
+        RETURN_IF_ERROR(_init_expr_ctxes());
+        RETURN_IF_ERROR(_init_table_reader(_current_range));
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_get_block_impl(RuntimeState* state, Block* block, bool* eof) {
+    while (true) {
+        RETURN_IF_CANCELLED(state);
+        if (!_has_prepared_split) {
+            RETURN_IF_ERROR(_prepare_next_split(eof));
+            if (*eof) {
+                return Status::OK();
+            }
+        }
+
+        {
+            SCOPED_TIMER(_get_block_timer);
+            RETURN_IF_ERROR(_table_reader->get_block(block, eof));
+        }
+        if (*eof) {
+            _state->update_num_finished_scan_range(1);
+            _has_prepared_split = false;
+            *eof = false;
+            continue;
+        }
+        return Status::OK();
+    }
+}
+
+Status FileScannerV2::_prepare_next_split(bool* eos) {
+    bool has_next = _first_scan_range;
+    if (!_first_scan_range) {
+        RETURN_IF_ERROR(_split_source->get_next(&has_next, &_current_range));
+    }
+    _first_scan_range = false;
+    if (!has_next || _should_stop) {
+        *eos = true;
+        return Status::OK();
+    }
+    DORIS_CHECK(_table_reader != nullptr);
+    _current_range_path = _current_range.path;
+    RETURN_IF_ERROR(_prepare_table_reader_split(_current_range));
+    COUNTER_UPDATE(_file_counter, 1);
+    _has_prepared_split = true;
+    *eos = false;
+    return Status::OK();
+}
+
+Status FileScannerV2::_init_table_reader(const TFileRangeDesc& range) {
+    const auto format_type = get_range_format_type(*_params, range);
+    format::FileFormat file_format;
+    RETURN_IF_ERROR(_to_file_format(format_type, &file_format));
+    DORIS_CHECK(_table_reader != nullptr);
+
+    format::TableColumnPredicates table_column_predicates;
+    RETURN_IF_ERROR(_build_table_column_predicates(&table_column_predicates));
+    VExprContextSPtrs table_conjuncts;
+    RETURN_IF_ERROR(_build_table_conjuncts(&table_conjuncts));
+    RETURN_IF_ERROR(_table_reader->init({
+            .projected_columns = _projected_columns,
+            .column_predicates = std::move(table_column_predicates),
+            .conjuncts = std::move(table_conjuncts),
+            .format = file_format,
+            .scan_params = const_cast<TFileScanRangeParams*>(_params),
+            .io_ctx = _io_ctx,
+            .runtime_state = _state,
+            .scanner_profile = _local_state->scanner_profile(),
+            .file_slot_descs = &_file_slot_descs,
+            .push_down_agg_type = _local_state->get_push_down_agg_type(),
+            .condition_cache_digest = _local_state->get_condition_cache_digest(),
+    }));
+    return Status::OK();
+}
+
+Status FileScannerV2::_create_table_reader_for_format(
+        const TFileRangeDesc& range, std::unique_ptr<format::TableReader>* reader) const {
+    DORIS_CHECK(reader != nullptr);
+    const auto table_format = table_format_name(range);
+    if (table_format == "NotSet" || table_format == "tvf") {
+        *reader = std::make_unique<format::TableReader>();
+    } else if (table_format == "hive") {
+        *reader = format::hive::HiveReader::create_unique();
+    } else if (table_format == "iceberg") {
+        if (get_range_format_type(*_params, range) == TFileFormatType::FORMAT_JNI) {
+            *reader = std::make_unique<format::iceberg::IcebergSysTableJniReader>();
+        } else {
+            *reader = std::make_unique<format::iceberg::IcebergTableReader>();
+        }
+    } else if (table_format == "paimon") {
+        *reader = std::make_unique<format::paimon::PaimonHybridReader>();
+    } else if (table_format == "hudi") {
+        *reader = std::make_unique<format::hudi::HudiHybridReader>();
+    } else if (table_format == "jdbc") {
+        *reader = std::make_unique<format::jdbc::JdbcJniReader>();
+    } else if (table_format == "max_compute") {
+        const auto* mc_desc =
+                static_cast<const MaxComputeTableDescriptor*>(_output_tuple_desc->table_desc());
+        RETURN_IF_ERROR(mc_desc->init_status());
+        *reader = std::make_unique<format::max_compute::MaxComputeJniReader>(mc_desc);
+    } else if (table_format == "trino_connector") {
+        *reader = std::make_unique<format::trino_connector::TrinoConnectorJniReader>();
+    } else {
+        return Status::NotSupported("FileScannerV2 does not support table format {}", table_format);
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_prepare_table_reader_split(const TFileRangeDesc& range) {
+    std::map<std::string, Field> partition_values;
+    RETURN_IF_ERROR(_generate_partition_values(range, &partition_values));
+    RETURN_IF_ERROR(_table_reader->prepare_split({
+            .partition_values = std::move(partition_values),
+            .cache = _kv_cache,
+            .current_range = range,
+            .global_rowid_context = _create_global_rowid_context(range),
+    }));
+    return Status::OK();
+}
+
+bool FileScannerV2::_should_enable_file_meta_cache() const {
+    return ExecEnv::GetInstance()->file_meta_cache()->enabled() &&
+           _split_source->num_scan_ranges() < config::max_external_file_meta_cache_num / 3;
+}
+
+std::optional<format::GlobalRowIdContext> FileScannerV2::_create_global_rowid_context(
+        const TFileRangeDesc& range) const {
+    if (!_need_global_rowid_column) {
+        return std::nullopt;
+    }
+    auto& id_file_map = _state->get_id_file_map();
+    DORIS_CHECK(id_file_map != nullptr);
+    const auto file_id = id_file_map->get_file_mapping_id(
+            std::make_shared<FileMapping>(_local_state->cast<FileScanLocalState>().parent_id(),
+                                          range, _should_enable_file_meta_cache()));
+    return format::GlobalRowIdContext {
+            .version = IdManager::ID_VERSION,
+            .backend_id = BackendOptions::get_backend_id(),
+            .file_id = file_id,
+    };
+}
+
+Status FileScannerV2::_generate_partition_values(
+        const TFileRangeDesc& range, std::map<std::string, Field>* partition_values) const {
+    DORIS_CHECK(partition_values != nullptr);
+    partition_values->clear();
+    if (!range.__isset.columns_from_path_keys || !range.__isset.columns_from_path) {
+        return Status::OK();
+    }
+    DORIS_CHECK(range.columns_from_path_keys.size() == range.columns_from_path.size());
+    for (size_t idx = 0; idx < range.columns_from_path_keys.size(); ++idx) {
+        const auto& key = range.columns_from_path_keys[idx];
+        const auto it = _partition_slot_descs.find(key);
+        if (it == _partition_slot_descs.end()) {
+            continue;
+        }
+        const auto& value = range.columns_from_path[idx];
+        const bool is_null = range.__isset.columns_from_path_is_null &&
+                             idx < range.columns_from_path_is_null.size() &&
+                             range.columns_from_path_is_null[idx];
+        Field field;
+        DORIS_CHECK(it->second.slot_desc != nullptr);
+        RETURN_IF_ERROR(_parse_partition_value(it->second.slot_desc, value, is_null, &field));
+        partition_values->emplace(it->second.canonical_name, std::move(field));
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_parse_partition_value(const SlotDescriptor* slot_desc,
+                                             const std::string& value, bool is_null,
+                                             Field* field) const {
+    DORIS_CHECK(slot_desc != nullptr);
+    DORIS_CHECK(field != nullptr);
+    if (is_null) {
+        *field = Field::create_field<TYPE_NULL>(Null());
+        return Status::OK();
+    }
+    const auto data_type = remove_nullable(slot_desc->get_data_type_ptr());
+    auto column = data_type->create_column();
+    auto serde = data_type->get_serde();
+    DataTypeSerDe::FormatOptions options;
+    options.converted_from_string = true;
+    StringRef ref(value.data(), value.size());
+    RETURN_IF_ERROR(serde->from_string(ref, *column, options));
+    DORIS_CHECK(column->size() == 1);
+    *field = (*column)[0];
+    return Status::OK();
+}
+
+Status FileScannerV2::_init_expr_ctxes() {
+    _slot_id_to_desc.clear();
+    _slot_id_to_global_index.clear();
+    _partition_slot_descs.clear();
+    _file_slot_descs.clear();
+    for (const auto* slot_desc : _output_tuple_desc->slots()) {
+        _slot_id_to_desc.emplace(slot_desc->id(), slot_desc);
+    }
+    DORIS_CHECK(_table_reader != nullptr);
+    RETURN_IF_ERROR(_build_projected_columns(*_table_reader));
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_projected_columns(const format::TableReader& table_reader) {
+    _projected_columns.clear();
+    _projected_columns.reserve(_params->required_slots.size());
+    _need_global_rowid_column = false;
+    format::ProjectedColumnBuildContext build_context {
+            .scan_params = _params,
+            .range = &_current_range,
+            .runtime_state = _state,
+    };
+
+    for (size_t slot_idx = 0; slot_idx < _params->required_slots.size(); ++slot_idx) {
+        const auto& slot_info = _params->required_slots[slot_idx];
+        const auto it = _slot_id_to_desc.find(slot_info.slot_id);
+        if (it == _slot_id_to_desc.end()) {
+            return Status::InternalError("Unknown source slot descriptor, slot_id={}",
+                                         slot_info.slot_id);
+        }
+        auto column = _build_table_column(it->second);
+        if (column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
+            _need_global_rowid_column = true;
+        }
+        RETURN_IF_ERROR(_build_default_expr(slot_info, &column.default_expr));
+        build_context.schema_column.reset();
+        RETURN_IF_ERROR(table_reader.annotate_projected_column(slot_info, &build_context, &column));
+        // Build nested children from access paths generated by the slot's access-path
+        // expressions. A projected column can therefore contain only a subset of the schema
+        // column's nested children.
+        RETURN_IF_ERROR(AccessPathParser::build_nested_children(
+                &column, it->second,
+                build_context.schema_column.has_value() ? &*build_context.schema_column : nullptr));
+        if (is_partition_slot(slot_info, column.name)) {
+            column.is_partition_key = true;
+            _partition_slot_descs.emplace(
+                    column.name,
+                    PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name});
+            for (const auto& alias : column.name_mapping) {
+                _partition_slot_descs.emplace(
+                        alias,
+                        PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name});
+            }
+        } else if (is_data_file_slot(slot_info, column.name)) {
+            _file_slot_descs.push_back(const_cast<SlotDescriptor*>(it->second));
+        }
+        const auto global_index = format::GlobalIndex(slot_idx);
+        _slot_id_to_global_index.emplace(slot_info.slot_id, global_index);
+        _projected_columns.push_back(std::move(column));
+    }
+    RETURN_IF_ERROR(table_reader.validate_projected_columns(build_context));
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_default_expr(const TFileScanSlotInfo& slot_info,
+                                          VExprContextSPtr* ctx) const {
+    DORIS_CHECK(ctx != nullptr);
+    if (slot_info.__isset.default_value_expr && !slot_info.default_value_expr.nodes.empty()) {
+        return VExpr::create_expr_tree(slot_info.default_value_expr, *ctx);
+    }
+
+    if (_params->__isset.default_value_of_src_slot) {
+        const auto it = _params->default_value_of_src_slot.find(slot_info.slot_id);
+        if (it != _params->default_value_of_src_slot.end() && !it->second.nodes.empty()) {
+            return VExpr::create_expr_tree(it->second, *ctx);
+        }
+    }
+    return Status::OK();
+}
+
+format::ColumnDefinition FileScannerV2::_build_table_column(const SlotDescriptor* slot_desc) {
+    DORIS_CHECK(slot_desc != nullptr);
+    format::ColumnDefinition column;
+    // TODO(gabriel): why always BY_NAME here?
+    column.identifier = Field::create_field<TYPE_STRING>(slot_desc->col_name());
+    column.name = slot_desc->col_name();
+    column.type = slot_desc->get_data_type_ptr();
+    return column;
+}
+
+Status FileScannerV2::_build_table_column_predicates(
+        format::TableColumnPredicates* predicates) const {
+    DORIS_CHECK(predicates != nullptr);
+    predicates->clear();
+    const auto& slot_predicates = _local_state->cast<FileScanLocalState>()._slot_id_to_predicates;
+    for (const auto& [slot_id, slot_predicate_list] : slot_predicates) {
+        const auto it = _slot_id_to_desc.find(slot_id);
+        if (it == _slot_id_to_desc.end()) {
+            continue;
+        }
+        const auto global_index_it = _slot_id_to_global_index.find(slot_id);
+        if (global_index_it == _slot_id_to_global_index.end()) {
+            continue;
+        }
+        (*predicates)[global_index_it->second] = slot_predicate_list;
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_table_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear();
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& conjunct : _conjuncts) {
+        VExprSPtr root;
+        RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root));
+        RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&root, _slot_id_to_global_index));
+        conjuncts->push_back(VExprContext::create_shared(std::move(root)));
+    }
+    return Status::OK();
+}
+
+TFileFormatType::type FileScannerV2::_get_current_format_type() const {
+    return get_range_format_type(*_params, _current_range);
+}
+
+Status FileScannerV2::_to_file_format(TFileFormatType::type format_type,
+                                      format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    switch (format_type) {
+    case TFileFormatType::FORMAT_PARQUET:
+        *file_format = format::FileFormat::PARQUET;
+        return Status::OK();
+    case TFileFormatType::FORMAT_JNI:
+        *file_format = format::FileFormat::JNI;
+        return Status::OK();
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_PROTO:
+        *file_format = format::FileFormat::CSV;
+        return Status::OK();
+    case TFileFormatType::FORMAT_TEXT:
+        *file_format = format::FileFormat::TEXT;
+        return Status::OK();
+    case TFileFormatType::FORMAT_JSON:
+        *file_format = format::FileFormat::JSON;
+        return Status::OK();
+    default:
+        return Status::NotSupported("FileScannerV2 does not support file format {}",
+                                    to_string(format_type));
+    }
+}
+
+Status FileScannerV2::_init_io_ctx() {
+    _io_ctx = std::make_shared<io::IOContext>();
+    _io_ctx->query_id = &_state->query_id();
+    return Status::OK();
+}
+
+Status FileScannerV2::close(RuntimeState* state) {
+    if (!_try_close()) {
+        return Status::OK();
+    }
+    if (_table_reader != nullptr) {
+        RETURN_IF_ERROR(_table_reader->close());
+        _report_condition_cache_profile();
+        _table_reader.reset();
+    }
+    return Scanner::close(state);
+}
+
+void FileScannerV2::try_stop() {
+    Scanner::try_stop();
+    if (_io_ctx) {
+        _io_ctx->should_stop = true;
+    }
+}
+
+void FileScannerV2::update_realtime_counters() {
+    if (_file_reader_stats == nullptr) {
+        return;
+    }
+    const int64_t bytes_read = _file_reader_stats->read_bytes;
+    COUNTER_SET(_file_read_bytes_counter, bytes_read);
+    COUNTER_SET(_file_read_calls_counter, cast_set<int64_t>(_file_reader_stats->read_calls));
+    COUNTER_SET(_file_read_time_counter, cast_set<int64_t>(_file_reader_stats->read_time_ns));
+}
+
+void FileScannerV2::_collect_profile_before_close() {
+    _report_file_reader_predicate_filtered_rows();
+    Scanner::_collect_profile_before_close();
+    if (_file_reader_stats != nullptr) {
+        COUNTER_SET(_file_read_bytes_counter, cast_set<int64_t>(_file_reader_stats->read_bytes));
+        COUNTER_SET(_file_read_calls_counter, cast_set<int64_t>(_file_reader_stats->read_calls));
+        COUNTER_SET(_file_read_time_counter, cast_set<int64_t>(_file_reader_stats->read_time_ns));
+    }
+    // Query profiles can be collected before Scanner::close() runs. Publish condition-cache
+    // counters here as well, using deltas so this method and close() cannot double count.
+    _report_condition_cache_profile();
+}
+
+bool FileScannerV2::_should_update_load_counters() const {
+    if (_is_load) {
+        return true;
+    }
+    // TVF based loads (e.g. http_stream, group commit relay) plan the load source as a
+    // tvf query scan without src tuple desc, so _is_load is false. But rows filtered by
+    // the load's WHERE clause still need to be reported as unselected rows. FILE_STREAM
+    // is only reachable from such load entries, never from normal queries, so use it to
+    // identify these scanners.
+    return (_params != nullptr && _params->__isset.file_type &&
+            _params->file_type == TFileType::FILE_STREAM) ||
+           (_current_range.__isset.file_type && _current_range.file_type == TFileType::FILE_STREAM);
+}
+
+void FileScannerV2::_report_file_reader_predicate_filtered_rows() {
+    const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->predicate_filtered_rows : 0;
+    const int64_t filtered_delta = filtered_rows - _reported_predicate_filtered_rows;
+    if (filtered_delta > 0) {
+        // File readers can evaluate localized conjuncts before a block reaches Scanner. Count
+        // those rows as scanner-level unselected rows so load statistics stay identical no matter
+        // whether a predicate is pushed down or evaluated by Scanner::_filter_output_block().
+        _counter.num_rows_unselected += filtered_delta;
+        _reported_predicate_filtered_rows = filtered_rows;
+    }
+}
+
+void FileScannerV2::_report_condition_cache_profile() {
+    auto* local_state = static_cast<FileScanLocalState*>(_local_state);
+    const int64_t hit_count =
+            _table_reader != nullptr ? _table_reader->condition_cache_hit_count() : 0;
+    const int64_t hit_delta = hit_count - _reported_condition_cache_hit_count;
+    if (hit_delta > 0) {
+        COUNTER_UPDATE(local_state->_condition_cache_hit_counter, hit_delta);
+        _reported_condition_cache_hit_count = hit_count;
+    }
+    const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->condition_cache_filtered_rows : 0;
+    const int64_t filtered_delta = filtered_rows - _reported_condition_cache_filtered_rows;
+    if (filtered_delta > 0) {
+        COUNTER_UPDATE(local_state->_condition_cache_filtered_rows_counter, filtered_delta);
+        _reported_condition_cache_filtered_rows = filtered_rows;
+    }
+}
+
+} // namespace doris
diff --git a/be/src/exec/scan/file_scanner_v2.h b/be/src/exec/scan/file_scanner_v2.h
new file mode 100644
index 00000000000000..7140842b12ff3d
--- /dev/null
+++ b/be/src/exec/scan/file_scanner_v2.h
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "common/factory_creator.h"
+#include "common/status.h"
+#include "core/block/block.h"
+#include "exec/operator/file_scan_operator.h"
+#include "exec/scan/scanner.h"
+#include "exec/scan/split_source_connector.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Descriptors_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris {
+
+class RuntimeState;
+class SlotDescriptor;
+class TFileRangeDesc;
+class TFileScanRangeParams;
+class ShardedKVCache;
+
+class FileScannerV2 final : public Scanner {
+    ENABLE_FACTORY_CREATOR(FileScannerV2);
+
+public:
+    static constexpr const char* NAME = "FileScannerV2";
+
+    static bool is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range);
+#ifdef BE_TEST
+    static Status TEST_to_file_format(TFileFormatType::type format_type,
+                                      format::FileFormat* file_format);
+    static bool TEST_is_partition_slot(const TFileScanSlotInfo& slot_info,
+                                       const std::string& column_name);
+    static bool TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info,
+                                       const std::string& column_name);
+    static Status TEST_rewrite_slot_refs_to_global_index(
+            VExprSPtr* expr,
+            const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index);
+#endif
+
+    FileScannerV2(RuntimeState* state, FileScanLocalState* parent, int64_t limit,
+                  std::shared_ptr<SplitSourceConnector> split_source, RuntimeProfile* profile,
+                  ShardedKVCache* kv_cache,
+                  const std::unordered_map<std::string, int>* colname_to_slot_id);
+
+    Status init(RuntimeState* state, const VExprContextSPtrs& conjuncts) override;
+    Status _open_impl(RuntimeState* state) override;
+    Status close(RuntimeState* state) override;
+    void try_stop() override;
+    std::string get_name() override { return FileScannerV2::NAME; }
+    std::string get_current_scan_range_name() override { return _current_range_path; }
+    void update_realtime_counters() override;
+
+protected:
+    Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override;
+    void _collect_profile_before_close() override;
+    bool _should_update_load_counters() const override;
+
+private:
+    TFileFormatType::type _get_current_format_type() const;
+    Status _init_io_ctx();
+    Status _init_expr_ctxes();
+    Status _prepare_next_split(bool* eos);
+    Status _init_table_reader(const TFileRangeDesc& range);
+    Status _create_table_reader_for_format(const TFileRangeDesc& range,
+                                           std::unique_ptr<format::TableReader>* reader) const;
+    Status _prepare_table_reader_split(const TFileRangeDesc& range);
+    bool _should_enable_file_meta_cache() const;
+    std::optional<format::GlobalRowIdContext> _create_global_rowid_context(
+            const TFileRangeDesc& range) const;
+    Status _generate_partition_values(const TFileRangeDesc& range,
+                                      std::map<std::string, Field>* partition_values) const;
+    Status _parse_partition_value(const SlotDescriptor* slot_desc, const std::string& value,
+                                  bool is_null, Field* field) const;
+    Status _build_projected_columns(const format::TableReader& table_reader);
+    Status _build_default_expr(const TFileScanSlotInfo& slot_info, VExprContextSPtr* ctx) const;
+    static format::ColumnDefinition _build_table_column(const SlotDescriptor* slot_desc);
+    Status _build_table_column_predicates(format::TableColumnPredicates* predicates) const;
+    Status _build_table_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static Status _to_file_format(TFileFormatType::type format_type,
+                                  format::FileFormat* file_format);
+    void _report_file_reader_predicate_filtered_rows();
+    void _report_condition_cache_profile();
+
+    struct PartitionSlotInfo {
+        const SlotDescriptor* slot_desc = nullptr;
+        std::string canonical_name;
+    };
+
+    const TFileScanRangeParams* _params = nullptr;
+    std::shared_ptr<SplitSourceConnector> _split_source;
+    bool _first_scan_range = false;
+    bool _has_prepared_split = false;
+    TFileRangeDesc _current_range;
+    std::string _current_range_path;
+
+    std::unique_ptr<format::TableReader> _table_reader;
+    std::vector<format::ColumnDefinition> _projected_columns;
+    // File formats without embedded schema, such as CSV, still need the FE slot descriptors in
+    // file-column order. This mirrors old FileScanner::_file_slot_descs and is passed only to
+    // readers that cannot derive their schema from file metadata.
+    std::vector<SlotDescriptor*> _file_slot_descs;
+    bool _need_global_rowid_column = false;
+    std::unordered_map<int32_t, const SlotDescriptor*> _slot_id_to_desc;
+    std::unordered_map<int32_t, format::GlobalIndex> _slot_id_to_global_index;
+    std::unordered_map<std::string, PartitionSlotInfo> _partition_slot_descs;
+
+    std::unique_ptr<io::FileCacheStatistics> _file_cache_statistics;
+    std::unique_ptr<io::FileReaderStats> _file_reader_stats;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    ShardedKVCache* _kv_cache = nullptr;
+
+    RuntimeProfile::Counter* _get_block_timer = nullptr;
+    RuntimeProfile::Counter* _file_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_bytes_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_calls_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_time_counter = nullptr;
+    int64_t _reported_predicate_filtered_rows = 0;
+    int64_t _reported_condition_cache_hit_count = 0;
+    int64_t _reported_condition_cache_filtered_rows = 0;
+};
+
+} // namespace doris
diff --git a/be/src/exec/scan/split_source_connector.h b/be/src/exec/scan/split_source_connector.h
index 5926baff303cbf..320f6f90d0dd02 100644
--- a/be/src/exec/scan/split_source_connector.h
+++ b/be/src/exec/scan/split_source_connector.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <functional>
+
 #include "common/config.h"
 #include "core/custom_allocator.h"
 #include "runtime/runtime_state.h"
@@ -45,6 +47,15 @@ class SplitSourceConnector {
 
     virtual TFileScanRangeParams* get_params() = 0;
 
+    virtual bool all_scan_ranges_match(
+            const TFileScanRangeParams& params,
+            const std::function<bool(const TFileScanRangeParams&, const TFileRangeDesc&)>&
+                    predicate) {
+        (void)params;
+        (void)predicate;
+        return false;
+    }
+
 protected:
     template <typename T, typename V1 = std::vector<T>, typename V2 = std::vector<T>>
         requires(std::is_same_v<std::remove_cvref_t<V1>,
@@ -125,6 +136,24 @@ class LocalSplitSourceConnector : public SplitSourceConnector {
         throw Exception(
                 Status::FatalError("Unreachable, params is got by file_scan_range_params_map"));
     }
+
+    bool all_scan_ranges_match(
+            const TFileScanRangeParams& params,
+            const std::function<bool(const TFileScanRangeParams&, const TFileRangeDesc&)>&
+                    predicate) override {
+        if (_scan_ranges.empty()) {
+            return false;
+        }
+        for (const auto& scan_range : _scan_ranges) {
+            const auto& file_scan_range = scan_range.scan_range.ext_scan_range.file_scan_range;
+            for (const auto& range : file_scan_range.ranges) {
+                if (!predicate(params, range)) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
 };
 
 /**
diff --git a/be/src/exec/sink/writer/vhive_partition_writer.cpp b/be/src/exec/sink/writer/vhive_partition_writer.cpp
index 686732b4712e06..f10922e908d01a 100644
--- a/be/src/exec/sink/writer/vhive_partition_writer.cpp
+++ b/be/src/exec/sink/writer/vhive_partition_writer.cpp
@@ -88,6 +88,8 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* operator_
                                          to_string(_hive_compress_type));
         }
         }
+        // TODO: INT96 is kept for Hive 2/3 compatibility. Add an explicit option before
+        // changing the default Hive parquet timestamp encoding to standard logical types.
         ParquetFileOptions parquet_options = {parquet_compression_type,
                                               TParquetVersion::PARQUET_1_0, false, true};
         _file_format_transformer = std::make_unique<VParquetTransformer>(
diff --git a/be/src/exprs/runtime_filter_expr.cpp b/be/src/exprs/runtime_filter_expr.cpp
index 8544c809206785..a4f47ac9257bef 100644
--- a/be/src/exprs/runtime_filter_expr.cpp
+++ b/be/src/exprs/runtime_filter_expr.cpp
@@ -63,6 +63,17 @@ RuntimeFilterExpr::RuntimeFilterExpr(const TExprNode& node, VExprSPtr impl, doub
           _filter_id(filter_id),
           _sampling_frequency(sampling_frequency) {}
 
+Status RuntimeFilterExpr::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    DORIS_CHECK(_impl != nullptr);
+    VExprSPtr cloned_impl;
+    RETURN_IF_ERROR(_impl->deep_clone(&cloned_impl));
+    *cloned_expr = RuntimeFilterExpr::create_shared(clone_texpr_node(), std::move(cloned_impl),
+                                                    _ignore_thredhold, _null_aware, _filter_id,
+                                                    _sampling_frequency);
+    return Status::OK();
+}
+
 Status RuntimeFilterExpr::prepare(RuntimeState* state, const RowDescriptor& desc,
                                   VExprContext* context) {
     RETURN_IF_ERROR_OR_PREPARED(_impl->prepare(state, desc, context));
@@ -87,7 +98,7 @@ void RuntimeFilterExpr::close(VExprContext* context, FunctionContext::FunctionSt
 Status RuntimeFilterExpr::execute_column_impl(VExprContext* context, const Block* block,
                                               const Selector* selector, size_t count,
                                               ColumnPtr& result_column) const {
-    return Status::InternalError("Not implement RuntimeFilterExpr::execute_column_impl");
+    return _impl->execute_column(context, block, selector, count, result_column);
 }
 
 const std::string& RuntimeFilterExpr::expr_name() const {
diff --git a/be/src/exprs/runtime_filter_expr.h b/be/src/exprs/runtime_filter_expr.h
index ceb4324d9d56f5..d9cea40781d1f9 100644
--- a/be/src/exprs/runtime_filter_expr.h
+++ b/be/src/exprs/runtime_filter_expr.h
@@ -23,6 +23,7 @@
 #include <cstdint>
 #include <memory>
 #include <string>
+#include <utility>
 
 #include "common/config.h"
 #include "common/status.h"
@@ -80,6 +81,8 @@ class RuntimeFilterExpr final : public VExpr {
     }
 
     VExprSPtr get_impl() const override { return _impl; }
+    void set_impl(VExprSPtr impl) { _impl = std::move(impl); }
+    Status clone_node(VExprSPtr* cloned_expr) const override;
 
     void attach_profile_counter(std::shared_ptr<RuntimeProfile::Counter> rf_input_rows,
                                 std::shared_ptr<RuntimeProfile::Counter> rf_filter_rows,
@@ -112,6 +115,9 @@ class RuntimeFilterExpr final : public VExpr {
     std::shared_ptr<RuntimeProfile::Counter> predicate_always_true_rows_counter() const {
         return _always_true_filter_rows;
     }
+    bool is_slot_ref() const override { return false; }
+    bool is_virtual_slot_ref() const override { return false; }
+    bool is_column_ref() const override { return false; }
 
 private:
     VExprSPtr _impl;
diff --git a/be/src/exprs/short_circuit_evaluation_expr.h b/be/src/exprs/short_circuit_evaluation_expr.h
index 47a37b360c6e90..7240207aacad71 100644
--- a/be/src/exprs/short_circuit_evaluation_expr.h
+++ b/be/src/exprs/short_circuit_evaluation_expr.h
@@ -63,6 +63,13 @@ class ShortCircuitIfExpr final : public ShortCircuitExpr {
     ~ShortCircuitIfExpr() override = default;
 
     const std::string& expr_name() const override { return IF_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitIfExpr::create_shared(node);
+        return Status::OK();
+    }
 
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
@@ -76,6 +83,18 @@ class ShortCircuitCaseExpr final : public ShortCircuitExpr {
     ShortCircuitCaseExpr(const TExprNode& node);
     ~ShortCircuitCaseExpr() override = default;
     const std::string& expr_name() const override { return CASE_NAME; }
+    bool has_else_expr() const { return _has_else_expr; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TCaseExpr case_node;
+        case_node.__set_has_case_expr(false);
+        case_node.__set_has_else_expr(_has_else_expr);
+        node.__set_case_expr(case_node);
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitCaseExpr::create_shared(node);
+        return Status::OK();
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
@@ -91,6 +110,13 @@ class ShortCircuitIfNullExpr final : public ShortCircuitExpr {
     ~ShortCircuitIfNullExpr() override = default;
 
     const std::string& expr_name() const override { return IFNULL_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitIfNullExpr::create_shared(node);
+        return Status::OK();
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
@@ -104,10 +130,17 @@ class ShortCircuitCoalesceExpr final : public ShortCircuitExpr {
     ShortCircuitCoalesceExpr(const TExprNode& node) : ShortCircuitExpr(node) {}
     ~ShortCircuitCoalesceExpr() override = default;
     const std::string& expr_name() const override { return COALESCE_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitCoalesceExpr::create_shared(node);
+        return Status::OK();
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
 private:
     inline static const std::string COALESCE_NAME = "coalesce";
 };
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/exprs/vbloom_predicate.h b/be/src/exprs/vbloom_predicate.h
index f23bde0d9ad3fd..410bb5c8d370b3 100644
--- a/be/src/exprs/vbloom_predicate.h
+++ b/be/src/exprs/vbloom_predicate.h
@@ -59,6 +59,13 @@ class VBloomPredicate final : public VExpr {
     std::shared_ptr<BloomFilterFuncBase> get_bloom_filter_func() const override { return _filter; }
 
     uint64_t get_digest(uint64_t seed) const override;
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto cloned = VBloomPredicate::create_shared(clone_texpr_node());
+        cloned->set_filter(_filter);
+        *cloned_expr = std::move(cloned);
+        return Status::OK();
+    }
 
 private:
     Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter,
diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h
index 97b2551091d100..6787283f0c5d23 100644
--- a/be/src/exprs/vcase_expr.h
+++ b/be/src/exprs/vcase_expr.h
@@ -59,6 +59,17 @@ class VCaseExpr final : public VExpr {
     void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
     const std::string& expr_name() const override;
     std::string debug_string() const override;
+    bool has_else_expr() const { return _has_else_expr; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TCaseExpr case_node;
+        case_node.__set_has_case_expr(false);
+        case_node.__set_has_else_expr(_has_else_expr);
+        node.__set_case_expr(case_node);
+        *cloned_expr = VCaseExpr::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     template <typename IndexType, typename ColumnType>
diff --git a/be/src/exprs/vcast_expr.h b/be/src/exprs/vcast_expr.h
index c3f2526794b3b8..f0f3ead95d56af 100644
--- a/be/src/exprs/vcast_expr.h
+++ b/be/src/exprs/vcast_expr.h
@@ -57,6 +57,11 @@ class VCastExpr : public VExpr {
     const DataTypePtr& get_target_type() const;
 
     virtual std::string cast_name() const { return "CAST"; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VCastExpr::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
 
     uint64_t get_digest(uint64_t seed) const override {
         auto res = VExpr::get_digest(seed);
@@ -94,6 +99,13 @@ class TryCastExpr final : public VCastExpr {
                                size_t count, ColumnPtr& result_column) const override;
     ~TryCastExpr() override = default;
     std::string cast_name() const override { return "TRY CAST"; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_is_cast_nullable(_original_cast_return_is_nullable);
+        *cloned_expr = TryCastExpr::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     DataTypePtr original_cast_return_type() const;
diff --git a/be/src/exprs/vcolumn_ref.h b/be/src/exprs/vcolumn_ref.h
index e4485e5815e02f..33ade77defaaba 100644
--- a/be/src/exprs/vcolumn_ref.h
+++ b/be/src/exprs/vcolumn_ref.h
@@ -81,6 +81,19 @@ class VColumnRef final : public VExpr {
         }
     }
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TColumnRef column_ref;
+        column_ref.__set_column_id(_column_id);
+        column_ref.__set_column_name(_column_name);
+        node.__set_column_ref(column_ref);
+        auto cloned = VColumnRef::create_shared(node);
+        cloned->set_gap(_gap.load());
+        *cloned_expr = std::move(cloned);
+        return Status::OK();
+    }
+
     std::string debug_string() const override {
         std::stringstream out;
         out << "VColumnRef(slot_id: " << _column_id << ",column_name: " << _column_name
diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h
index 0c8fd46d8bdf37..51945d2ccba62e 100644
--- a/be/src/exprs/vcompound_pred.h
+++ b/be/src/exprs/vcompound_pred.h
@@ -58,6 +58,11 @@ class VCompoundPred : public VectorizedFnCall {
 #endif
 
     const std::string& expr_name() const override { return _expr_name; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VCompoundPred::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
 
     Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override {
         segment_v2::InvertedIndexResultBitmap res;
diff --git a/be/src/exprs/vcondition_expr.h b/be/src/exprs/vcondition_expr.h
index ceb6ed1396d78f..6beade74b3e3d6 100644
--- a/be/src/exprs/vcondition_expr.h
+++ b/be/src/exprs/vcondition_expr.h
@@ -65,6 +65,11 @@ class VectorizedIfExpr : public VConditionExpr {
                                size_t count, ColumnPtr& result_column) const override;
 
     const std::string& expr_name() const override { return IF_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VectorizedIfExpr::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
     inline static const std::string IF_NAME = "if";
 
 protected:
@@ -123,6 +128,11 @@ class VectorizedIfNullExpr : public VectorizedIfExpr {
 public:
     VectorizedIfNullExpr(const TExprNode& node) : VectorizedIfExpr(node) {}
     const std::string& expr_name() const override { return IF_NULL_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VectorizedIfNullExpr::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
     inline static const std::string IF_NULL_NAME = "ifnull";
 
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
@@ -137,6 +147,11 @@ class VectorizedCoalesceExpr : public VConditionExpr {
                                size_t count, ColumnPtr& result_column) const override;
     VectorizedCoalesceExpr(const TExprNode& node) : VConditionExpr(node) {}
     const std::string& expr_name() const override { return NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VectorizedCoalesceExpr::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
     inline static const std::string NAME = "coalesce";
 };
 
diff --git a/be/src/exprs/vdirect_in_predicate.h b/be/src/exprs/vdirect_in_predicate.h
index 882c8d4de69030..fed92a35a23afb 100644
--- a/be/src/exprs/vdirect_in_predicate.h
+++ b/be/src/exprs/vdirect_in_predicate.h
@@ -68,6 +68,12 @@ class VDirectInPredicate final : public VExpr {
 
     std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; }
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VDirectInPredicate::create_shared(clone_texpr_node(), _filter);
+        return Status::OK();
+    }
+
     bool get_slot_in_expr(VExprSPtr& new_root) const {
         if (!get_child(0)->is_slot_ref()) {
             return false;
diff --git a/be/src/exprs/vectorized_fn_call.cpp b/be/src/exprs/vectorized_fn_call.cpp
index b5ac9cf0c76634..9704a6629a933b 100644
--- a/be/src/exprs/vectorized_fn_call.cpp
+++ b/be/src/exprs/vectorized_fn_call.cpp
@@ -80,7 +80,9 @@ const static std::set<std::string> DISTANCE_FUNCS = {L2DistanceApproximate::name
 const static std::set<TExprOpcode::type> OPS_FOR_ANN_RANGE_SEARCH = {
         TExprOpcode::GE, TExprOpcode::LE, TExprOpcode::LE, TExprOpcode::GT, TExprOpcode::LT};
 
-VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) {}
+VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) {
+    _function_name = _fn.name.function_name;
+}
 
 Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc,
                                  VExprContext* context) {
diff --git a/be/src/exprs/vectorized_fn_call.h b/be/src/exprs/vectorized_fn_call.h
index ff7b8174e8a7ad..7b721260daad5e 100644
--- a/be/src/exprs/vectorized_fn_call.h
+++ b/be/src/exprs/vectorized_fn_call.h
@@ -99,6 +99,12 @@ class VectorizedFnCall : public VExpr {
                                   segment_v2::AnnRangeSearchRuntime& runtime,
                                   bool& suitable_for_ann_index) override;
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<VectorizedFnCall>(*this);
+        return Status::OK();
+    }
+
 protected:
     FunctionBasePtr _function;
     std::string _expr_name;
diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp
index 7f6b75ce6ddeab..b5b701e2829ca2 100644
--- a/be/src/exprs/vexpr.cpp
+++ b/be/src/exprs/vexpr.cpp
@@ -372,6 +372,51 @@ VExpr::VExpr(DataTypePtr type, bool is_slotref)
     }
 }
 
+TExprNode VExpr::clone_texpr_node() const {
+    TExprNode node;
+    node.__set_node_type(_node_type);
+    node.__set_opcode(_opcode);
+    node.__set_type(create_type_desc(remove_nullable(_data_type)->get_primitive_type(),
+                                     static_cast<int>(_data_type->get_precision()),
+                                     static_cast<int>(_data_type->get_scale())));
+    node.__set_is_nullable(_data_type->is_nullable());
+    node.__set_num_children(get_num_children());
+    node.__set_fn(_fn);
+    return node;
+}
+
+Status VExpr::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    return Status::NotSupported("Cannot clone expression {} for file-local rewrite", expr_name());
+}
+
+Status VExpr::deep_clone(VExprSPtr* cloned_expr,
+                         const VExprCloneNodeOverride& clone_node_override) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+
+    VExprSPtr cloned;
+    if (clone_node_override) {
+        RETURN_IF_ERROR(clone_node_override(*this, &cloned));
+    }
+    if (cloned == nullptr) {
+        RETURN_IF_ERROR(clone_node(&cloned));
+    }
+    DORIS_CHECK(cloned != nullptr);
+
+    VExprSPtrs cloned_children;
+    cloned_children.reserve(_children.size());
+    for (const auto& child : _children) {
+        DORIS_CHECK(child != nullptr);
+        VExprSPtr cloned_child;
+        RETURN_IF_ERROR(child->deep_clone(&cloned_child, clone_node_override));
+        cloned_children.push_back(std::move(cloned_child));
+    }
+    cloned->set_children(std::move(cloned_children));
+    cloned->reset_prepare_state();
+    *cloned_expr = std::move(cloned);
+    return Status::OK();
+}
+
 Status VExpr::prepare(RuntimeState* state, const RowDescriptor& row_desc, VExprContext* context) {
     ++context->_depth_num;
     if (context->_depth_num > config::max_depth_of_expr_tree) {
@@ -401,6 +446,15 @@ Status VExpr::open(RuntimeState* state, VExprContext* context,
     return Status::OK();
 }
 
+void VExpr::reset_prepare_state() {
+    _prepared = false;
+    _prepare_finished = false;
+    _open_finished = false;
+    for (auto& child : _children) {
+        child->reset_prepare_state();
+    }
+}
+
 void VExpr::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
     for (auto& i : _children) {
         i->close(context, scope);
@@ -751,8 +805,9 @@ Status VExpr::get_const_col(VExprContext* context,
         return Status::OK();
     }
 
-    if (_constant_col != nullptr) {
-        DCHECK(column_wrapper != nullptr);
+    if (_constant_col != nullptr && column_wrapper == nullptr) {
+        return Status::OK();
+    } else if (_constant_col != nullptr) {
         *column_wrapper = _constant_col;
         return Status::OK();
     }
diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h
index 58bb14abe47cab..1851e9cb64a356 100644
--- a/be/src/exprs/vexpr.h
+++ b/be/src/exprs/vexpr.h
@@ -24,6 +24,7 @@
 
 #include <cstddef>
 #include <cstdint>
+#include <functional>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -79,6 +80,7 @@ struct AnnRangeSearchRuntime;
 // the relatioinship between threads and classes.
 
 using Selector = IColumn::Selector;
+using VExprCloneNodeOverride = std::function<Status(const VExpr&, VExprSPtr*)>;
 
 struct AnnRangeSearchEvaluationResult {
     // Indicates whether the expr row_bitmap has been updated.
@@ -210,11 +212,13 @@ class VExpr {
 
     const DataTypePtr& data_type() const { return _data_type; }
 
-    bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
+    virtual bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
 
-    bool is_virtual_slot_ref() const { return _node_type == TExprNodeType::VIRTUAL_SLOT_REF; }
+    virtual bool is_virtual_slot_ref() const {
+        return _node_type == TExprNodeType::VIRTUAL_SLOT_REF;
+    }
 
-    bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; }
+    virtual bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; }
 
     virtual bool is_literal() const { return false; }
 
@@ -248,6 +252,10 @@ class VExpr {
 
     static bool contains_blockable_function(const VExprContextSPtrs& ctxs);
 
+    Status deep_clone(VExprSPtr* cloned_expr,
+                      const VExprCloneNodeOverride& clone_node_override = {}) const;
+    virtual Status clone_node(VExprSPtr* cloned_expr) const;
+
     bool is_nullable() const { return _data_type->is_nullable(); }
 
     PrimitiveType result_type() const { return _data_type->get_primitive_type(); }
@@ -262,6 +270,7 @@ class VExpr {
     virtual const VExprSPtrs& children() const { return _children; }
     void set_children(const VExprSPtrs& children) { _children = children; }
     void set_children(VExprSPtrs&& children) { _children = std::move(children); }
+    void reset_prepare_state();
     virtual std::string debug_string() const;
     static std::string debug_string(const VExprSPtrs& exprs);
     static std::string debug_string(const VExprContextSPtrs& ctxs);
@@ -269,7 +278,7 @@ class VExpr {
     static ColumnPtr filter_column_with_selector(const ColumnPtr& origin_column,
                                                  const Selector* selector, size_t count) {
         if (selector == nullptr) {
-            DCHECK_EQ(origin_column->size(), count);
+            DCHECK_EQ(origin_column->size(), count) << origin_column->get_name();
             return origin_column;
         }
         DCHECK_EQ(count, selector->size());
@@ -363,6 +372,8 @@ class VExpr {
     virtual uint64_t get_digest(uint64_t seed) const;
 
 protected:
+    TExprNode clone_texpr_node() const;
+
     /// Simple debug string that provides no expr subclass-specific information
     std::string debug_string(const std::string& expr_name) const {
         std::stringstream out;
diff --git a/be/src/exprs/vin_predicate.h b/be/src/exprs/vin_predicate.h
index 1d3ad4e4ce7c3e..bfdccfb6c1b586 100644
--- a/be/src/exprs/vin_predicate.h
+++ b/be/src/exprs/vin_predicate.h
@@ -60,6 +60,15 @@ class VInPredicate MOCK_REMOVE(final) : public VExpr {
     Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
 
     uint64_t get_digest(uint64_t seed) const override { return 0; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TInPredicate in_predicate;
+        in_predicate.__set_is_not_in(_is_not_in);
+        node.__set_in_predicate(in_predicate);
+        *cloned_expr = VInPredicate::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     FunctionBasePtr _function;
@@ -70,4 +79,4 @@ class VInPredicate MOCK_REMOVE(final) : public VExpr {
     uint32_t _in_list_value_count_threshold = 10;
     bool _is_args_all_constant = false;
 };
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/exprs/vliteral.cpp b/be/src/exprs/vliteral.cpp
index 551839f699e2e6..9b93d7097274ee 100644
--- a/be/src/exprs/vliteral.cpp
+++ b/be/src/exprs/vliteral.cpp
@@ -37,12 +37,6 @@ namespace doris {
 
 class VExprContext;
 
-void VLiteral::init(const TExprNode& node) {
-    Field field;
-    field = _data_type->get_field(node);
-    _column_ptr = _data_type->create_column_const(1, field);
-}
-
 Status VLiteral::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) {
     RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
     return Status::OK();
diff --git a/be/src/exprs/vliteral.h b/be/src/exprs/vliteral.h
index b1b8e89157d420..89988e2ba31142 100644
--- a/be/src/exprs/vliteral.h
+++ b/be/src/exprs/vliteral.h
@@ -24,6 +24,7 @@
 #include "common/status.h"
 #include "core/data_type/data_type.h"
 #include "core/data_type_serde/data_type_serde.h"
+#include "core/field.h"
 #include "exprs/vexpr.h"
 
 namespace doris {
@@ -39,10 +40,19 @@ class VLiteral : public VExpr {
     VLiteral(const TExprNode& node, bool should_init = true)
             : VExpr(node), _expr_name(_data_type->get_name()) {
         if (should_init) {
-            init(node);
+            Field field;
+            field = _data_type->get_field(node);
+            _column_ptr = _data_type->create_column_const(1, field);
         }
     }
 
+    VLiteral(const DataTypePtr& type, const Field& field) : VExpr(type, false) {
+        _data_type = type;
+        _column_ptr = _data_type->create_column_const(1, field);
+        _node_type = TExprNodeType::LITERAL;
+        _expr_name = _data_type->get_name();
+    }
+
 #ifdef BE_TEST
     VLiteral() = default;
     MOCK_FUNCTION std::string value() const;
@@ -67,13 +77,18 @@ class VLiteral : public VExpr {
     bool equals(const VExpr& other) override;
 
     uint64_t get_digest(uint64_t seed) const override;
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        Field field;
+        _column_ptr->get(0, field);
+        *cloned_expr = VLiteral::create_shared(_data_type, field);
+        return Status::OK();
+    }
 
 protected:
+    VLiteral(const DataTypePtr& type) : VExpr(type, false) {}
     ColumnPtr _column_ptr;
     std::string _expr_name;
-
-private:
-    void init(const TExprNode& node);
 };
 
 } // namespace doris
diff --git a/be/src/exprs/vslot_ref.cpp b/be/src/exprs/vslot_ref.cpp
index 87aad6b977ecbe..f02ef50d5751c3 100644
--- a/be/src/exprs/vslot_ref.cpp
+++ b/be/src/exprs/vslot_ref.cpp
@@ -41,10 +41,28 @@ VSlotRef::VSlotRef(const doris::TExprNode& node)
 VSlotRef::VSlotRef(const SlotDescriptor* desc)
         : VExpr(desc->type(), true), _slot_id(desc->id()), _column_id(-1), _column_name(nullptr) {}
 
+VSlotRef::VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type,
+                   std::string column_name)
+        : VExpr(type, true),
+          _slot_id(slot_id),
+          _column_id(column_id),
+          _column_uniq_id(column_uniq_id),
+          _owned_column_name(std::move(column_name)),
+          _column_name(&_owned_column_name) {}
+
 Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
                          VExprContext* context) {
-    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
     DCHECK_EQ(_children.size(), 0);
+    if (_prepared) {
+        return Status::OK();
+    }
+    if (_column_id >= 0 && _column_name != nullptr) {
+        _prepared = true;
+        _prepare_finished = true;
+        return Status::OK();
+    }
+    _prepared = true;
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
     if (_slot_id == -1) {
         _prepare_finished = true;
         return Status::OK();
@@ -109,6 +127,27 @@ DataTypePtr VSlotRef::execute_type(const Block* block) const {
     return block->get_by_position(_column_id).type;
 }
 
+Status VSlotRef::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (_column_id >= 0 && _column_name != nullptr) {
+        *cloned_expr = VSlotRef::create_shared(_slot_id, _column_id, _column_uniq_id, _data_type,
+                                               *_column_name);
+        return Status::OK();
+    }
+    auto node = clone_texpr_node();
+    TSlotRef slot_ref;
+    slot_ref.__set_slot_id(_slot_id);
+    node.__set_slot_ref(slot_ref);
+    node.__set_label(_column_label);
+    auto cloned = VSlotRef::create_shared(node);
+    auto* cloned_slot_ref = static_cast<VSlotRef*>(cloned.get());
+    cloned_slot_ref->_column_id = _column_id;
+    cloned_slot_ref->_column_uniq_id = _column_uniq_id;
+    cloned_slot_ref->_column_name = _column_name;
+    *cloned_expr = std::move(cloned);
+    return Status::OK();
+}
+
 const std::string& VSlotRef::expr_name() const {
     return *_column_name;
 }
diff --git a/be/src/exprs/vslot_ref.h b/be/src/exprs/vslot_ref.h
index ef61edc384c2f2..a67bdc1953cd0a 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/exprs/vslot_ref.h
@@ -31,12 +31,14 @@ class TExprNode;
 class Block;
 class VExprContext;
 
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
+class VSlotRef : public VExpr {
     ENABLE_FACTORY_CREATOR(VSlotRef);
 
 public:
     VSlotRef(const TExprNode& node);
     VSlotRef(const SlotDescriptor* desc);
+    VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type,
+             std::string column_name);
 #ifdef BE_TEST
     VSlotRef() = default;
     void set_slot_id(int slot_id) { _slot_id = slot_id; }
@@ -58,6 +60,7 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr {
     int column_id() const { return _column_id; }
 
     MOCK_FUNCTION int slot_id() const { return _slot_id; }
+    int column_uniq_id() const { return _column_uniq_id; }
 
     bool equals(const VExpr& other) override;
 
@@ -67,16 +70,24 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr {
         column_ids.insert(_column_id);
     }
 
-    MOCK_FUNCTION const std::string& column_name() const { return *_column_name; }
+    virtual const std::string& column_name() const { return *_column_name; }
 
     uint64_t get_digest(uint64_t seed) const override;
 
     double execute_cost() const override { return 0.0; }
+    Status clone_node(VExprSPtr* cloned_expr) const override;
+
+protected:
+    VSlotRef(int slot_id, int column_id, int column_uniq_id)
+            : _slot_id(slot_id), _column_id(column_id), _column_uniq_id(column_uniq_id) {
+        _node_type = TExprNodeType::SLOT_REF;
+    }
 
 private:
     int _slot_id;
     int _column_id;
     int _column_uniq_id = -1;
+    std::string _owned_column_name;
     const std::string* _column_name = nullptr;
     const std::string _column_label;
 };
diff --git a/be/src/exprs/vtopn_pred.h b/be/src/exprs/vtopn_pred.h
index 94887588f536da..a6edec65accd3d 100644
--- a/be/src/exprs/vtopn_pred.h
+++ b/be/src/exprs/vtopn_pred.h
@@ -63,6 +63,11 @@ class VTopNPred : public VExpr {
     }
 
     int source_node_id() const { return _source_node_id; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VTopNPred::create_shared(clone_texpr_node(), _source_node_id, nullptr);
+        return Status::OK();
+    }
 
     Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override {
         _predicate = &state->get_query_ctx()->get_runtime_predicate(_source_node_id);
diff --git a/be/src/format/CMakeLists.txt b/be/src/format/CMakeLists.txt
index ef9dab92c00f97..bc0325f3e0f252 100644
--- a/be/src/format/CMakeLists.txt
+++ b/be/src/format/CMakeLists.txt
@@ -22,6 +22,9 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/format")
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/format")
 
 file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp)
+file(GLOB_RECURSE FORMAT_V2_SRC_FILES CONFIGURE_DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/../format_v2/*.cpp)
+list(APPEND SRC_FILES ${FORMAT_V2_SRC_FILES})
 
 # Lance reader requires Rust static library (BUILD_RUST_READERS=ON)
 if (NOT BUILD_RUST_READERS)
diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp
index 3d1e978ffe911f..b8f0be49bfea1e 100644
--- a/be/src/format/csv/csv_reader.cpp
+++ b/be/src/format/csv/csv_reader.cpp
@@ -668,8 +668,8 @@ Status CsvReader::_create_file_reader(bool need_schema) {
                                                         need_schema));
     } else {
         _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr file_reader;
         if (_io_ctx_holder) {
             file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/generic_reader.h b/be/src/format/generic_reader.h
index d849d595056adb..88fc3fb85a0eb1 100644
--- a/be/src/format/generic_reader.h
+++ b/be/src/format/generic_reader.h
@@ -40,6 +40,7 @@
 #include "runtime/runtime_state.h"
 #include "storage/predicate/block_column_predicate.h"
 #include "storage/segment/common.h"
+#include "storage/segment/condition_cache.h"
 #include "util/profile_collector.h"
 
 namespace doris {
@@ -51,16 +52,6 @@ namespace doris {
 class Block;
 class VSlotRef;
 
-// Context passed from FileScanner to readers for condition cache integration.
-// On MISS: readers populate filter_result per-granule during predicate evaluation.
-// On HIT: readers skip granules where filter_result[granule] == false.
-struct ConditionCacheContext {
-    bool is_hit = false;
-    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
-    int64_t base_granule = 0; // global granule index of the first granule in filter_result
-    static constexpr int GRANULE_SIZE = 2048;
-};
-
 /// Base context for the unified init_reader(ReaderInitContext*) template method.
 /// Contains fields shared by ALL reader types. Format-specific readers define
 /// subclasses (ParquetInitContext, OrcInitContext, etc.) with extra fields.
@@ -299,6 +290,7 @@ class GenericReader : public ProfileCollector {
     // ---- get_columns cache ----
     bool _get_columns_cached = false;
     std::unordered_map<std::string, DataTypePtr> _cached_name_to_type;
+    const TQueryOptions _default_query_options;
 };
 
 /// Provides an accessor for the current batch's row positions within the file.
diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp
index 8d53b6009e6bef..1aa19574b39a58 100644
--- a/be/src/format/json/new_json_reader.cpp
+++ b/be/src/format/json/new_json_reader.cpp
@@ -498,8 +498,8 @@ Status NewJsonReader::_open_file_reader(bool need_schema) {
                                                         need_schema));
     } else {
         _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr file_reader;
         if (_io_ctx_holder) {
             file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/native/native_reader.cpp b/be/src/format/native/native_reader.cpp
index 029d7ff2024f20..3632b6e4e0a1c9 100644
--- a/be/src/format/native/native_reader.cpp
+++ b/be/src/format/native/native_reader.cpp
@@ -137,8 +137,8 @@ Status NativeReader::init_reader() {
                                                   _scan_params.broker_addresses.end());
     }
 
-    io::FileReaderOptions reader_options =
-            FileFactory::get_reader_options(_state, file_description);
+    io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+            _state ? _state->query_options() : _default_query_options, file_description);
     auto reader_res =
             _io_ctx_holder ? io::DelegateReader::create_file_reader(
                                      _profile, system_properties, file_description, reader_options,
diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp
index 48a1894f02e276..06393c3195e430 100644
--- a/be/src/format/orc/vorc_reader.cpp
+++ b/be/src/format/orc/vorc_reader.cpp
@@ -382,8 +382,8 @@ Status OrcReader::_create_file_reader() {
     if (_file_input_stream == nullptr) {
         _file_description.mtime =
                 _scan_range.__isset.modification_time ? _scan_range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr inner_reader;
         if (_io_ctx_holder != nullptr) {
             inner_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp
index 31078c7b8b0d0c..600cbfbfaa7d21 100644
--- a/be/src/format/parquet/vparquet_reader.cpp
+++ b/be/src/format/parquet/vparquet_reader.cpp
@@ -313,8 +313,8 @@ Status ParquetReader::_open_file() {
         ++_reader_statistics.open_file_num;
         _file_description.mtime =
                 _scan_range.__isset.modification_time ? _scan_range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         if (_io_ctx_holder) {
             _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
                     _profile, _system_properties, _file_description, reader_options,
diff --git a/be/src/format/table/deletion_vector_reader.cpp b/be/src/format/table/deletion_vector_reader.cpp
index bfe34a5f555f94..d7e33c923d95b7 100644
--- a/be/src/format/table/deletion_vector_reader.cpp
+++ b/be/src/format/table/deletion_vector_reader.cpp
@@ -54,9 +54,9 @@ Status DeletionVectorReader::_create_file_reader() {
         return Status::EndOfFile("stop read.");
     }
 
-    _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
+    _file_description.mtime = _desc.modification_time;
     io::FileReaderOptions reader_options =
-            FileFactory::get_reader_options(_state, _file_description);
+            FileFactory::get_reader_options(_state->query_options(), _file_description);
     _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
             _profile, _system_properties, _file_description, reader_options,
             io::DelegateReader::AccessMode::RANDOM, _io_ctx));
@@ -64,20 +64,13 @@ Status DeletionVectorReader::_create_file_reader() {
 }
 
 void DeletionVectorReader::_init_file_description() {
-    _file_description.path = _range.path;
-    _file_description.file_size = _range.__isset.file_size ? _range.file_size : -1;
-    if (_range.__isset.fs_name) {
-        _file_description.fs_name = _range.fs_name;
-    }
+    _file_description.path = _desc.path;
+    _file_description.file_size = _desc.file_size;
+    _file_description.fs_name = _desc.fs_name;
 }
 
 void DeletionVectorReader::_init_system_properties() {
-    if (_range.__isset.file_type) {
-        // for compatibility
-        _system_properties.system_type = _range.file_type;
-    } else {
-        _system_properties.system_type = _params.file_type;
-    }
+    _system_properties.system_type = _params.file_type;
     _system_properties.properties = _params.properties;
     _system_properties.hdfs_params = _params.hdfs_params;
     if (_params.__isset.broker_addresses) {
diff --git a/be/src/format/table/deletion_vector_reader.h b/be/src/format/table/deletion_vector_reader.h
index 0663f3b28490ef..968344a8496bc7 100644
--- a/be/src/format/table/deletion_vector_reader.h
+++ b/be/src/format/table/deletion_vector_reader.h
@@ -36,6 +36,22 @@ struct IOContext;
 } // namespace io
 
 namespace doris {
+struct DeleteFileDesc {
+    enum class Format {
+        PAIMON,
+        ICEBERG,
+    };
+
+    std::string key = "";
+    std::string path = "";
+    std::string fs_name = "";
+    int64_t start_offset = 0;
+    int64_t size = 0;
+    int64_t file_size = -1;
+    int64_t modification_time = 0;
+    Format format = Format::PAIMON;
+};
+
 class DeletionVectorReader {
     ENABLE_FACTORY_CREATOR(DeletionVectorReader);
 
@@ -43,7 +59,22 @@ class DeletionVectorReader {
     DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile,
                          const TFileScanRangeParams& params, const TFileRangeDesc& range,
                          io::IOContext* io_ctx)
-            : _state(state), _profile(profile), _range(range), _params(params), _io_ctx(io_ctx) {}
+            : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) {
+        _desc = DeleteFileDesc {
+                .key = "",
+                .path = range.path,
+                .fs_name = range.__isset.fs_name ? range.fs_name : "",
+                .start_offset = range.start_offset,
+                .size = range.size,
+                .file_size = range.__isset.file_size ? range.file_size : -1,
+                .modification_time = range.__isset.modification_time ? range.modification_time : 0};
+    }
+    DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile,
+                         const TFileScanRangeParams& params, const DeleteFileDesc& desc,
+                         io::IOContext* io_ctx)
+            : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) {
+        _desc = desc;
+    }
     ~DeletionVectorReader() = default;
     Status open();
     Status read_at(size_t offset, Slice result);
@@ -56,7 +87,7 @@ class DeletionVectorReader {
 private:
     RuntimeState* _state = nullptr;
     RuntimeProfile* _profile = nullptr;
-    const TFileRangeDesc& _range;
+    DeleteFileDesc _desc;
     const TFileScanRangeParams& _params;
     io::IOContext* _io_ctx = nullptr;
 
diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h
index bd049342195695..2bc15f18cf141a 100644
--- a/be/src/format/table/iceberg_reader_mixin.h
+++ b/be/src/format/table/iceberg_reader_mixin.h
@@ -343,9 +343,6 @@ class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper {
     // id -> block column name
     std::unordered_map<int, std::string> _id_to_block_column_name;
 
-    // File column names used during init
-    std::vector<std::string> _file_col_names;
-
     std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()>
             _create_topn_row_id_column_iterator;
 
diff --git a/be/src/format_v2/column_data.h b/be/src/format_v2/column_data.h
new file mode 100644
index 00000000000000..7816ea8263cb42
--- /dev/null
+++ b/be/src/format_v2/column_data.h
@@ -0,0 +1,410 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+
+namespace doris::format {
+
+// File-local top-level column id.
+//
+// Scope:
+// - Only valid inside one physical file schema returned by FileReader::get_schema().
+// - For Parquet, this is the top-level field ordinal in the new reader schema.
+// - The synthetic row-position column also uses this type, with a reserved negative id.
+//
+// Do not use this for table/global column unique ids, block positions, nested child ids, or
+// slot ids. Nested child ids are carried by LocalColumnIndex::index below.
+class LocalColumnId {
+public:
+    constexpr LocalColumnId() = default;
+    explicit constexpr LocalColumnId(int32_t id) : _id(id) {}
+
+    static constexpr LocalColumnId invalid() { return LocalColumnId(); }
+
+    constexpr int32_t value() const { return _id; }
+    constexpr bool is_valid() const { return _id >= 0; }
+
+    constexpr bool operator==(const LocalColumnId& other) const { return _id == other._id; }
+    constexpr bool operator!=(const LocalColumnId& other) const { return !(*this == other); }
+    constexpr bool operator<(const LocalColumnId& other) const { return _id < other._id; }
+
+private:
+    int32_t _id = -1;
+};
+
+// Position of a file-local column in the Block produced by one FileScanRequest.
+//
+// This is assigned by TableColumnMapper/TableReader after predicate/non-predicate columns are
+// deduplicated. It is not a file schema id and it is not stable across requests. Use value() only
+// at the boundary where an existing Block or expression API still expects a size_t/int position.
+class LocalIndex {
+public:
+    constexpr LocalIndex() = default;
+    explicit constexpr LocalIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const LocalIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const LocalIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+// Position of a table/global output column in the final Block returned by TableReader.
+//
+// This type is reserved for boundaries that need to refer to caller-visible column order. It must
+// not be used to index a file-local Block, because schema evolution and lazy materialization can
+// make file-local order different from table output order.
+class GlobalIndex {
+public:
+    constexpr GlobalIndex() = default;
+    explicit constexpr GlobalIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const GlobalIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const GlobalIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+// Index of a split-local constant/default value used to materialize columns that are not read from
+// the physical file, such as partition columns, added columns with default values, and virtual
+// table-format columns.
+//
+// It is separate from LocalIndex because constants do not occupy a position in the file reader
+// output block unless an expression explicitly materializes them.
+class ConstantIndex {
+public:
+    constexpr ConstantIndex() = default;
+    explicit constexpr ConstantIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const ConstantIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const ConstantIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const LocalColumnId& id) {
+    return os << id.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const LocalIndex& index) {
+    return os << index.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const GlobalIndex& index) {
+    return os << index.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const ConstantIndex& index) {
+    return os << index.value();
+}
+
+// A split/file-local constant value used to materialize a table/global column without reading a
+// physical file column.
+//
+// Common producers are partition values, schema-evolution default expressions, generated columns
+// and table-format virtual columns. The entry is keyed by ConstantIndex in ConstantMap; global_index
+// keeps the link back to the caller-visible output column.
+struct ConstantEntry {
+    GlobalIndex global_index;
+    VExprContextSPtr expr;
+    DataTypePtr type;
+};
+
+// Per mapping/split collection of constants.
+//
+// ConstantIndex only has meaning within this container. Keeping constants separate from LocalIndex
+// makes it explicit that these values do not occupy positions in the file reader output Block.
+class ConstantMap {
+public:
+    ConstantIndex add(ConstantEntry entry) {
+        const auto index = ConstantIndex(_entries.size());
+        _entries.push_back(std::move(entry));
+        return index;
+    }
+
+    const ConstantEntry& get(ConstantIndex index) const {
+        DORIS_CHECK(index.value() < _entries.size());
+        return _entries[index.value()];
+    }
+
+    void clear() { _entries.clear(); }
+    bool empty() const { return _entries.empty(); }
+    size_t size() const { return _entries.size(); }
+
+    const std::vector<ConstantEntry>& entries() const { return _entries; }
+
+private:
+    std::vector<ConstantEntry> _entries;
+};
+
+// Target of a localized filter.
+//
+// A filter can either reference a file-local Block position or a constant entry. Unset entries mean
+// the filter cannot be evaluated below the table-reader finalize stage.
+struct FilterEntry {
+    enum class Kind {
+        UNSET,
+        LOCAL,
+        CONSTANT,
+    };
+
+    static FilterEntry local(LocalIndex index) {
+        return {.kind = Kind::LOCAL, .index = index.value()};
+    }
+
+    static FilterEntry constant(ConstantIndex index) {
+        return {.kind = Kind::CONSTANT, .index = index.value()};
+    }
+
+    bool is_set() const { return kind != Kind::UNSET; }
+    bool is_local() const { return kind == Kind::LOCAL; }
+    bool is_constant() const { return kind == Kind::CONSTANT; }
+
+    LocalIndex local_index() const {
+        DORIS_CHECK(is_local());
+        return LocalIndex(index);
+    }
+
+    ConstantIndex constant_index() const {
+        DORIS_CHECK(is_constant());
+        return ConstantIndex(index);
+    }
+
+    Kind kind = Kind::UNSET;
+    size_t index = 0;
+};
+
+enum ColumnType {
+    DATA_COLUMN = 0,  // normal data column
+    ROW_NUMBER = 1,   // row number in a file
+    GLOBAL_ROWID = 2, // global unique row id across files, used by TopN filter
+};
+
+struct GlobalRowIdContext {
+    uint8_t version = 0;
+    int64_t backend_id = 0;
+    uint32_t file_id = 0;
+};
+
+// Column schema definition shared by table/global projection and file-local schema matching.
+//
+// ColumnDefinition intentionally carries schema identity only. FE column unique ids are translated
+// to GlobalIndex at the FileScannerV2 boundary and must not appear in table/file reader APIs.
+struct ColumnDefinition {
+    // Typed identifier value used to match a column against another schema.
+    //
+    // - TYPE_NULL: no explicit identifier. BY_NAME falls back to ColumnDefinition::name.
+    // - TYPE_INT: interpreted by TableColumnMapperOptions::mode as a field id or file position.
+    // - TYPE_STRING: explicit name identifier.
+    //
+    // This is not the id that FileReader uses to read data. For example, a Parquet column can be
+    // matched by its optional Parquet field_id, while the reader still addresses it by a file-local
+    // ordinal.
+    Field identifier;
+    // Reader-local id of this node inside the file schema returned by FileReader::get_schema().
+    // Top-level fields use the root column ordinal and nested fields use the child ordinal under
+    // their parent. -1 means unset; special virtual file columns may use other negative ids.
+    // Table/global ColumnDefinition values can leave this as -1 because they are not read directly
+    // by a FileReader.
+    int32_t local_id = -1;
+    // Logical table column name. This is also the matching name for by-name file formats.
+    std::string name;
+    // Historical or external names for the same logical field. Table formats such as Iceberg can
+    // use this to resolve partition path keys after column rename.
+    std::vector<std::string> name_mapping {};
+    DataTypePtr type;
+    // Semantic nested children for this schema node.
+    //
+    // Table/global columns carry projected table children. File-local schemas returned by
+    // FileReader::get_schema() also expose semantic children, not physical reader wrappers. For
+    // example, MAP children are key/value and ARRAY children contain only the element field.
+    std::vector<ColumnDefinition> children {};
+    // Expression used to materialize missing/default/generated values when the column is not read
+    // directly from the file.
+    VExprContextSPtr default_expr = nullptr;
+    // Partition columns are constants from split metadata and should not be matched against file
+    // schema unless table-format logic explicitly asks for it.
+    bool is_partition_key = false;
+    // File-local column kind. For table/global columns this remains DATA_COLUMN.
+    ColumnType column_type = ColumnType::DATA_COLUMN;
+
+    bool has_identifier() const { return !identifier.is_null(); }
+    bool has_identifier_field_id() const { return identifier.get_type() == TYPE_INT; }
+    bool has_identifier_name() const { return identifier.get_type() == TYPE_STRING; }
+
+    // DuckDB-style helper for BY_FIELD_ID matching. The mapper binds the matching mode once, so a
+    // TYPE_INT identifier is interpreted as a field id only by the field-id matcher.
+    int32_t get_identifier_field_id() const {
+        DORIS_CHECK(has_identifier_field_id());
+        return identifier.get<TYPE_INT>();
+    }
+    // DuckDB-style helper for BY_NAME matching. When no explicit string identifier is present, the
+    // logical column name is the identifier.
+    const std::string& get_identifier_name() const {
+        if (identifier.is_null()) {
+            return name;
+        }
+        DORIS_CHECK(has_identifier_name());
+        return identifier.get<TYPE_STRING>();
+    }
+    // Helper for BY_INDEX matching. BY_INDEX reuses the TYPE_INT identifier as the table-side file
+    // position, matching DuckDB's typed identifier plus mapper-mode interpretation.
+    int32_t get_identifier_position() const {
+        DORIS_CHECK(has_identifier_field_id());
+        return identifier.get<TYPE_INT>();
+    }
+
+    // Helper for reader-local projection and scan requests.
+    int32_t file_local_id() const {
+        if (local_id != -1) {
+            return local_id;
+        }
+        return get_identifier_field_id();
+    }
+
+    std::string debug_string() const;
+};
+
+static constexpr int ROW_POSITION_COLUMN_ID = -10001;
+static constexpr const char* ROW_POSITION_COLUMN_NAME = "__file_row_position";
+static constexpr int GLOBAL_ROWID_COLUMN_ID = -10002;
+
+inline ColumnDefinition row_position_column_definition() {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(ROW_POSITION_COLUMN_ID);
+    field.local_id = ROW_POSITION_COLUMN_ID;
+    field.name = ROW_POSITION_COLUMN_NAME;
+    field.type = std::make_shared<DataTypeInt64>();
+    field.column_type = ColumnType::ROW_NUMBER;
+    return field;
+}
+
+inline ColumnDefinition global_rowid_column_definition() {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_STRING>(BeConsts::GLOBAL_ROWID_COL);
+    field.local_id = GLOBAL_ROWID_COLUMN_ID;
+    field.name = BeConsts::GLOBAL_ROWID_COL;
+    field.type = std::make_shared<DataTypeString>();
+    field.column_type = ColumnType::GLOBAL_ROWID;
+    return field;
+}
+
+// Recursive file-local projection path.
+//
+// For a root entry in FileScanRequest::{predicate_columns, non_predicate_columns}, index is the
+// top-level file column id and column_id() is valid. For children, index is the file-local child id
+// under the parent node. This is the reader schema local id, not an Iceberg/Parquet field id, not a
+// table child id, and not a child output ordinal.
+//
+// project_all_children=true means the whole subtree under this node is needed. When false, children
+// lists the selected child paths. File readers can use this to avoid constructing readers for
+// unprojected nested children.
+struct LocalColumnIndex {
+    int32_t index = -1;
+    bool project_all_children = true;
+    std::vector<LocalColumnIndex> children {};
+
+    static LocalColumnIndex top_level(LocalColumnId column_id) {
+        return {.index = column_id.value()};
+    }
+
+    static LocalColumnIndex local(int32_t local_id) { return {.index = local_id}; }
+
+    static LocalColumnIndex partial_local(int32_t local_id) {
+        return {.index = local_id, .project_all_children = false};
+    }
+
+    LocalColumnId column_id() const { return LocalColumnId(index); }
+    int32_t local_id() const { return index; }
+    std::string debug_string() const;
+};
+
+inline bool is_full_projection(const LocalColumnIndex* projection) {
+    return projection == nullptr || projection->project_all_children;
+}
+
+inline bool is_partial_projection(const LocalColumnIndex* projection) {
+    return projection != nullptr && !projection->project_all_children;
+}
+
+inline const LocalColumnIndex* find_child_projection(const LocalColumnIndex* projection,
+                                                     int32_t local_id) {
+    if (is_full_projection(projection)) {
+        return nullptr;
+    }
+    const auto child_it = std::find_if(
+            projection->children.begin(), projection->children.end(),
+            [&](const LocalColumnIndex& child) { return child.local_id() == local_id; });
+    return child_it == projection->children.end() ? nullptr : &*child_it;
+}
+
+inline bool is_child_projected(const LocalColumnIndex* projection, int32_t local_id) {
+    return is_full_projection(projection) || find_child_projection(projection, local_id) != nullptr;
+}
+
+// Merge two projection trees that point to the same file-local node.
+//
+// A full projection dominates a partial projection. Two partial projections are merged by child id
+// and recursively union their child paths. The caller must only merge projections for the same
+// root/child node.
+inline Status merge_local_column_index(LocalColumnIndex* target, const LocalColumnIndex& source) {
+    DORIS_CHECK(target != nullptr);
+    DORIS_CHECK(target->index == source.index);
+    if (target->project_all_children) {
+        return Status::OK();
+    }
+    if (source.project_all_children) {
+        target->project_all_children = true;
+        target->children.clear();
+        return Status::OK();
+    }
+    for (const auto& source_child : source.children) {
+        auto target_child_it = std::find_if(
+                target->children.begin(), target->children.end(),
+                [&](const LocalColumnIndex& child) { return child.index == source_child.index; });
+        if (target_child_it == target->children.end()) {
+            target->children.push_back(source_child);
+            continue;
+        }
+        RETURN_IF_ERROR(merge_local_column_index(&*target_child_it, source_child));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper.cpp b/be/src/format_v2/column_mapper.cpp
new file mode 100644
index 00000000000000..e6a0e1a28e7422
--- /dev/null
+++ b/be/src/format_v2/column_mapper.cpp
@@ -0,0 +1,2029 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/convert_field_to_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/short_circuit_evaluation_expr.h"
+#include "exprs/vcase_expr.h"
+#include "exprs/vcast_expr.h"
+#include "exprs/vcondition_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vin_predicate.h"
+#include "exprs/vliteral.h"
+#include "format_v2/column_mapper_nested.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/schema_projection.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+
+namespace doris::format {
+
+namespace {
+
+std::string mapping_mode_to_string(TableColumnMappingMode mode) {
+    switch (mode) {
+    case TableColumnMappingMode::BY_FIELD_ID:
+        return "BY_FIELD_ID";
+    case TableColumnMappingMode::BY_NAME:
+        return "BY_NAME";
+    case TableColumnMappingMode::BY_INDEX:
+        return "BY_INDEX";
+    }
+    return "UNKNOWN";
+}
+
+bool column_has_name(const ColumnDefinition& column, const std::string& name) {
+    if (to_lower(column.name) == to_lower(name)) {
+        return true;
+    }
+    if (column.has_identifier_name() && to_lower(column.get_identifier_name()) == to_lower(name)) {
+        return true;
+    }
+    return std::ranges::any_of(column.name_mapping, [&](const std::string& alias) {
+        return to_lower(alias) == to_lower(name);
+    });
+}
+
+bool column_names_match(const ColumnDefinition& lhs, const ColumnDefinition& rhs) {
+    if (column_has_name(rhs, lhs.name)) {
+        return true;
+    }
+    if (lhs.has_identifier_name() && column_has_name(rhs, lhs.get_identifier_name())) {
+        return true;
+    }
+    return std::ranges::any_of(lhs.name_mapping, [&](const std::string& alias) {
+        return column_has_name(rhs, alias);
+    });
+}
+
+class ColumnMatcher {
+public:
+    virtual ~ColumnMatcher() = default;
+    virtual const ColumnDefinition* find(
+            const ColumnDefinition& table_column,
+            const std::vector<ColumnDefinition>& file_schema) const = 0;
+};
+
+class FieldIdMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        if (!table_column.has_identifier_field_id()) {
+            return nullptr;
+        }
+        const auto field_id = table_column.get_identifier_field_id();
+        const auto field_it = std::ranges::find_if(file_schema, [&](const ColumnDefinition& field) {
+            return field.has_identifier_field_id() && field.get_identifier_field_id() == field_id;
+        });
+        return field_it == file_schema.end() ? nullptr : &*field_it;
+    }
+};
+
+class NameMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        const auto field_it = std::ranges::find_if(file_schema, [&](const ColumnDefinition& field) {
+            return column_names_match(table_column, field);
+        });
+        return field_it == file_schema.end() ? nullptr : &*field_it;
+    }
+};
+
+class PositionMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        if (!table_column.has_identifier_field_id()) {
+            return nullptr;
+        }
+        const auto position = table_column.get_identifier_position();
+        if (position < 0 || static_cast<size_t>(position) >= file_schema.size()) {
+            return nullptr;
+        }
+        return &file_schema[static_cast<size_t>(position)];
+    }
+};
+
+const ColumnMatcher& matcher_for_mode(TableColumnMappingMode mode) {
+    static const FieldIdMatcher field_id_matcher;
+    static const NameMatcher name_matcher;
+    static const PositionMatcher position_matcher;
+    switch (mode) {
+    case TableColumnMappingMode::BY_FIELD_ID:
+        return field_id_matcher;
+    case TableColumnMappingMode::BY_NAME:
+        return name_matcher;
+    case TableColumnMappingMode::BY_INDEX:
+        return position_matcher;
+    }
+    return field_id_matcher;
+}
+
+std::string virtual_column_type_to_string(TableVirtualColumnType type) {
+    switch (type) {
+    case TableVirtualColumnType::INVALID:
+        return "INVALID";
+    case TableVirtualColumnType::ROW_ID:
+        return "ROW_ID";
+    case TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER:
+        return "LAST_UPDATED_SEQUENCE_NUMBER";
+    case TableVirtualColumnType::ICEBERG_ROWID:
+        return "ICEBERG_ROWID";
+    }
+    return "UNKNOWN";
+}
+
+std::string filter_conversion_type_to_string(FilterConversionType type) {
+    switch (type) {
+    case FilterConversionType::COPY_DIRECTLY:
+        return "COPY_DIRECTLY";
+    case FilterConversionType::CAST_FILTER:
+        return "CAST_FILTER";
+    case FilterConversionType::READER_EXPRESSION:
+        return "READER_EXPRESSION";
+    case FilterConversionType::FINALIZE_ONLY:
+        return "FINALIZE_ONLY";
+    case FilterConversionType::CONSTANT:
+        return "CONSTANT";
+    }
+    return "UNKNOWN";
+}
+
+std::string data_type_debug_string(const DataTypePtr& type) {
+    return type == nullptr ? "null" : type->get_name();
+}
+
+std::string field_debug_string(const Field& field) {
+    std::ostringstream out;
+    out << "Field{type=" << type_to_string(field.get_type()) << ", value=";
+    switch (field.get_type()) {
+    case TYPE_NULL:
+        out << "null";
+        break;
+    case TYPE_INT:
+        out << field.get<TYPE_INT>();
+        break;
+    case TYPE_BIGINT:
+        out << field.get<TYPE_BIGINT>();
+        break;
+    case TYPE_STRING:
+        out << field.get<TYPE_STRING>();
+        break;
+    default:
+        out << field.to_debug_string(0);
+        break;
+    }
+    out << "}";
+    return out.str();
+}
+
+template <typename T, typename Formatter>
+std::string join_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t i = 0; i < values.size(); ++i) {
+        if (i > 0) {
+            out << ", ";
+        }
+        out << formatter(values[i]);
+    }
+    out << "]";
+    return out.str();
+}
+
+} // namespace
+
+const Field* find_partition_value(const ColumnDefinition& table_column,
+                                  const std::map<std::string, Field>& partition_values) {
+    const auto find_by_name = [&](const std::string& name) -> const Field* {
+        const auto value_it = partition_values.find(name);
+        return value_it == partition_values.end() ? nullptr : &value_it->second;
+    };
+    if (const auto* value = find_by_name(table_column.name); value != nullptr) {
+        return value;
+    }
+    if (table_column.has_identifier_name()) {
+        if (const auto* value = find_by_name(table_column.get_identifier_name());
+            value != nullptr) {
+            return value;
+        }
+    }
+    for (const auto& alias : table_column.name_mapping) {
+        if (const auto* value = find_by_name(alias); value != nullptr) {
+            return value;
+        }
+    }
+    return nullptr;
+}
+
+struct FileSlotRewriteInfo {
+    size_t block_position = 0;
+    DataTypePtr file_type;
+    DataTypePtr table_type;
+    std::string file_column_name;
+};
+
+struct RewriteContext {
+    RuntimeState* runtime_state = nullptr;
+    std::vector<VExprSPtr> created_exprs {};
+
+    void add_created_expr(VExprSPtr expr) { created_exprs.push_back(std::move(expr)); }
+
+    Status prepare_created_exprs(VExprContext* context) const {
+        DORIS_CHECK(context != nullptr);
+        RowDescriptor row_desc;
+        for (const auto& expr : created_exprs) {
+            if (dynamic_cast<const Cast*>(expr.get()) != nullptr && runtime_state == nullptr) {
+                return Status::InvalidArgument(
+                        "RuntimeState is required to prepare rewritten cast expression {}",
+                        expr->expr_name());
+            }
+            RETURN_IF_ERROR(expr->prepare(runtime_state, row_desc, context));
+        }
+        return Status::OK();
+    }
+};
+
+static VExprSPtr create_file_slot_ref(const VSlotRef& slot_ref,
+                                      const FileSlotRewriteInfo& rewrite_info,
+                                      RewriteContext* rewrite_context) {
+    auto ref =
+            VSlotRef::create_shared(slot_ref.slot_id(), cast_set<int>(rewrite_info.block_position),
+                                    -1, rewrite_info.file_type, rewrite_info.file_column_name);
+    rewrite_context->add_created_expr(ref);
+    return ref;
+}
+
+static bool is_cast_expr(const VExprSPtr& expr) {
+    return dynamic_cast<const Cast*>(expr.get()) != nullptr;
+}
+
+static bool is_binary_comparison_predicate(const VExprSPtr& expr) {
+    if (expr == nullptr || expr->get_num_children() != 2 ||
+        (expr->node_type() != TExprNodeType::BINARY_PRED &&
+         expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED)) {
+        return false;
+    }
+    switch (expr->op()) {
+    case TExprOpcode::EQ:
+    case TExprOpcode::EQ_FOR_NULL:
+    case TExprOpcode::NE:
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+std::string TableColumnMapperOptions::debug_string() const {
+    std::ostringstream out;
+    out << "TableColumnMapperOptions{mode=" << mapping_mode_to_string(mode) << "}";
+    return out.str();
+}
+
+std::string ColumnDefinition::debug_string() const {
+    std::ostringstream out;
+    out << "ColumnDefinition{name=" << name << ", identifier=" << field_debug_string(identifier)
+        << ", name_mapping="
+        << join_debug_strings(name_mapping, [](const std::string& name) { return name; })
+        << ", local_id=" << local_id << ", type=" << data_type_debug_string(type) << ", children="
+        << join_debug_strings(children,
+                              [](const ColumnDefinition& child) { return child.debug_string(); })
+        << ", has_default_expr=" << (default_expr != nullptr)
+        << ", is_partition_key=" << is_partition_key << "}";
+    return out.str();
+}
+
+std::string LocalColumnIndex::debug_string() const {
+    std::ostringstream out;
+    out << "LocalColumnIndex{index=" << index << ", project_all_children=" << project_all_children
+        << ", children="
+        << join_debug_strings(children,
+                              [](const LocalColumnIndex& child) { return child.debug_string(); })
+        << "}";
+    return out.str();
+}
+
+std::string ColumnMapping::debug_string() const {
+    std::ostringstream out;
+    out << "ColumnMapping{global_index=" << global_index
+        << ", table_column_name=" << table_column_name << ", file_local_id=";
+    if (file_local_id.has_value()) {
+        out << *file_local_id;
+    } else {
+        out << "null";
+    }
+    out << ", constant_index=";
+    if (constant_index.has_value()) {
+        out << *constant_index;
+    } else {
+        out << "null";
+    }
+    out << ", file_column_name=" << file_column_name
+        << ", original_file_type=" << data_type_debug_string(original_file_type)
+        << ", original_file_children="
+        << join_debug_strings(original_file_children,
+                              [](const ColumnDefinition& child) { return child.debug_string(); })
+        << ", file_type=" << data_type_debug_string(file_type)
+        << ", table_type=" << data_type_debug_string(table_type)
+        << ", has_projection=" << (projection != nullptr) << ", child_mappings="
+        << join_debug_strings(child_mappings,
+                              [](const ColumnMapping& child) { return child.debug_string(); })
+        << ", is_trivial=" << is_trivial << ", is_constant=" << constant_index.has_value()
+        << ", filter_conversion=" << filter_conversion_type_to_string(filter_conversion)
+        << ", virtual_column_type=" << virtual_column_type_to_string(virtual_column_type)
+        << ", has_default_expr=" << (default_expr != nullptr) << "}";
+    return out.str();
+}
+
+std::string TableColumnMapper::debug_string() const {
+    std::ostringstream out;
+    out << "TableColumnMapper{options=" << _options.debug_string() << ", mappings="
+        << join_debug_strings(_mappings,
+                              [](const ColumnMapping& mapping) { return mapping.debug_string(); })
+        << ", hidden_mappings="
+        << join_debug_strings(_hidden_mappings,
+                              [](const ColumnMapping& mapping) { return mapping.debug_string(); })
+        << ", constant_count=" << _constant_map.size() << "}";
+    return out.str();
+}
+
+static const FileSlotRewriteInfo* find_slot_rewrite_info(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        const VSlotRef** slot_ref) {
+    if (expr == nullptr) {
+        return nullptr;
+    }
+    VExprSPtr slot_expr = expr;
+    const bool input_is_cast = is_cast_expr(expr) && expr->get_num_children() == 1;
+    if (is_cast_expr(expr) && expr->get_num_children() == 1) {
+        slot_expr = expr->children()[0];
+    }
+    if (!slot_expr->is_slot_ref()) {
+        return nullptr;
+    }
+    const auto* candidate_slot_ref = assert_cast<const VSlotRef*>(slot_expr.get());
+    const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*candidate_slot_ref));
+    if (rewrite_it == global_to_file_slot.end()) {
+        return nullptr;
+    }
+    if (input_is_cast && !expr->data_type()->equals(*rewrite_it->second.table_type)) {
+        return nullptr;
+    }
+    if (slot_ref != nullptr) {
+        *slot_ref = candidate_slot_ref;
+    }
+    return &rewrite_it->second;
+}
+
+static bool filter_conversion_has_local_source(FilterConversionType conversion) {
+    switch (conversion) {
+    case FilterConversionType::COPY_DIRECTLY:
+    case FilterConversionType::CAST_FILTER:
+    case FilterConversionType::READER_EXPRESSION:
+        return true;
+    case FilterConversionType::FINALIZE_ONLY:
+    case FilterConversionType::CONSTANT:
+        return false;
+    }
+    return false;
+}
+
+static bool column_predicate_can_use_local_source(FilterConversionType conversion) {
+    switch (conversion) {
+    case FilterConversionType::COPY_DIRECTLY:
+        return true;
+    case FilterConversionType::CAST_FILTER:
+    case FilterConversionType::READER_EXPRESSION:
+    case FilterConversionType::FINALIZE_ONLY:
+    case FilterConversionType::CONSTANT:
+        return false;
+    }
+    return false;
+}
+
+static bool table_filter_has_only_local_entries(
+        const TableFilter& table_filter, const std::map<GlobalIndex, FilterEntry>& filter_entries) {
+    for (const auto global_index : table_filter.global_indices) {
+        const auto entry_it = filter_entries.find(global_index);
+        if (entry_it == filter_entries.end() || !entry_it->second.is_local()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static VExprSPtr unwrap_literal_for_file_cast(const VExprSPtr& expr,
+                                              const DataTypePtr& table_type) {
+    if (expr == nullptr) {
+        return nullptr;
+    }
+    if (expr->is_literal()) {
+        return expr;
+    }
+    if (is_cast_expr(expr) && expr->get_num_children() == 1 && expr->children()[0]->is_literal() &&
+        expr->children()[0]->data_type()->equals(*table_type)) {
+        return expr->children()[0];
+    }
+    return nullptr;
+}
+
+static Field literal_field_from_expr(const VExpr& literal_expr) {
+    DORIS_CHECK(literal_expr.is_literal());
+    const auto* literal = dynamic_cast<const VLiteral*>(&literal_expr);
+    DORIS_CHECK(literal != nullptr);
+    Field field;
+    literal->get_column_ptr()->get(0, field);
+    return field;
+}
+
+// Table filter localization clones an already-prepared table expr and then rewrites it to file
+// slots. Only split-local literals and BE cast nodes need table-reader-specific clone behavior;
+// plain slot refs and literals use their own VExpr::clone_node().
+static Status clone_table_expr_node(const VExpr& expr, VExprSPtr* cloned_expr) {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (const auto* split_literal = dynamic_cast<const SplitLocalFileLiteral*>(&expr)) {
+        *cloned_expr = std::make_shared<SplitLocalFileLiteral>(
+                split_literal->data_type(), literal_field_from_expr(expr),
+                split_literal->original_type(), split_literal->original_field());
+    } else if (const auto* vcast_expr = dynamic_cast<const VCastExpr*>(&expr);
+               vcast_expr != nullptr && vcast_expr->node_type() == TExprNodeType::CAST_EXPR) {
+        *cloned_expr = Cast::create_shared(vcast_expr->data_type());
+    }
+    return Status::OK();
+}
+
+Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr) {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (expr == nullptr) {
+        *cloned_expr = nullptr;
+        return Status::OK();
+    }
+    return expr->deep_clone(cloned_expr, clone_table_expr_node);
+}
+
+static VExprSPtr original_table_literal(const VExprSPtr& literal_expr,
+                                        RewriteContext* rewrite_context = nullptr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* rewritten_literal = dynamic_cast<const SplitLocalFileLiteral*>(literal_expr.get());
+    if (rewritten_literal == nullptr) {
+        return literal_expr;
+    }
+    auto literal = VLiteral::create_shared(rewritten_literal->original_type(),
+                                           rewritten_literal->original_field());
+    if (rewrite_context != nullptr) {
+        rewrite_context->add_created_expr(literal);
+    }
+    return literal;
+}
+
+static ColumnDefinition hidden_column_from_slot_ref(const VSlotRef& slot_ref) {
+    ColumnDefinition column;
+    column.name = slot_ref.column_name();
+    column.identifier = Field::create_field<TYPE_STRING>(column.name);
+    column.type = slot_ref.data_type();
+    return column;
+}
+
+static void collect_top_level_slot_columns(const VExprSPtr& expr,
+                                           std::map<GlobalIndex, ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    if (expr->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr.get());
+        columns->try_emplace(slot_ref_global_index(*slot_ref),
+                             hidden_column_from_slot_ref(*slot_ref));
+        return;
+    }
+    for (const auto& child : expr->children()) {
+        collect_top_level_slot_columns(child, columns);
+    }
+}
+
+static VExprSPtr rewrite_literal_to_file_type(const VExprSPtr& literal_expr,
+                                              const FileSlotRewriteInfo& rewrite_info,
+                                              RewriteContext* rewrite_context) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto original_literal = original_table_literal(literal_expr, rewrite_context);
+    const Field original_field = literal_field(original_literal);
+    if (rewrite_info.file_type->equals(*original_literal->data_type())) {
+        return original_literal;
+    }
+    Field file_field;
+    try {
+        convert_field_to_type(original_field, *rewrite_info.file_type, &file_field,
+                              original_literal->data_type().get());
+    } catch (const Exception&) {
+        return nullptr;
+    }
+    if (file_field.is_null()) {
+        return nullptr;
+    }
+    if (file_field.get_type() != remove_nullable(rewrite_info.file_type)->get_primitive_type()) {
+        return nullptr;
+    }
+    auto literal = std::make_shared<SplitLocalFileLiteral>(
+            rewrite_info.file_type, file_field, original_literal->data_type(), original_field);
+    rewrite_context->add_created_expr(literal);
+    return literal;
+}
+
+static bool rewrite_binary_slot_literal_predicate(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    if (!is_binary_comparison_predicate(expr)) {
+        return false;
+    }
+    auto children = expr->children();
+    const VSlotRef* slot_ref = nullptr;
+    const FileSlotRewriteInfo* rewrite_info =
+            find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref);
+    int slot_child_idx = 0;
+    int literal_child_idx = 1;
+    if (rewrite_info == nullptr) {
+        rewrite_info = find_slot_rewrite_info(children[1], global_to_file_slot, &slot_ref);
+        slot_child_idx = 1;
+        literal_child_idx = 0;
+    }
+    if (rewrite_info == nullptr || slot_ref == nullptr) {
+        return false;
+    }
+    auto literal_expr =
+            unwrap_literal_for_file_cast(children[literal_child_idx], rewrite_info->table_type);
+    if (literal_expr == nullptr) {
+        return false;
+    }
+
+    auto rewritten_literal =
+            rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context);
+    if (rewritten_literal == nullptr) {
+        children[literal_child_idx] = original_table_literal(literal_expr, rewrite_context);
+        expr->set_children(std::move(children));
+        return false;
+    }
+
+    children[slot_child_idx] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context);
+    children[literal_child_idx] = std::move(rewritten_literal);
+    expr->set_children(std::move(children));
+    return true;
+}
+
+static bool rewrite_in_slot_literal_predicate(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    if (expr->node_type() != TExprNodeType::IN_PRED || expr->get_num_children() < 2) {
+        return false;
+    }
+    auto children = expr->children();
+    const VSlotRef* slot_ref = nullptr;
+    const FileSlotRewriteInfo* rewrite_info =
+            find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref);
+    if (rewrite_info == nullptr || slot_ref == nullptr) {
+        return false;
+    }
+
+    VExprSPtrs rewritten_literals;
+    rewritten_literals.reserve(children.size() - 1);
+    for (size_t child_idx = 1; child_idx < children.size(); ++child_idx) {
+        auto literal_expr =
+                unwrap_literal_for_file_cast(children[child_idx], rewrite_info->table_type);
+        if (literal_expr == nullptr) {
+            return false;
+        }
+        auto rewritten_literal =
+                rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context);
+        if (rewritten_literal == nullptr) {
+            for (size_t restore_idx = 1; restore_idx < children.size(); ++restore_idx) {
+                auto restore_literal = unwrap_literal_for_file_cast(children[restore_idx],
+                                                                    rewrite_info->table_type);
+                if (restore_literal != nullptr) {
+                    children[restore_idx] =
+                            original_table_literal(restore_literal, rewrite_context);
+                }
+            }
+            expr->set_children(std::move(children));
+            return false;
+        }
+        rewritten_literals.push_back(std::move(rewritten_literal));
+    }
+
+    children[0] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context);
+    for (size_t literal_idx = 0; literal_idx < rewritten_literals.size(); ++literal_idx) {
+        children[literal_idx + 1] = std::move(rewritten_literals[literal_idx]);
+    }
+    expr->set_children(std::move(children));
+    return true;
+}
+
+static VExprSPtr create_file_struct_child_name_literal(const std::string& file_child_name,
+                                                       RewriteContext* rewrite_context) {
+    auto literal = VLiteral::create_shared(std::make_shared<DataTypeString>(),
+                                           Field::create_field<TYPE_STRING>(file_child_name));
+    rewrite_context->add_created_expr(literal);
+    return literal;
+}
+
+static bool needs_complex_file_slot_cast(const DataTypePtr& file_type,
+                                         const DataTypePtr& table_type) {
+    if (file_type == nullptr || table_type == nullptr || file_type->equals(*table_type)) {
+        return false;
+    }
+    const auto file_nested_type = remove_nullable(file_type);
+    const auto table_nested_type = remove_nullable(table_type);
+    if (file_nested_type->equals(*table_nested_type)) {
+        return false;
+    }
+    return is_complex_type(file_nested_type->get_primitive_type()) ||
+           is_complex_type(table_nested_type->get_primitive_type());
+}
+
+static bool collect_struct_element_chain(const VExprSPtr& expr, std::vector<VExprSPtr>* chain) {
+    DORIS_CHECK(chain != nullptr);
+    if (!is_struct_element_expr(expr)) {
+        return false;
+    }
+    const auto& parent = expr->children()[0];
+    if (is_struct_element_expr(parent)) {
+        if (!collect_struct_element_chain(parent, chain)) {
+            return false;
+        }
+    } else if (!parent->is_slot_ref()) {
+        // Only support file-local rewrite for struct child chains rooted directly at a top-level
+        // slot, for example `element_at(s, 'a')` or `element_at(element_at(s, 'a'), 'b')`.
+        //
+        // Do not localize computed complex parents such as
+        // `element_at(element_at(map_values(m), 1), 'full_name')`. The intermediate map/array
+        // result has already been reshaped by scan projection and may have a different child order
+        // from the table expression. Partially rewriting that expression against the file block can
+        // silently evaluate the wrong struct child and filter out valid rows. Those predicates must
+        // remain as table-level conjuncts and be evaluated after TableReader materialization.
+        return false;
+    }
+    chain->push_back(expr);
+    return true;
+}
+
+static bool rewrite_struct_element_path_to_file_expr(
+        const VExprSPtr& expr, const std::vector<ColumnMapping>& mappings,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    ResolvedNestedStructPath resolved;
+    if (!resolve_nested_struct_expr_for_file(expr, mappings, &resolved)) {
+        return false;
+    }
+
+    std::vector<VExprSPtr> struct_element_chain;
+    if (!collect_struct_element_chain(expr, &struct_element_chain) ||
+        struct_element_chain.size() != resolved.file_child_names.size() ||
+        struct_element_chain.size() != resolved.file_child_types.size()) {
+        return false;
+    }
+
+    auto root_children = struct_element_chain.front()->children();
+    if (!root_children[0]->is_slot_ref()) {
+        return false;
+    }
+    const auto* slot_ref = assert_cast<const VSlotRef*>(root_children[0].get());
+    const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+    if (rewrite_it == global_to_file_slot.end()) {
+        return false;
+    }
+
+    // File-local conjuncts are prepared against the file-reader Block, so both the root slot and
+    // every struct selector must be expressed in file schema terms. For a renamed Iceberg field,
+    // keeping the table selector would prepare `element_at(file_struct<rename>, 'renamed')` and
+    // fail before any rows are read. Rewrite the whole chain while ColumnMapping still preserves
+    // the table-to-file relationship. Example:
+    //   table filter: element_at(element_at(s, 'renamed_parent'), 'renamed_leaf')
+    //   old file:     s<parent<leaf>>
+    //   file filter:  element_at(element_at(s, 'parent'), 'leaf')
+    root_children[0] = create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context);
+    struct_element_chain.front()->set_children(std::move(root_children));
+    for (size_t idx = 0; idx < struct_element_chain.size(); ++idx) {
+        auto children = struct_element_chain[idx]->children();
+        children[1] = create_file_struct_child_name_literal(resolved.file_child_names[idx],
+                                                            rewrite_context);
+        struct_element_chain[idx]->set_children(std::move(children));
+        // The selector name and the expression return type must be moved to file schema together.
+        // Example:
+        //   table filter: element_at(element_at(s, 'new_a'), 'new_aa') = 50
+        //   old file:     s.new_a STRUCT<aa, bb>
+        //   file filter:  element_at(element_at(s, 'new_a'), 'aa') = 50
+        //
+        // If the inner element_at keeps the table return type STRUCT<new_aa, bb>, preparing the
+        // outer element_at(..., 'aa') fails before scanning because `aa` is not a table field.
+        struct_element_chain[idx]->data_type() = resolved.file_child_types[idx];
+    }
+    return true;
+}
+
+static VExprSPtr rewrite_table_expr_to_file_expr(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        const std::vector<ColumnMapping>& filter_mappings, RewriteContext* rewrite_context,
+        bool* can_localize) {
+    if (expr == nullptr) {
+        return nullptr;
+    }
+    DORIS_CHECK(rewrite_context != nullptr);
+    DORIS_CHECK(can_localize != nullptr);
+    if (auto* runtime_filter = dynamic_cast<RuntimeFilterExpr*>(expr.get());
+        runtime_filter != nullptr) {
+        auto impl = runtime_filter->get_impl();
+        if (impl == nullptr) {
+            *can_localize = false;
+            return expr;
+        }
+        auto localized_impl = rewrite_table_expr_to_file_expr(
+                impl, global_to_file_slot, filter_mappings, rewrite_context, can_localize);
+        if (!*can_localize) {
+            return expr;
+        }
+        runtime_filter->set_impl(std::move(localized_impl));
+        return expr;
+    }
+    if (rewrite_binary_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) {
+        return expr;
+    }
+    if (rewrite_in_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) {
+        return expr;
+    }
+    if (is_struct_element_expr(expr)) {
+        if (!rewrite_struct_element_path_to_file_expr(expr, filter_mappings, global_to_file_slot,
+                                                      rewrite_context)) {
+            // The scanner still evaluates the original table-level conjunct after TableReader
+            // finalizes the output block. Skipping an unlocalizable file conjunct is therefore
+            // safer than preparing a partially rewritten expression against the wrong struct
+            // layout. In particular, do not generate file-local conjuncts for computed complex
+            // parents such as `element_at(element_at(map_values(m), 1), 'field')`; only direct
+            // slot-rooted struct chains are supported here.
+            *can_localize = false;
+        }
+        return expr;
+    }
+    if (expr->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr.get());
+        const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+        if (rewrite_it != global_to_file_slot.end()) {
+            const auto& rewrite_info = rewrite_it->second;
+            auto file_slot = create_file_slot_ref(*slot_ref, rewrite_info, rewrite_context);
+            if (rewrite_info.file_type->equals(*rewrite_info.table_type)) {
+                return file_slot;
+            }
+            if (needs_complex_file_slot_cast(rewrite_info.file_type, rewrite_info.table_type)) {
+                // Generic file-local expressions cannot safely cast an evolved complex file slot
+                // back to the table type. Example:
+                //
+                //   table filter: ARRAY_CONTAINS(MAP_KEYS(m), 'person5')
+                //   old file:     m MAP<STRING, STRUCT<name, age>>
+                //   table:        m MAP<STRING, STRUCT<age, full_name, gender>>
+                //
+                // Although MAP_KEYS only reads the key column, wrapping the file slot as
+                // `CAST(file_m AS table_m)` forces the value struct cast first and fails because
+                // the old and new value structs have different fields. Keep such filters at the
+                // table level, where TableReader materializes the evolved complex value before
+                // Scanner evaluates the original conjunct. Direct slot-rooted struct child paths
+                // are handled by rewrite_struct_element_path_to_file_expr() above.
+                *can_localize = false;
+                return expr;
+            }
+            auto cast_expr = Cast::create_shared(rewrite_info.table_type);
+            cast_expr->add_child(std::move(file_slot));
+            rewrite_context->add_created_expr(cast_expr);
+            return cast_expr;
+        }
+        return expr;
+    }
+    // The input is a split-local cloned tree. A previous split-local clone may already have
+    // inserted Cast(slot). Keep that rewrite idempotent: rewrite the cast child from table slot to
+    // the current split's file slot, and drop the cast when the current split no longer needs it.
+    if (is_cast_expr(expr) && expr->get_num_children() == 1) {
+        const auto& child = expr->children()[0];
+        if (child->is_slot_ref()) {
+            const auto* slot_ref = assert_cast<const VSlotRef*>(child.get());
+            const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+            if (rewrite_it != global_to_file_slot.end() &&
+                expr->data_type()->equals(*rewrite_it->second.table_type)) {
+                auto rewritten_child =
+                        create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context);
+                if (rewrite_it->second.file_type->equals(*rewrite_it->second.table_type)) {
+                    return rewritten_child;
+                }
+                if (needs_complex_file_slot_cast(rewrite_it->second.file_type,
+                                                 rewrite_it->second.table_type)) {
+                    *can_localize = false;
+                    return expr;
+                }
+                expr->set_children({std::move(rewritten_child)});
+                return expr;
+            }
+        }
+    }
+
+    VExprSPtrs rewritten_children;
+    rewritten_children.reserve(expr->children().size());
+    for (const auto& child : expr->children()) {
+        rewritten_children.push_back(rewrite_table_expr_to_file_expr(
+                child, global_to_file_slot, filter_mappings, rewrite_context, can_localize));
+    }
+    expr->set_children(std::move(rewritten_children));
+    return expr;
+}
+
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
+static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER = "_last_updated_sequence_number";
+static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540;
+static constexpr int32_t ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID = 2147483539;
+
+static TableVirtualColumnType row_lineage_virtual_column_type(const std::string& column_name) {
+    if (column_name == ROW_LINEAGE_ROW_ID) {
+        return TableVirtualColumnType::ROW_ID;
+    }
+    if (column_name == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER) {
+        return TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER;
+    }
+    return TableVirtualColumnType::INVALID;
+}
+
+static TableVirtualColumnType row_lineage_virtual_column_type_by_field_id(
+        const ColumnDefinition& column) {
+    if (!column.has_identifier_field_id()) {
+        return TableVirtualColumnType::INVALID;
+    }
+    switch (column.get_identifier_field_id()) {
+    case ROW_LINEAGE_ROW_ID_FIELD_ID:
+        return TableVirtualColumnType::ROW_ID;
+    case ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID:
+        return TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER;
+    default:
+        return TableVirtualColumnType::INVALID;
+    }
+}
+
+static TableVirtualColumnType row_lineage_virtual_column_type(const ColumnDefinition& column,
+                                                              TableColumnMappingMode mode) {
+    switch (mode) {
+    case TableColumnMappingMode::BY_FIELD_ID:
+        return row_lineage_virtual_column_type_by_field_id(column);
+    case TableColumnMappingMode::BY_NAME:
+    case TableColumnMappingMode::BY_INDEX:
+        return row_lineage_virtual_column_type(column.name);
+    }
+    return TableVirtualColumnType::INVALID;
+}
+
+// Returns true when the current file type is not the exact nested type the scan should expose.
+// This is about building the projected file-side type/projection, not about whether TableReader
+// later needs to rematerialize the complex value back to table layout.
+static bool needs_projected_file_type_rebuild(const ColumnMapping& mapping) {
+    if (!is_complex_type(mapping.file_type->get_primitive_type())) {
+        return false;
+    }
+    if (mapping.child_mappings.empty()) {
+        return false;
+    }
+    DORIS_CHECK(mapping.file_type != nullptr);
+    DORIS_CHECK(mapping.table_type != nullptr);
+    if (remove_nullable(mapping.file_type)->get_primitive_type() !=
+        remove_nullable(mapping.table_type)->get_primitive_type()) {
+        return true;
+    }
+    if (!mapping.table_type->equals(*mapping.file_type)) {
+        return true;
+    }
+    for (const auto& child_mapping : mapping.child_mappings) {
+        // Rename-only child mappings do not change the file-side projected shape. If field-id
+        // matching maps table child `renamed_b` to file child `b`, the file reader can still expose
+        // the original file type as long as child count/order/types are unchanged.
+        if (!child_mapping.file_local_id.has_value() ||
+            needs_projected_file_type_rebuild(child_mapping)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static std::optional<size_t> file_child_ordinal_in_scan_type(const ColumnMapping& mapping,
+                                                             const ColumnMapping& child_mapping) {
+    if (!child_mapping.file_local_id.has_value()) {
+        return std::nullopt;
+    }
+    const auto& file_children = !mapping.projected_file_children.empty()
+                                        ? mapping.projected_file_children
+                                        : mapping.original_file_children;
+    const auto child_it = std::ranges::find_if(file_children, [&](const ColumnDefinition& child) {
+        return child.file_local_id() == *child_mapping.file_local_id;
+    });
+    if (child_it == file_children.end()) {
+        return std::nullopt;
+    }
+    return static_cast<size_t>(std::distance(file_children.begin(), child_it));
+}
+
+static bool needs_complex_rematerialize(const ColumnMapping& mapping) {
+    if (mapping.child_mappings.empty()) {
+        return false;
+    }
+    if (mapping.table_type == nullptr || mapping.file_type == nullptr ||
+        !mapping.table_type->equals(*mapping.file_type)) {
+        return true;
+    }
+    for (size_t table_child_idx = 0; table_child_idx < mapping.child_mappings.size();
+         ++table_child_idx) {
+        const auto& child_mapping = mapping.child_mappings[table_child_idx];
+        const auto file_child_idx = file_child_ordinal_in_scan_type(mapping, child_mapping);
+        if (!file_child_idx.has_value() || *file_child_idx != table_child_idx ||
+            needs_complex_rematerialize(child_mapping) ||
+            (child_mapping.table_type != nullptr && child_mapping.file_type != nullptr &&
+             !child_mapping.table_type->equals(*child_mapping.file_type))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool mapping_can_use_file_column_directly(const ColumnMapping& mapping) {
+    if (mapping.table_type == nullptr || mapping.file_type == nullptr) {
+        return false;
+    }
+    const auto table_type = remove_nullable(mapping.table_type);
+    const auto file_type = remove_nullable(mapping.file_type);
+    const bool same_timestamptz_with_different_scale =
+            table_type->get_primitive_type() == TYPE_TIMESTAMPTZ &&
+            file_type->get_primitive_type() == TYPE_TIMESTAMPTZ;
+    if (!mapping.table_type->equals(*mapping.file_type) && !same_timestamptz_with_different_scale) {
+        return false;
+    }
+    return !needs_complex_rematerialize(mapping);
+}
+
+static const ColumnDefinition* find_file_child_for_mapping(const ColumnDefinition& table_child,
+                                                           const ColumnDefinition& file_parent,
+                                                           TableColumnMappingMode mode,
+                                                           size_t table_child_idx,
+                                                           bool allow_ordinal_fallback) {
+    const auto file_parent_type = remove_nullable(file_parent.type)->get_primitive_type();
+    switch (file_parent_type) {
+    case TYPE_ARRAY:
+        DORIS_CHECK(file_parent.children.size() == 1);
+        return &file_parent.children[0];
+    case TYPE_MAP:
+        DORIS_CHECK(file_parent.children.size() == 2);
+        if (table_child.name == "key") {
+            return &file_parent.children[0];
+        }
+        if (table_child.name == "value") {
+            return &file_parent.children[1];
+        }
+        if (table_child.local_id == 0 || table_child.local_id == 1) {
+            return &file_parent.children[table_child.local_id];
+        }
+        return nullptr;
+    default:
+        // Hive BY_INDEX is a top-level column matching rule. Once a complex root is selected by
+        // file position, nested struct children follow Hive reader's historical name matching
+        // semantics; their integer identifiers can be field ids, not file positions.
+        const auto nested_mode =
+                mode == TableColumnMappingMode::BY_INDEX ? TableColumnMappingMode::BY_NAME : mode;
+        if (const auto* file_child =
+                    matcher_for_mode(nested_mode).find(table_child, file_parent.children);
+            file_child != nullptr) {
+            return file_child;
+        }
+        if (allow_ordinal_fallback && mode == TableColumnMappingMode::BY_FIELD_ID &&
+            !table_child.has_identifier_field_id()) {
+            // Synthetic children are derived from the table DataType when nested ColumnDefinition
+            // metadata has been pruned away. They do not carry Iceberg field ids, so try a name
+            // match before falling back to ordinal order. Example:
+            //   table value type: Struct(age, full_name, gender)
+            //   old file value:   Struct(name, age)
+            // Name matching keeps `age -> age`; the later unused-child fallback can then map the
+            // renamed `full_name -> name` instead of consuming `age` twice.
+            if (const auto* file_child = NameMatcher().find(table_child, file_parent.children);
+                file_child != nullptr) {
+                return file_child;
+            }
+        }
+        // Some callers only carry the full complex DataType for a projected table column, without
+        // expanded nested ColumnDefinitions. In that case we can still preserve full materialization
+        // by walking table/file struct fields by ordinal. This is a fallback only: explicit
+        // ColumnDefinition children keep using the requested table-format matching rule, which is
+        // required for precise schema evolution.
+        if (allow_ordinal_fallback && table_child_idx < file_parent.children.size()) {
+            return &file_parent.children[table_child_idx];
+        }
+        return nullptr;
+    }
+}
+
+static ColumnDefinition synthetic_child_definition(const std::string& name, DataTypePtr type,
+                                                   int32_t local_id) {
+    ColumnDefinition child;
+    child.identifier = Field::create_field<TYPE_STRING>(name);
+    child.local_id = local_id;
+    child.name = name;
+    child.type = std::move(type);
+    return child;
+}
+
+static std::vector<ColumnDefinition> synthesize_complex_children_from_type(
+        const DataTypePtr& type) {
+    std::vector<ColumnDefinition> children;
+    if (type == nullptr) {
+        return children;
+    }
+    const auto nested_type = remove_nullable(type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_ARRAY: {
+        const auto* array_type = assert_cast<const DataTypeArray*>(nested_type.get());
+        children.push_back(synthetic_child_definition("element", array_type->get_nested_type(), 0));
+        break;
+    }
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        children.push_back(synthetic_child_definition("key", map_type->get_key_type(), 0));
+        children.push_back(synthetic_child_definition("value", map_type->get_value_type(), 1));
+        break;
+    }
+    case TYPE_STRUCT: {
+        const auto* struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
+        children.reserve(struct_type->get_elements().size());
+        for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) {
+            children.push_back(synthetic_child_definition(struct_type->get_element_name(idx),
+                                                          struct_type->get_element(idx),
+                                                          cast_set<int32_t>(idx)));
+        }
+        break;
+    }
+    default:
+        break;
+    }
+    return children;
+}
+
+static bool has_table_child_named(const std::vector<ColumnDefinition>& children,
+                                  std::string_view name) {
+    return std::ranges::any_of(children, [&](const ColumnDefinition& child) {
+        return std::string_view(child.name) == name;
+    });
+}
+
+static void complete_required_complex_children_from_type(const DataTypePtr& type,
+                                                         std::vector<ColumnDefinition>* children) {
+    DORIS_CHECK(children != nullptr);
+    if (type == nullptr) {
+        return;
+    }
+    const auto nested_type = remove_nullable(type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        // MAP key/value are structural children, not independently materializable table fields.
+        // A key-only projection can still be attached to a whole-map output root, for example:
+        //   SELECT * FROM t WHERE ARRAY_CONTAINS(MAP_KEYS(new_map_column), 'person5')
+        //
+        // In that shape the scanner keeps the value stream readable, but the table projection can
+        // carry only the key child. Add the missing value child so recursive mapping can evolve the
+        // value type instead of letting TableReader cast old/new value structs directly.
+        if (has_table_child_named(*children, "key") && !has_table_child_named(*children, "value")) {
+            children->push_back(synthetic_child_definition("value", map_type->get_value_type(), 1));
+        }
+        break;
+    }
+    case TYPE_ARRAY:
+        // ARRAY has only one required structural child (`element`), so a non-empty projection is
+        // already rooted at the element path.
+        break;
+    case TYPE_STRUCT:
+        // STRUCT children are real fields and must remain prunable. Completing missing struct
+        // fields here would turn `SELECT s.a` into a full-struct read and undo nested projection.
+        break;
+    default:
+        break;
+    }
+}
+
+static Status validate_file_schema_children(const ColumnDefinition& file_field) {
+    if (file_field.type == nullptr) {
+        return Status::InternalError("File column '{}' has null type", file_field.name);
+    }
+    const auto nested_type = remove_nullable(file_field.type);
+    size_t expected_children = 0;
+    bool complex_with_fixed_children = true;
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_ARRAY:
+        expected_children = 1;
+        break;
+    case TYPE_MAP:
+        expected_children = 2;
+        break;
+    case TYPE_STRUCT:
+        expected_children =
+                assert_cast<const DataTypeStruct*>(nested_type.get())->get_elements().size();
+        break;
+    default:
+        complex_with_fixed_children = false;
+        break;
+    }
+    if (!complex_with_fixed_children || file_field.children.size() == expected_children) {
+        return Status::OK();
+    }
+    return Status::InternalError(
+            "Malformed complex file schema for column '{}': type={}, expected_children={}, "
+            "actual_children={}",
+            file_field.name, file_field.type->get_name(), expected_children,
+            file_field.children.size());
+}
+
+static bool has_projected_file_children(const ColumnMapping& mapping) {
+    if (mapping.original_file_children.empty() || mapping.projected_file_children.empty()) {
+        return false;
+    }
+    if (mapping.original_file_children.size() != mapping.projected_file_children.size()) {
+        return true;
+    }
+    for (size_t idx = 0; idx < mapping.original_file_children.size(); ++idx) {
+        if (mapping.original_file_children[idx].file_local_id() !=
+            mapping.projected_file_children[idx].file_local_id()) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool needs_nested_file_projection(const ColumnMapping& mapping) {
+    if (has_projected_file_children(mapping)) {
+        // Return True if the projected child column is missing / re-ordered
+        return true;
+    }
+    return std::ranges::any_of(mapping.child_mappings, [](const ColumnMapping& child_mapping) {
+        return needs_nested_file_projection(child_mapping);
+    });
+}
+
+static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection);
+
+// Build the projected file children/type according to the pruned complex projection. For example,
+// if we have a struct column `s` with children `id` and `name`, and the projection only keeps
+// `s.name`, then the file reader should expose `STRUCT<name ...>`.
+static Status rebuild_projected_file_children_and_type(
+        const DataTypePtr& file_type, const std::vector<ColumnDefinition>& original_file_children,
+        const std::vector<ColumnMapping>& child_mappings,
+        std::vector<ColumnDefinition>* projected_file_children, DataTypePtr* projected_type) {
+    DORIS_CHECK(file_type != nullptr);
+    DORIS_CHECK(projected_file_children != nullptr);
+    DORIS_CHECK(projected_type != nullptr);
+    ColumnDefinition field;
+    field.type = file_type;
+    field.children = original_file_children;
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(-1);
+    projection.children.reserve(child_mappings.size());
+    for (const auto* child_mapping : present_child_mappings_in_file_order(child_mappings)) {
+        DORIS_CHECK(child_mapping->file_local_id.has_value());
+        LocalColumnIndex child_projection;
+        RETURN_IF_ERROR(build_complex_projection(*child_mapping, &child_projection));
+        projection.children.push_back(std::move(child_projection));
+    }
+
+    ColumnDefinition projected_field;
+    RETURN_IF_ERROR(project_column_definition(field, projection, &projected_field));
+    *projected_file_children = std::move(projected_field.children);
+    *projected_type = std::move(projected_field.type);
+    return Status::OK();
+}
+
+// Build the complex column projection according to the ColumnMapping which is re-ordered by the
+// file-schema's order.
+//
+// For MAP, a partial projection represents value-subtree pruning only. The key child is not a
+// projected output shape; file readers still read full keys to construct ColumnMap offsets and keep
+// key semantics unchanged. If a caller tries to project only/prune the key child, the common schema
+// projection helper rejects it.
+static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection) {
+    if (projection == nullptr) {
+        return Status::InvalidArgument("projection is null");
+    }
+    DORIS_CHECK(mapping.file_local_id.has_value());
+    *projection = LocalColumnIndex::local(*mapping.file_local_id);
+    projection->project_all_children = mapping.child_mappings.empty();
+    projection->children.clear();
+    const auto present_children = present_child_mappings_in_file_order(mapping.child_mappings);
+    if (!projection->project_all_children && present_children.empty()) {
+        // All requested table children under this complex node are missing/default-only. The file
+        // reader cannot expose an empty complex projection, but TableReader can still rematerialize
+        // the table shape from a full file subtree and fill the missing children with defaults.
+        projection->project_all_children = true;
+        return Status::OK();
+    }
+    for (const auto* child_mapping : present_children) {
+        LocalColumnIndex child_projection;
+        RETURN_IF_ERROR(build_complex_projection(*child_mapping, &child_projection));
+        projection->children.push_back(std::move(child_projection));
+    }
+    if (!projection->project_all_children && projection->children.empty()) {
+        return Status::NotSupported("Projection for complex column {} contains no file children",
+                                    mapping.file_column_name);
+    }
+    return Status::OK();
+}
+
+using FilterProjectionMap = std::map<LocalColumnId, LocalColumnIndex>;
+
+// Update the mapping's file type according to the projection, and determine whether the projection
+// is trivial (i.e. the projected file type is the same as the table type, so no need to
+// rematerialize the complex value back to table layout after reading from file).
+static Status apply_projection_to_mapping_file_type(const LocalColumnIndex& projection,
+                                                    ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    if (mapping->original_file_type == nullptr) {
+        mapping->original_file_type = mapping->file_type;
+    }
+    if (mapping->original_file_type == nullptr ||
+        !is_complex_type(remove_nullable(mapping->original_file_type)->get_primitive_type())) {
+        return Status::OK();
+    }
+    ColumnDefinition field;
+    field.type = mapping->original_file_type;
+    field.children = mapping->original_file_children;
+    ColumnDefinition projected_field;
+    RETURN_IF_ERROR(project_column_definition(field, projection, &projected_field));
+    mapping->file_type = std::move(projected_field.type);
+    mapping->projected_file_children = std::move(projected_field.children);
+    mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+    return Status::OK();
+}
+
+static Status merge_filter_projection(const FilterProjectionMap* filter_projections,
+                                      LocalColumnIndex* projection) {
+    DORIS_CHECK(projection != nullptr);
+    if (filter_projections == nullptr) {
+        return Status::OK();
+    }
+    const auto filter_projection_it = filter_projections->find(projection->column_id());
+    if (filter_projection_it == filter_projections->end()) {
+        return Status::OK();
+    }
+    // Merge predicate-only nested paths into the root projection that is about to be scanned.
+    // Example: `SELECT s.a WHERE s.b > 1` first builds the output projection `s -> a` from
+    // ColumnMapping, while build_nested_struct_filter_projection_map() records `s -> b`. This merge
+    // produces one file scan projection `s -> a,b`.
+    RETURN_IF_ERROR(merge_local_column_index(projection, filter_projection_it->second));
+    return Status::OK();
+}
+
+static bool table_root_is_map(const ColumnMapping& mapping) {
+    if (mapping.table_type == nullptr) {
+        return false;
+    }
+    return remove_nullable(mapping.table_type)->get_primitive_type() == TYPE_MAP;
+}
+
+static Status add_scan_column(FileScanRequest* file_request, ColumnMapping* mapping,
+                              bool is_predicate_column, bool force_full_complex_scan_projection,
+                              const FilterProjectionMap* filter_projections = nullptr) {
+    const auto file_column_id = LocalColumnId(mapping->file_local_id.value());
+    LocalColumnIndex projection = LocalColumnIndex::top_level(file_column_id);
+    // Columnar readers can turn a complex mapping into a nested file projection, but
+    // row-oriented readers must scan the full top-level complex field because all children are
+    // encoded in the same text cell.
+    if (!force_full_complex_scan_projection && needs_nested_file_projection(*mapping)) {
+        RETURN_IF_ERROR(build_complex_projection(*mapping, &projection));
+    }
+    if (is_predicate_column && !force_full_complex_scan_projection) {
+        DCHECK(filter_projections != nullptr);
+        // If a projected complex root is also used by a predicate, rebuild the predicate scan
+        // projection from the output mapping before merging predicate-only children. For
+        // `SELECT s.a WHERE s.b > 1`, build_complex_projection() produces `s -> a` and
+        // merge_filter_projection() adds `s -> b`, so the predicate column reads both children.
+        RETURN_IF_ERROR(merge_filter_projection(filter_projections, &projection));
+    }
+    FileScanRequestBuilder builder(file_request);
+    if (is_predicate_column) {
+        return builder.add_predicate_column(std::move(projection));
+    }
+    return builder.add_non_predicate_column(std::move(projection));
+}
+
+static const LocalColumnIndex* find_scan_projection(
+        const std::vector<LocalColumnIndex>& scan_columns, LocalColumnId file_column_id) {
+    const auto projection_it =
+            std::ranges::find_if(scan_columns, [&](const LocalColumnIndex& projection) {
+                return projection.column_id() == file_column_id;
+            });
+    return projection_it == scan_columns.end() ? nullptr : &*projection_it;
+}
+
+// Apply the final scan projection of one root file column back to its ColumnMapping. This updates
+// mapping.file_type/projected_file_children from the original file schema to the exact shape that
+// FileReader will return.
+//
+// Example: for `SELECT s.a WHERE s.b > 1`, add_scan_column() keeps only one predicate scan
+// projection `s -> a,b`. Applying that projection changes the mapping's file type from the full
+// file struct `s<a,b,c>` to the projected file struct `s<a,b>`, so later filter rewrite and
+// TableReader final materialization use the same column shape as the file-local block.
+static Status apply_scan_projection_to_mapping_file_type(const FileScanRequest& file_request,
+                                                         ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(mapping->file_local_id.has_value());
+    const auto file_column_id = LocalColumnId(*mapping->file_local_id);
+    // Predicate columns are the actual scan projection when a column is used by row-level filters:
+    // add_scan_column() removes the duplicate non-predicate projection in that case.
+    const auto* projection = find_scan_projection(file_request.predicate_columns, file_column_id);
+    if (projection == nullptr) {
+        projection = find_scan_projection(file_request.non_predicate_columns, file_column_id);
+    }
+    DORIS_CHECK(projection != nullptr);
+    return apply_projection_to_mapping_file_type(*projection, mapping);
+}
+
+// Build extra scan projections required only by row-level filters on nested struct children.
+//
+// Example: for `SELECT s.a FROM t WHERE s.b.c > 1`, the output projection may only contain `s.a`,
+// but the file reader must also read `s.b.c` to evaluate the predicate. This function collects the
+// table-side filter path, resolves it through ColumnMapping first, and records the corresponding
+// file-side projection in filter_projections. This keeps renamed fields consistent across the scan
+// projection, row-level conjunct rewrite, and nested predicate pruning. Example:
+//   table filter path: s -> renamed_b -> c
+//   old file path:     s -> b -> c
+//   recorded path:     s -> b -> c
+// When add_scan_column() adds the same root as a predicate column, it rebuilds that root from the
+// output mapping, merges this filter-only projection into it, and removes the duplicate
+// non-predicate root entry.
+static Status build_nested_struct_filter_projection_map(
+        const std::vector<TableFilter>& table_filters, const std::vector<ColumnMapping>& mappings,
+        FilterProjectionMap* filter_projections) {
+    DORIS_CHECK(filter_projections != nullptr);
+    filter_projections->clear();
+    for (const auto& table_filter : table_filters) {
+        if (table_filter.conjunct == nullptr) {
+            continue;
+        }
+        // Collect all nested struct paths in the table filter. For example, for
+        // `s.id > 5 AND element_at(s, 'renamed_name') = 'abc'`, collect the table paths
+        // `s -> id` and `s -> renamed_name`, then resolve each one to its file-side projection.
+        std::vector<NestedStructPath> paths;
+        collect_nested_struct_paths(table_filter.conjunct->root(), &paths);
+        for (const auto& path : paths) {
+            auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+                return mapping.global_index == path.root_global_index;
+            });
+            if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() ||
+                path.selectors.empty()) {
+                continue;
+            }
+
+            ResolvedNestedStructPath resolved;
+            LocalColumnIndex root_projection;
+            if (!resolve_nested_struct_path_for_file(path, mappings, &resolved)) {
+                if (!table_root_is_map(*mapping_it)) {
+                    continue;
+                }
+                // Direct map value filters such as `m.value.a > 1` need the value leaf for row
+                // evaluation even when the query only projects another value child. This is only a
+                // scan projection fallback; complex map/array expressions are still not rewritten
+                // into file-local conjuncts.
+                LocalColumnIndex child_projection;
+                RETURN_IF_ERROR(build_file_child_projection_from_schema(
+                        mapping_it->original_file_children, path.selectors, &child_projection));
+                if (child_projection.local_id() < 0) {
+                    continue;
+                }
+                root_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id);
+                root_projection.children.push_back(std::move(child_projection));
+            } else {
+                root_projection = std::move(resolved.file_projection);
+            }
+            auto filter_projection_it = filter_projections->find(root_projection.column_id());
+            if (filter_projection_it == filter_projections->end()) {
+                filter_projections->emplace(root_projection.column_id(),
+                                            std::move(root_projection));
+                continue;
+            }
+            RETURN_IF_ERROR(
+                    merge_local_column_index(&filter_projection_it->second, root_projection));
+        }
+    }
+    return Status::OK();
+}
+
+static void rebuild_projection(ColumnMapping* mapping, LocalIndex block_position) {
+    DORIS_CHECK(mapping->file_local_id.has_value());
+    if (mapping->is_trivial || needs_complex_rematerialize(*mapping)) {
+        mapping->projection = VExprContext::create_shared(VSlotRef::create_shared(
+                cast_set<int>(block_position.value()), cast_set<int>(block_position.value()), -1,
+                mapping->file_type, mapping->file_column_name));
+        return;
+    }
+
+    auto expr = Cast::create_shared(mapping->table_type);
+    expr->add_child(VSlotRef::create_shared(cast_set<int>(block_position.value()),
+                                            cast_set<int>(block_position.value()), -1,
+                                            mapping->file_type, mapping->file_column_name));
+    mapping->projection = VExprContext::create_shared(expr);
+}
+
+// Build file slot rewrite info from the localized filter targets. Only local targets can enter
+// file-reader expressions; constant and unset targets stay above the file reader.
+static std::map<GlobalIndex, FileSlotRewriteInfo> build_file_slot_rewrite_map(
+        const std::vector<ColumnMapping>& mappings,
+        const std::map<GlobalIndex, FilterEntry>& filter_entries) {
+    std::map<GlobalIndex, FileSlotRewriteInfo> global_to_file_slot;
+    for (const auto& mapping : mappings) {
+        const auto entry_it = filter_entries.find(mapping.global_index);
+        if (entry_it == filter_entries.end() || !entry_it->second.is_local()) {
+            continue;
+        }
+        DORIS_CHECK(mapping.file_local_id.has_value());
+        global_to_file_slot.emplace(
+                mapping.global_index,
+                FileSlotRewriteInfo {.block_position = entry_it->second.local_index().value(),
+                                     .file_type = mapping.file_type,
+                                     .table_type = mapping.table_type,
+                                     .file_column_name = mapping.file_column_name});
+    }
+    return global_to_file_slot;
+}
+
+Status TableColumnMapper::_create_by_index_mapping(const ColumnDefinition& table_column,
+                                                   const std::vector<ColumnDefinition>& file_schema,
+                                                   ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(!table_column.is_partition_key);
+
+    // Key contract: in BY_INDEX mode, `ColumnDefinition::identifier` TYPE_INT is interpreted as the
+    // 0-based position of this column inside `file_schema`. FE writes the physical file position
+    // of each non-partition projected column into that identifier. This interpretation allows:
+    //   - sparse projection: read only a subset of file columns (for example only `_col2`
+    //     and `_col4`);
+    //   - column reordering: table column order differs from file column order;
+    //   - no many-to-one mapping: FE must guarantee that each file position is referenced by at
+    //     most one table column.
+    const auto file_index = table_column.get_identifier_position();
+
+    // Case A: file_index is in range, so build a direct positional mapping.
+    // The file column name (for example `_col0`) is intentionally ignored here.
+    if (file_index >= 0 && static_cast<size_t>(file_index) < file_schema.size()) {
+        return _create_direct_mapping(table_column, file_schema[static_cast<size_t>(file_index)],
+                                      mapping);
+    }
+
+    // Case B: file_index is out of range, which means the file does not contain this column.
+    // Route it through the missing-column path used by schema evolution.
+    if (table_column.default_expr != nullptr) {
+        _set_constant_mapping(mapping, table_column.default_expr);
+        return Status::OK();
+    }
+    // Keep the mapping empty (`file_local_id` remains `nullopt`) and let the upper finalize
+    // stage fill NULL/default values.
+    return Status::OK();
+}
+
+void TableColumnMapper::_set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(expr != nullptr);
+    mapping->default_expr = std::move(expr);
+    mapping->constant_index = _constant_map.add(ConstantEntry {
+            .global_index = mapping->global_index,
+            .expr = mapping->default_expr,
+            .type = mapping->table_type,
+    });
+    mapping->filter_conversion = FilterConversionType::CONSTANT;
+}
+
+Status TableColumnMapper::_create_mapping_for_column(const ColumnDefinition& table_column,
+                                                     GlobalIndex global_index,
+                                                     ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    *mapping = ColumnMapping {};
+    mapping->global_index = global_index;
+    mapping->table_column_name = table_column.name;
+    mapping->table_type = table_column.type;
+    const auto row_lineage_type = row_lineage_virtual_column_type(table_column, _options.mode);
+    if (const auto* partition_value = find_partition_value(table_column, _partition_values);
+        table_column.is_partition_key && partition_value != nullptr) {
+        // Partition values are split constants and must take precedence over defaults.
+        _set_constant_mapping(mapping, VExprContext::create_shared(VLiteral::create_shared(
+                                               mapping->table_type, *partition_value)));
+    } else if (_options.mode == TableColumnMappingMode::BY_INDEX &&
+               !table_column.is_partition_key && table_column.has_identifier_field_id()) {
+        // BY_INDEX interprets ColumnDefinition::identifier as physical file position.
+        RETURN_IF_ERROR(_create_by_index_mapping(table_column, _file_schema, mapping));
+    } else if (const auto* file_field = _find_file_field(table_column, _file_schema)) {
+        // Normal physical file column mapping.
+        RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, mapping));
+        if (row_lineage_type != TableVirtualColumnType::INVALID) {
+            // Iceberg v3 rewritten files may physically contain row lineage metadata fields.
+            // File non-null values must be preserved, while file NULLs still inherit from data file
+            // metadata in IcebergTableReader. Therefore the mapping has a real file source plus a
+            // virtual post-materialization step, and filters must wait for finalize output.
+            mapping->virtual_column_type = row_lineage_type;
+            mapping->filter_conversion = FilterConversionType::FINALIZE_ONLY;
+        }
+    } else if (row_lineage_type != TableVirtualColumnType::INVALID) {
+        // Iceberg row lineage metadata fields are optional in data files. Missing fields are exposed
+        // as all-NULL table columns first; IcebergTableReader fills inherited values only when the
+        // split carries first_row_id / last_updated_sequence_number metadata.
+        // FE may attach a default_expr to these hidden metadata columns, but the Iceberg v3
+        // inheritance rule must take precedence over the generic missing-column default path.
+        mapping->virtual_column_type = row_lineage_type;
+    } else if (table_column.name == BeConsts::ICEBERG_ROWID_COL) {
+        // Doris internal Iceberg row locator is never a physical Iceberg data column. It is built
+        // from file path, row position and partition metadata for delete/update/merge.
+        mapping->virtual_column_type = TableVirtualColumnType::ICEBERG_ROWID;
+    } else if (table_column.default_expr != nullptr) {
+        // Missing schema-evolution column with an explicit default expression.
+        _set_constant_mapping(mapping, table_column.default_expr);
+    } else {
+        if (table_column.is_partition_key) {
+            return Status::InvalidArgument(
+                    "Table column '{}' (global_index={}) does not have a matching partition value",
+                    table_column.name, mapping->global_index.value());
+        }
+    }
+    return Status::OK();
+}
+
+Status TableColumnMapper::_create_hidden_filter_mapping(const ColumnDefinition& table_column,
+                                                        GlobalIndex global_index,
+                                                        ColumnMapping* mapping) {
+    auto status = _create_mapping_for_column(table_column, global_index, mapping);
+    if (mapping->file_local_id.has_value() || mapping->constant_index.has_value() ||
+        mapping->virtual_column_type != TableVirtualColumnType::INVALID) {
+        return Status::OK();
+    }
+    if (_options.mode == TableColumnMappingMode::BY_NAME) {
+        return status;
+    }
+
+    // Predicate-only slot refs carry the table name/type but do not carry the table-format field
+    // id used by BY_FIELD_ID or the file position used by BY_INDEX. Use a name fallback only for
+    // hidden filter localization; projected columns still obey the requested mapping mode.
+    const auto* file_field =
+            matcher_for_mode(TableColumnMappingMode::BY_NAME).find(table_column, _file_schema);
+    if (file_field == nullptr) {
+        return status;
+    }
+    ColumnMapping fallback_mapping;
+    fallback_mapping.global_index = global_index;
+    fallback_mapping.table_column_name = table_column.name;
+    fallback_mapping.table_type = table_column.type;
+    RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, &fallback_mapping));
+    *mapping = std::move(fallback_mapping);
+    return Status::OK();
+}
+
+Status TableColumnMapper::_build_hidden_filter_mappings(
+        const std::vector<TableFilter>& table_filters) {
+    _hidden_mappings.clear();
+
+    std::map<GlobalIndex, ColumnDefinition> filter_columns;
+    for (const auto& table_filter : table_filters) {
+        if (table_filter.conjunct != nullptr) {
+            collect_top_level_slot_columns(table_filter.conjunct->root(), &filter_columns);
+        }
+    }
+
+    // TableColumnPredicates only carry GlobalIndex and predicate objects. They do not provide the
+    // top-level column name/type needed to build a hidden mapping, so a predicate-only column can
+    // be hidden-mapped only when the same root slot also appears in a conjunct.
+    for (const auto& [global_index, table_column] : filter_columns) {
+        if (_find_mapping(global_index) != nullptr) {
+            // Ignore columns that are already mapped by the projected columns
+            continue;
+        }
+        ColumnMapping mapping;
+        RETURN_IF_ERROR(_create_hidden_filter_mapping(table_column, global_index, &mapping));
+        if (mapping.file_local_id.has_value() || mapping.constant_index.has_value() ||
+            mapping.virtual_column_type != TableVirtualColumnType::INVALID) {
+            _hidden_mappings.push_back(std::move(mapping));
+        }
+    }
+    return Status::OK();
+}
+
+Status TableColumnMapper::create_mapping(const std::vector<ColumnDefinition>& projected_columns,
+                                         const std::map<std::string, Field>& partition_values,
+                                         const std::vector<ColumnDefinition>& file_schema) {
+    clear();
+    _partition_values = partition_values;
+    _file_schema = file_schema;
+    for (size_t column_idx = 0; column_idx < projected_columns.size(); ++column_idx) {
+        ColumnMapping mapping;
+        RETURN_IF_ERROR(_create_mapping_for_column(projected_columns[column_idx],
+                                                   GlobalIndex(column_idx), &mapping));
+        _mappings.push_back(std::move(mapping));
+    }
+    return Status::OK();
+}
+
+std::vector<ColumnMapping> TableColumnMapper::_filter_visible_mappings() const {
+    std::vector<ColumnMapping> mappings;
+    mappings.reserve(_mappings.size() + _hidden_mappings.size());
+    mappings.insert(mappings.end(), _mappings.begin(), _mappings.end());
+    mappings.insert(mappings.end(), _hidden_mappings.begin(), _hidden_mappings.end());
+    return mappings;
+}
+
+Status TableColumnMapper::_build_filter_entries(const FileScanRequest& file_request) {
+    _filter_entries.clear();
+    const auto mappings = _filter_visible_mappings();
+    for (const auto& mapping : mappings) {
+        FilterEntry entry;
+        if (mapping.constant_index.has_value()) {
+            entry = FilterEntry::constant(*mapping.constant_index);
+        } else if (mapping.file_local_id.has_value() &&
+                   filter_conversion_has_local_source(mapping.filter_conversion)) {
+            const auto local_position_it =
+                    file_request.local_positions.find(LocalColumnId(*mapping.file_local_id));
+            if (local_position_it != file_request.local_positions.end()) {
+                entry = FilterEntry::local(local_position_it->second);
+            }
+        }
+        _filter_entries.emplace(mapping.global_index, entry);
+    }
+    return Status::OK();
+}
+
+Status TableColumnMapper::create_scan_request(
+        const std::vector<TableFilter>& table_filters,
+        const TableColumnPredicates& table_column_predicates,
+        const std::vector<ColumnDefinition>& projected_columns, FileScanRequest* file_request,
+        RuntimeState* runtime_state) {
+    // FileReader evaluates expressions against a file-local block. This mapper owns the
+    // table-column to file-column conversion, so it also owns the file-local block positions.
+    file_request->predicate_columns.clear();
+    file_request->non_predicate_columns.clear();
+    file_request->local_positions.clear();
+    file_request->conjuncts.clear();
+    file_request->delete_conjuncts.clear();
+    file_request->column_predicate_filters.clear();
+    _filter_entries.clear();
+    // 1. Build referenced non-predicate columns
+    for (size_t column_idx = 0; column_idx < projected_columns.size(); ++column_idx) {
+        const auto global_index = GlobalIndex(column_idx);
+        auto* mapping = _find_mapping(global_index);
+        if (mapping != nullptr && mapping->file_local_id.has_value()) {
+            // A file column can be read lazily as a non-predicate column only when it is not used
+            // by row-level expression filters. Single-column ColumnPredicate filters are pruning
+            // hints only and must not force row-level predicate materialization.
+            bool used_by_filter = false;
+            for (const auto& table_filter : table_filters) {
+                const auto& global_indices = table_filter.global_indices;
+                if (std::find(global_indices.begin(), global_indices.end(), global_index) !=
+                            global_indices.end() &&
+                    filter_conversion_has_local_source(mapping->filter_conversion)) {
+                    used_by_filter = true;
+                    break;
+                }
+            }
+            if (!used_by_filter || !enable_lazy_materialization()) {
+                RETURN_IF_ERROR(add_scan_column(file_request, mapping, false,
+                                                force_full_complex_scan_projection()));
+            }
+        }
+    }
+    // 2. Build referenced predicate columns
+    // Hidden filter mappings must be built before localizing filters, so that they can be localized together with visible mappings and referenced by localized filter expressions.
+    RETURN_IF_ERROR(_build_hidden_filter_mappings(table_filters));
+    RETURN_IF_ERROR(
+            localize_filters(table_filters, table_column_predicates, file_request, runtime_state));
+    // 3. Rebuild output projection expressions for projected columns. localize_filters() has
+    // already applied the final scan projection to mapping.file_type/projected_file_children before
+    // rewriting filter expressions.
+    for (auto& mapping : _mappings) {
+        if (!mapping.file_local_id.has_value()) {
+            continue;
+        }
+        auto position_it =
+                file_request->local_positions.find(LocalColumnId(*mapping.file_local_id));
+        DORIS_CHECK(position_it != file_request->local_positions.end())
+                << file_request->local_positions.size() << " " << *mapping.file_local_id << " "
+                << mapping.file_column_name;
+        rebuild_projection(&mapping, position_it->second);
+    }
+    return Status::OK();
+}
+
+ColumnMapping* TableColumnMapper::_find_mapping(GlobalIndex global_index) {
+    for (auto& mapping : _mappings) {
+        if (mapping.global_index == global_index) {
+            return &mapping;
+        }
+    }
+    return nullptr;
+}
+
+ColumnMapping* TableColumnMapper::_find_filter_mapping(GlobalIndex global_index) {
+    if (auto* mapping = _find_mapping(global_index); mapping != nullptr) {
+        return mapping;
+    }
+    for (auto& mapping : _hidden_mappings) {
+        if (mapping.global_index == global_index) {
+            return &mapping;
+        }
+    }
+    return nullptr;
+}
+
+Status TableColumnMapper::localize_filters(const std::vector<TableFilter>& table_filters,
+                                           const TableColumnPredicates& table_column_predicates,
+                                           FileScanRequest* file_request,
+                                           RuntimeState* runtime_state) {
+    FilterProjectionMap filter_projections;
+    auto filter_mappings = _filter_visible_mappings();
+    RETURN_IF_ERROR(build_nested_struct_filter_projection_map(table_filters, filter_mappings,
+                                                              &filter_projections));
+    for (const auto& table_filter : table_filters) {
+        for (const auto& global_index : table_filter.global_indices) {
+            auto* mapping = _find_filter_mapping(global_index);
+            if (mapping == nullptr || !mapping->file_local_id.has_value() ||
+                !filter_conversion_has_local_source(mapping->filter_conversion)) {
+                continue;
+            }
+            RETURN_IF_ERROR(add_scan_column(file_request, mapping, enable_lazy_materialization(),
+                                            force_full_complex_scan_projection(),
+                                            &filter_projections));
+        }
+    }
+    // Rebuild the file type for every scan-local mapping before expression rewrite. Predicate-only
+    // hidden mappings must see the same projected file type as the file reader will produce.
+    for (auto& mapping : _mappings) {
+        if (mapping.file_local_id.has_value() &&
+            file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) {
+            RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping));
+        }
+    }
+    for (auto& mapping : _hidden_mappings) {
+        if (mapping.file_local_id.has_value() &&
+            file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) {
+            RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping));
+        }
+    }
+    RETURN_IF_ERROR(_build_filter_entries(*file_request));
+
+    // Build the complete table-slot rewrite map after all predicate columns have been assigned.
+    // This keeps expression localization independent from filter iteration order.
+    filter_mappings = _filter_visible_mappings();
+    const auto global_to_file_slot = build_file_slot_rewrite_map(filter_mappings, _filter_entries);
+    for (const auto& table_filter : table_filters) {
+        if (table_filter.conjunct != nullptr &&
+            table_filter_has_only_local_entries(table_filter, _filter_entries)) {
+            RewriteContext rewrite_context {.runtime_state = runtime_state};
+            VExprSPtr rewrite_root;
+            Status clone_status;
+            try {
+                clone_status = clone_table_expr_tree(table_filter.conjunct->root(), &rewrite_root);
+            } catch (const Exception& e) {
+                // Some table filters contain complex intermediate values, for example
+                // `element_at(MAP_VALUES(m)[1], 'age') > 30`. The current file-local rewrite only
+                // understands top-level slots and struct-element paths rooted at top-level slots;
+                // cloning such expressions can hit the generic TExpr complex-type limitation.
+                // Leave them above TableReader, where Scanner evaluates the original table-level
+                // conjunct after final materialization.
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        e.to_string(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            } catch (const std::exception& e) {
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        e.what(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            }
+            if (!clone_status.ok()) {
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        clone_status.to_string(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            }
+            bool can_localize = true;
+            auto localized_root = rewrite_table_expr_to_file_expr(rewrite_root, global_to_file_slot,
+                                                                  filter_mappings, &rewrite_context,
+                                                                  &can_localize);
+            if (!can_localize) {
+                continue;
+            }
+            auto localized_conjunct = VExprContext::create_shared(std::move(localized_root));
+            RETURN_IF_ERROR(rewrite_context.prepare_created_exprs(localized_conjunct.get()));
+            file_request->conjuncts.push_back(std::move(localized_conjunct));
+        }
+    }
+    if (enable_column_predicate_filters()) {
+        for (const auto& [global_index, predicates] : table_column_predicates) {
+            const auto* mapping = _find_filter_mapping(global_index);
+            const auto entry_it = _filter_entries.find(global_index);
+            if (mapping == nullptr || !mapping->file_local_id.has_value() || predicates.empty() ||
+                entry_it == _filter_entries.end() || !entry_it->second.is_local() ||
+                !column_predicate_can_use_local_source(mapping->filter_conversion) ||
+                mapping->file_type == nullptr) {
+                continue;
+            }
+            FileColumnPredicateFilter column_predicate_filter;
+            column_predicate_filter.file_column_id = LocalColumnId(*mapping->file_local_id);
+            column_predicate_filter.target =
+                    FileNestedPredicateTarget(column_predicate_filter.file_column_id);
+            const auto file_primitive_type =
+                    remove_nullable(mapping->file_type)->get_primitive_type();
+            for (const auto& predicate : predicates) {
+                DORIS_CHECK(predicate != nullptr);
+                if (predicate->primitive_type() == file_primitive_type) {
+                    column_predicate_filter.predicates.push_back(predicate);
+                }
+            }
+            if (column_predicate_filter.predicates.empty()) {
+                continue;
+            }
+            file_request->column_predicate_filters.push_back(std::move(column_predicate_filter));
+        }
+        for (const auto& table_filter : table_filters) {
+            if (table_filter.conjunct == nullptr ||
+                !table_filter_has_only_local_entries(table_filter, _filter_entries)) {
+                continue;
+            }
+            std::vector<FileColumnPredicateFilter> nested_column_predicate_filters;
+            collect_nested_column_predicate_filters(table_filter.conjunct->root(), filter_mappings,
+                                                    &nested_column_predicate_filters);
+            for (auto& column_predicate_filter : nested_column_predicate_filters) {
+                merge_column_predicate_filter(std::move(column_predicate_filter),
+                                              &file_request->column_predicate_filters);
+            }
+        }
+    }
+    return Status::OK();
+}
+
+const ColumnDefinition* TableColumnMapper::_find_file_field(
+        const ColumnDefinition& table_column,
+        const std::vector<ColumnDefinition>& file_schema) const {
+    if (table_column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
+        const auto field_it = std::ranges::find_if(file_schema, [](const ColumnDefinition& field) {
+            return field.column_type == ColumnType::GLOBAL_ROWID;
+        });
+        return field_it == file_schema.end() ? nullptr : &*field_it;
+    }
+    return matcher_for_mode(_options.mode).find(table_column, file_schema);
+}
+
+Status TableColumnMapper::_create_direct_mapping(const ColumnDefinition& table_column,
+                                                 const ColumnDefinition& file_field,
+                                                 ColumnMapping* mapping) const {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(file_field.local_id >= 0 || file_field.local_id == GLOBAL_ROWID_COLUMN_ID);
+    mapping->file_local_id = file_field.local_id;
+    mapping->table_column_name = table_column.name;
+    mapping->file_column_name = file_field.name;
+    mapping->original_file_type = file_field.type;
+    mapping->original_file_children = file_field.children;
+    mapping->projected_file_children = file_field.children;
+    mapping->file_type = file_field.type;
+    mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+    mapping->filter_conversion = mapping->is_trivial ? FilterConversionType::COPY_DIRECTLY
+                                                     : FilterConversionType::CAST_FILTER;
+    mapping->child_mappings.clear();
+
+    auto table_children = table_column.children;
+    const auto nested_table_type = remove_nullable(mapping->table_type);
+    // Some scan paths, especially SELECT *, only carry the complete complex DataType for a table
+    // column and leave ColumnDefinition::children empty. If the file type is an older complex
+    // schema, treating this as a leaf mapping would make TableReader fall back to a plain CAST.
+    // That is invalid for evolved structs with different field counts.
+    //
+    // Example:
+    //   table column type: Map(String, Struct(age, full_name, gender))
+    //   old file type:    Map(String, Struct(age, name))
+    //   table children:   empty
+    //
+    // Synthesize key/value/struct-field children from the table type so the normal recursive
+    // mapping path can rematerialize `name -> full_name` and fill missing `gender` with defaults,
+    // instead of trying to CAST Struct(age, name) to Struct(age, full_name, gender).
+    const bool synthesized_table_children =
+            table_children.empty() && is_complex_type(nested_table_type->get_primitive_type()) &&
+            !mapping->table_type->equals(*mapping->file_type);
+    if (synthesized_table_children) {
+        table_children = synthesize_complex_children_from_type(mapping->table_type);
+    } else if (!table_children.empty() && !mapping->table_type->equals(*mapping->file_type)) {
+        complete_required_complex_children_from_type(mapping->table_type, &table_children);
+    }
+
+    if (!table_children.empty()) {
+        if (!is_complex_type(remove_nullable(mapping->file_type)->get_primitive_type())) {
+            return Status::NotSupported(
+                    "Cannot map complex table column '{}' to scalar parquet column '{}', table "
+                    "type={}, file type={}",
+                    table_column.name, file_field.name, mapping->table_type->get_name(),
+                    mapping->file_type->get_name());
+        }
+        RETURN_IF_ERROR(validate_file_schema_children(file_field));
+        std::vector<int32_t> synthesized_used_file_child_ids;
+        for (size_t table_child_idx = 0; table_child_idx < table_children.size();
+             ++table_child_idx) {
+            const auto& table_child = table_children[table_child_idx];
+            const auto* file_child =
+                    find_file_child_for_mapping(table_child, file_field, _options.mode,
+                                                table_child_idx, synthesized_table_children);
+            if (synthesized_table_children && file_child != nullptr) {
+                const auto file_child_id = file_child->file_local_id();
+                if (std::ranges::find(synthesized_used_file_child_ids, file_child_id) !=
+                    synthesized_used_file_child_ids.end()) {
+                    file_child = nullptr;
+                    for (const auto& candidate : file_field.children) {
+                        const auto candidate_id = candidate.file_local_id();
+                        if (std::ranges::find(synthesized_used_file_child_ids, candidate_id) ==
+                            synthesized_used_file_child_ids.end()) {
+                            file_child = &candidate;
+                            break;
+                        }
+                    }
+                }
+                if (file_child != nullptr) {
+                    synthesized_used_file_child_ids.push_back(file_child->file_local_id());
+                }
+            }
+            if (file_child == nullptr) {
+                ColumnMapping child_mapping;
+                child_mapping.table_column_name = table_child.name;
+                child_mapping.file_column_name = table_child.name;
+                child_mapping.table_type = table_child.type;
+                child_mapping.file_type = table_child.type;
+                child_mapping.filter_conversion = FilterConversionType::FINALIZE_ONLY;
+                mapping->child_mappings.push_back(std::move(child_mapping));
+                continue;
+            }
+            ColumnMapping child_mapping;
+            child_mapping.table_column_name = table_child.name;
+            child_mapping.table_type = table_child.type;
+            RETURN_IF_ERROR(_create_direct_mapping(table_child, *file_child, &child_mapping));
+            mapping->child_mappings.push_back(std::move(child_mapping));
+        }
+        if (needs_projected_file_type_rebuild(*mapping)) {
+            // If complex projection prunes some children, we have to rebuild the projected file type to make sure the reader expression can find the correct child types by name.
+            RETURN_IF_ERROR(rebuild_projected_file_children_and_type(
+                    mapping->file_type, mapping->original_file_children, mapping->child_mappings,
+                    &mapping->projected_file_children, &mapping->file_type));
+            DCHECK(mapping->table_type != nullptr);
+            mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+            // TODO: ? READER_EXPRESSION
+            mapping->filter_conversion = mapping->is_trivial
+                                                 ? FilterConversionType::COPY_DIRECTLY
+                                                 : FilterConversionType::READER_EXPRESSION;
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper.h b/be/src/format_v2/column_mapper.h
new file mode 100644
index 00000000000000..2ffbbbb9414d83
--- /dev/null
+++ b/be/src/format_v2/column_mapper.h
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/file_reader.h"
+
+namespace doris {
+class ColumnPredicate;
+class RuntimeState;
+} // namespace doris
+
+namespace doris::format {
+
+struct ColumnDefinition;
+struct TableFilter;
+
+// Table-level simple predicates grouped by table/global output position. The key is not
+// LocalColumnId: TableColumnMapper resolves it through ColumnMapping before creating file pruning
+// hints.
+using TableColumnPredicates = std::map<GlobalIndex, std::vector<std::shared_ptr<ColumnPredicate>>>;
+
+enum class TableColumnMappingMode {
+    // Match by ColumnDefinition::identifier TYPE_INT as field id.
+    BY_FIELD_ID,
+    // Match by ColumnDefinition::identifier TYPE_STRING, or logical name when identifier is null.
+    BY_NAME,
+    // Match top-level columns by file position. This mainly serves Hive1 ORC style files whose
+    // column names are placeholder values such as `_col0` / `_col1`, where position is the only
+    // reliable way to select the correct column.
+    BY_INDEX,
+};
+
+enum TableVirtualColumnType {
+    INVALID = 0, // not a virtual column
+    // Iceberg v3 row lineage metadata column `_row_id`. Physical non-null values
+    // are preserved; NULL or missing values inherit first_row_id + row_position.
+    ROW_ID = 1,
+    // Iceberg v3 row lineage metadata column `_last_updated_sequence_number`.
+    // Physical non-null values are preserved; NULL or missing values inherit the
+    // data file's last_updated_sequence_number.
+    LAST_UPDATED_SEQUENCE_NUMBER = 2,
+    // Doris internal Iceberg row locator column `__DORIS_ICEBERG_ROWID_COL__`.
+    // It is a struct used by delete/update/merge, not the Iceberg `_row_id`.
+    ICEBERG_ROWID = 3,
+};
+
+enum class FilterConversionType {
+    COPY_DIRECTLY, // filter can be copied directly from file layer without any change, e.g. column type and table type are the same and no complex nested projection is involved.
+    CAST_FILTER, // filter can be converted from file layer by adding a cast, e.g. column type is nullable but table type is not, or file column has a trivial nested projection but table column has a complex nested projection.
+    READER_EXPRESSION,
+    FINALIZE_ONLY, // filter cannot be converted to file layer and should be evaluated at table reader finalize phase, e.g. predicates on ICEBERG_ROW_ID column which is generated by IcebergReader.
+    CONSTANT,
+};
+
+// Nested global-to-local child mapping. The root index points either to a request-local slot or to
+// a child id, depending on the owner. child_mapping keeps the recursive table-child to file-child
+// relationship explicit instead of encoding it in ColumnMapping flags.
+struct IndexMapping {
+    int32_t index = -1;
+    std::map<int32_t, std::shared_ptr<IndexMapping>> child_mapping;
+};
+
+// Recursive result produced after one table/global column is assigned to a file-local source.
+struct ColumnMapResult {
+    std::optional<LocalColumnId> local_column_id;
+    std::optional<LocalColumnIndex> column_index;
+    std::optional<IndexMapping> mapping;
+};
+
+// Final mapping entry from one global result column to one file-local source.
+struct ColumnMapEntry {
+    IndexMapping mapping;
+    DataTypePtr local_type;
+    DataTypePtr global_type;
+    FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY;
+};
+
+// Collection of final result-column mappings produced for one file/split.
+struct ResultColumnMapping {
+    std::map<GlobalIndex, ColumnMapEntry> global_to_local;
+};
+
+// Mapping result from one table column to one file column.
+// This is the main boundary object between table-level schema semantics and file-local schema
+// semantics.
+struct ColumnMapping {
+    // Position of the top-level projected column in the table/global output block. Table-level
+    // filters and column predicates refer to this index after FileScannerV2 translates FE ids at
+    // the scanner boundary.
+    GlobalIndex global_index;
+    std::string table_column_name;
+    // File-reader local id for the mapped node.
+    //
+    // For a root mapping it is convertible to LocalColumnId. For a nested mapping it is the
+    // LocalColumnIndex child id under the parent projection. This is deliberately separated from
+    // ColumnDefinition::identifier, which is the table-to-file matching key such as Parquet/Iceberg
+    // field_id or column name.
+    //
+    // Empty means the table column is constant, missing, partition-only, or virtual.
+    std::optional<int32_t> file_local_id;
+    std::string file_column_name;
+    // Full file type/children before nested projection pruning. Used to rebuild projected types
+    // and to localize nested filters that reference children not present in the output projection.
+    DataTypePtr original_file_type;
+    std::vector<ColumnDefinition> original_file_children;
+    // File children after applying the scan projection. The order follows the file-local semantic
+    // schema, not table child order. TableReader uses this to map table-output children back to the
+    // file-local block layout when projection, predicate-only children, and schema evolution mix.
+    std::vector<ColumnDefinition> projected_file_children;
+    // Split/file-local constant entry when this mapping is produced from partition/default/virtual
+    // expression instead of physical file data.
+    std::optional<ConstantIndex> constant_index;
+    // Effective file type after applying casts/remaps/nested projection pruning.
+    DataTypePtr file_type;
+    // Target table/global type after final materialization.
+    DataTypePtr table_type;
+
+    // Final projection expression used to convert file-local values into table/global values, such
+    // as casts, defaults, partition values, generated columns, or complex-column remaps.
+    VExprContextSPtr projection;
+
+    // Mapping tree for nested table children. The order follows table output children, while file
+    // children can be pruned/reordered through each child mapping's file-reader local id.
+    std::vector<ColumnMapping> child_mappings;
+    // True when file value can be used directly as table value without cast or child remap.
+    bool is_trivial = false;
+    // How filters referencing this table/global column can be converted below table-reader
+    // finalize. This is metadata for localize_filters() and future constant-filter evaluation.
+    FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY;
+    TableVirtualColumnType virtual_column_type = TableVirtualColumnType::INVALID;
+    VExprContextSPtr default_expr;
+
+    std::string debug_string() const;
+};
+
+struct TableColumnMapperOptions {
+    TableColumnMappingMode mode = TableColumnMappingMode::BY_FIELD_ID;
+
+    std::string debug_string() const;
+};
+
+Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr);
+const Field* find_partition_value(const ColumnDefinition& table_column,
+                                  const std::map<std::string, Field>& partition_values);
+
+// Generic mapping layer from table schema to file schema.
+// Iceberg uses BY_FIELD_ID. Plain by-name formats can reuse this component as well, so keep this
+// abstraction table-format neutral instead of making it Iceberg-only.
+class TableColumnMapper {
+public:
+    explicit TableColumnMapper(TableColumnMapperOptions options = {})
+            : _options(std::move(options)) {}
+    virtual ~TableColumnMapper() = default;
+
+    // Build column mappings from table schema to file schema.
+    // The resulting ColumnMapping describes how each table column is produced from a file column,
+    // a constant, or an expression. Later projection, filter localization, and table-block
+    // finalization should all reuse the same mapping.
+    virtual Status create_mapping(const std::vector<ColumnDefinition>& projected_columns,
+                                  const std::map<std::string, Field>& partition_values,
+                                  const std::vector<ColumnDefinition>& file_schema);
+
+    // Convert a table-level scan request into a file-local scan request. table_filters preserve
+    // row-level filtering semantics and are rewritten as file-local conjuncts. table_column_predicates
+    // are converted only into file-layer pruning hints and do not participate in batch row
+    // filtering.
+    virtual Status create_scan_request(const std::vector<TableFilter>& table_filters,
+                                       const TableColumnPredicates& table_column_predicates,
+                                       const std::vector<ColumnDefinition>& projected_columns,
+                                       FileScanRequest* file_request,
+                                       RuntimeState* runtime_state = nullptr);
+
+    // Localize table-level filters to the file schema.
+    // Trivial mappings can copy structured predicates directly. Type changes may be localized with
+    // a safe cast. Expressions that cannot be pushed down safely should be handled through
+    // reader_expression_map or table-level finalize/filter fallback.
+    virtual Status localize_filters(const std::vector<TableFilter>& table_filters,
+                                    const TableColumnPredicates& table_column_predicates,
+                                    FileScanRequest* file_request,
+                                    RuntimeState* runtime_state = nullptr);
+    void clear() {
+        _mappings.clear();
+        _hidden_mappings.clear();
+        _constant_map.clear();
+        _filter_entries.clear();
+        _file_schema.clear();
+        _partition_values.clear();
+    }
+    const std::vector<ColumnMapping>& mappings() const { return _mappings; }
+    const std::map<GlobalIndex, FilterEntry>& filter_entries() const { return _filter_entries; }
+    const ConstantMap& constant_map() const { return _constant_map; }
+    std::string debug_string() const;
+
+protected:
+    // Columnar readers such as Parquet can read predicate columns first, evaluate row filters, and
+    // lazily read the rest. Row-oriented readers such as CSV/Text materialize one row at a time and
+    // should keep all required columns in one scan list.
+    virtual bool enable_lazy_materialization() const { return true; }
+    // File-layer column predicate filters are reader-specific pruning hints. Parquet consumes them
+    // for row-group/page-index/statistics pruning; simple delimited readers do not.
+    virtual bool enable_column_predicate_filters() const { return true; }
+    // Row-oriented readers such as CSV/Text cannot physically read only a nested child from one
+    // delimited text field. They must scan the whole complex top-level field and let TableReader
+    // rematerialize the requested table child after row-level filters have run.
+    virtual bool force_full_complex_scan_projection() const { return false; }
+
+    const ColumnDefinition* _find_file_field(
+            const ColumnDefinition& table_column,
+            const std::vector<ColumnDefinition>& file_schema) const;
+    Status _create_direct_mapping(const ColumnDefinition& table_column,
+                                  const ColumnDefinition& file_field, ColumnMapping* mapping) const;
+
+    Status _create_by_index_mapping(const ColumnDefinition& table_column,
+                                    const std::vector<ColumnDefinition>& file_schema,
+                                    ColumnMapping* mapping);
+    Status _build_filter_entries(const FileScanRequest& file_request);
+    Status _build_result_column_mapping(const FileScanRequest& file_request);
+
+    void _set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr);
+    Status _create_mapping_for_column(const ColumnDefinition& table_column,
+                                      GlobalIndex global_index, ColumnMapping* mapping);
+    Status _create_hidden_filter_mapping(const ColumnDefinition& table_column,
+                                         GlobalIndex global_index, ColumnMapping* mapping);
+    Status _build_hidden_filter_mappings(const std::vector<TableFilter>& table_filters);
+    std::vector<ColumnMapping> _filter_visible_mappings() const;
+
+    ColumnMapping* _find_mapping(GlobalIndex global_index);
+    ColumnMapping* _find_filter_mapping(GlobalIndex global_index);
+
+    TableColumnMapperOptions _options;
+    // Column mapping for each projected column, in the same order as projected_columns. Each entry
+    // describes how to get one table/global column from file-local sources, and carries metadata
+    // for filter localization and result finalize.
+    std::vector<ColumnMapping> _mappings;
+    // Predicate-only top-level columns are not output projection columns, so keep their mappings
+    // here. They are visible only to filter localization and file-reader predicate construction.
+    std::vector<ColumnMapping> _hidden_mappings;
+    std::map<GlobalIndex, FilterEntry> _filter_entries;
+    ConstantMap _constant_map;
+    // Split-local schema state retained from create_mapping() so create_scan_request() can build
+    // hidden mappings for top-level filter slots that are absent from projected_columns.
+    std::vector<ColumnDefinition> _file_schema;
+    std::map<std::string, Field> _partition_values;
+};
+
+// Parquet consumes the full FileScanRequest shape: predicate columns for lazy materialization and
+// column_predicate_filters for statistics/page-index pruning.
+class ParquetColumnMapper final : public TableColumnMapper {
+public:
+    using TableColumnMapper::TableColumnMapper;
+};
+
+// Mapper for readers that always materialize every required file column before filtering. The
+// table-to-file schema mapping is still generic, but the FileScanRequest layout is simpler:
+// predicate_columns and column_predicate_filters are not populated.
+class MaterializedColumnMapper final : public TableColumnMapper {
+public:
+    using TableColumnMapper::TableColumnMapper;
+
+protected:
+    bool enable_lazy_materialization() const override { return false; }
+    bool enable_column_predicate_filters() const override { return false; }
+    bool force_full_complex_scan_projection() const override { return true; }
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper_nested.cpp b/be/src/format_v2/column_mapper_nested.cpp
new file mode 100644
index 00000000000000..0e3539242fff26
--- /dev/null
+++ b/be/src/format_v2/column_mapper_nested.cpp
@@ -0,0 +1,1050 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper_nested.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/exception.h"
+#include "core/assert_cast.h"
+#include "core/data_type/convert_field_to_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "exprs/create_predicate_function.h"
+#include "exprs/vexpr.h"
+#include "exprs/vin_predicate.h"
+#include "format_v2/expr/cast.h"
+#include "gen_cpp/Exprs_types.h"
+#include "storage/predicate/null_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris::format {
+
+namespace {
+
+static bool is_cast_expr(const VExprSPtr& expr) {
+    return dynamic_cast<const Cast*>(expr.get()) != nullptr;
+}
+
+static bool is_binary_comparison_predicate(const VExprSPtr& expr) {
+    if (expr == nullptr || expr->get_num_children() != 2 ||
+        (expr->node_type() != TExprNodeType::BINARY_PRED &&
+         expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED)) {
+        return false;
+    }
+    switch (expr->op()) {
+    case TExprOpcode::EQ:
+    case TExprOpcode::EQ_FOR_NULL:
+    case TExprOpcode::NE:
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool is_null_predicate_function(const VExprSPtr& expr, bool* is_null) {
+    DORIS_CHECK(is_null != nullptr);
+    if (expr == nullptr || expr->node_type() != TExprNodeType::FUNCTION_CALL ||
+        expr->get_num_children() != 1) {
+        return false;
+    }
+    if (expr->fn().name.function_name == "is_null_pred") {
+        *is_null = true;
+        return true;
+    }
+    if (expr->fn().name.function_name == "is_not_null_pred") {
+        *is_null = false;
+        return true;
+    }
+    return false;
+}
+
+static bool is_signed_integer_type(PrimitiveType type) {
+    switch (type) {
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_LARGEINT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static int primitive_integer_width(PrimitiveType type) {
+    switch (type) {
+    case TYPE_TINYINT:
+        return 1;
+    case TYPE_SMALLINT:
+        return 2;
+    case TYPE_INT:
+        return 4;
+    case TYPE_BIGINT:
+        return 8;
+    case TYPE_LARGEINT:
+        return 16;
+    default:
+        return 0;
+    }
+}
+
+static bool is_decimal_type(PrimitiveType type) {
+    switch (type) {
+    case TYPE_DECIMAL32:
+    case TYPE_DECIMAL64:
+    case TYPE_DECIMALV2:
+    case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool is_order_preserving_safe_cast(const DataTypePtr& from_type,
+                                          const DataTypePtr& to_type) {
+    if (from_type == nullptr || to_type == nullptr) {
+        return false;
+    }
+    const auto from_nested_type = remove_nullable(from_type);
+    const auto to_nested_type = remove_nullable(to_type);
+    if (from_nested_type->equals(*to_nested_type)) {
+        return true;
+    }
+
+    const auto from_primitive_type = from_nested_type->get_primitive_type();
+    const auto to_primitive_type = to_nested_type->get_primitive_type();
+    if (is_signed_integer_type(from_primitive_type) && is_signed_integer_type(to_primitive_type)) {
+        return primitive_integer_width(to_primitive_type) >=
+               primitive_integer_width(from_primitive_type);
+    }
+    if (from_primitive_type == TYPE_FLOAT && to_primitive_type == TYPE_DOUBLE) {
+        return true;
+    }
+    if (is_decimal_type(from_primitive_type) && is_decimal_type(to_primitive_type)) {
+        return from_nested_type->get_scale() == to_nested_type->get_scale() &&
+               to_nested_type->get_precision() >= from_nested_type->get_precision();
+    }
+    return false;
+}
+
+static bool parse_struct_child_selector(const VExprSPtr& expr, StructChildSelector* selector) {
+    DORIS_CHECK(selector != nullptr);
+    if (expr == nullptr || !expr->is_literal()) {
+        return false;
+    }
+    const Field field = literal_field(expr);
+    switch (field.get_type()) {
+    case TYPE_STRING:
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+        selector->by_name = true;
+        selector->name = std::string(field.as_string_view());
+        return true;
+    case TYPE_BOOLEAN:
+        selector->by_name = false;
+        selector->ordinal = field.get<TYPE_BOOLEAN>() ? 1 : 0;
+        return selector->ordinal > 0;
+    case TYPE_TINYINT:
+        selector->by_name = false;
+        if (field.get<TYPE_TINYINT>() <= 0) {
+            return false;
+        }
+        selector->ordinal = cast_set<size_t>(field.get<TYPE_TINYINT>());
+        return true;
+    case TYPE_SMALLINT:
+        selector->by_name = false;
+        if (field.get<TYPE_SMALLINT>() <= 0) {
+            return false;
+        }
+        selector->ordinal = cast_set<size_t>(field.get<TYPE_SMALLINT>());
+        return true;
+    case TYPE_INT:
+        selector->by_name = false;
+        if (field.get<TYPE_INT>() <= 0) {
+            return false;
+        }
+        selector->ordinal = cast_set<size_t>(field.get<TYPE_INT>());
+        return true;
+    case TYPE_BIGINT:
+        selector->by_name = false;
+        if (field.get<TYPE_BIGINT>() <= 0) {
+            return false;
+        }
+        selector->ordinal = cast_set<size_t>(field.get<TYPE_BIGINT>());
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool extract_nested_struct_path(const VExprSPtr& expr, NestedStructPath* path) {
+    DORIS_CHECK(path != nullptr);
+    if (!is_struct_element_expr(expr)) {
+        return false;
+    }
+
+    // Process for element_at(struct, 'field') or element_at(struct, 1) expression.
+    StructChildSelector selector;
+    if (!parse_struct_child_selector(expr->children()[1], &selector)) {
+        return false;
+    }
+
+    const auto& parent = expr->children()[0];
+    if (parent->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(parent.get());
+        path->root_global_index = slot_ref_global_index(*slot_ref);
+        path->selectors.clear();
+        path->selectors.push_back(std::move(selector));
+        return true;
+    }
+
+    // Process for element_at(element_at(struct<struct>, 'field'), 'field') or
+    // element_at(element_at(struct<struct>, 1), 1) expression.
+    if (!extract_nested_struct_path(parent, path)) {
+        return false;
+    }
+    path->selectors.push_back(std::move(selector));
+    return true;
+}
+
+static bool extract_nested_struct_path_for_pruning(const VExprSPtr& expr, NestedStructPath* path) {
+    DORIS_CHECK(path != nullptr);
+    // Simple `ELEMENT_AT`
+    if (extract_nested_struct_path(expr, path)) {
+        return true;
+    }
+
+    // `ELEMENT_AT` with `CAST`
+    if (!is_cast_expr(expr) || expr->get_num_children() != 1) {
+        return false;
+    }
+    const auto& child = expr->children()[0];
+    if (!is_order_preserving_safe_cast(child->data_type(), expr->data_type())) {
+        return false;
+    }
+    // A safe widening cast is null-preserving and keeps the comparison ordering of the nested
+    // primitive leaf, so file-layer pruning can target the original leaf statistics. The row-level
+    // filter still evaluates the original cast expression after read.
+    return extract_nested_struct_path_for_pruning(child, path);
+}
+
+static const ColumnDefinition* resolve_file_child(const std::vector<ColumnDefinition>& children,
+                                                  const StructChildSelector& selector) {
+    if (selector.by_name) {
+        const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) {
+            return child.name == selector.name;
+        });
+        return child_it == children.end() ? nullptr : &*child_it;
+    }
+    if (selector.ordinal == 0 || selector.ordinal > children.size()) {
+        return nullptr;
+    }
+    return &children[selector.ordinal - 1];
+}
+
+static const DataTypeStruct* struct_type_or_null(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+    const auto nested_type = remove_nullable(type);
+    if (nested_type->get_primitive_type() != TYPE_STRUCT) {
+        return nullptr;
+    }
+    return assert_cast<const DataTypeStruct*>(nested_type.get());
+}
+
+static std::optional<int32_t> struct_child_index(const ColumnMapping& mapping,
+                                                 const StructChildSelector& selector) {
+    const auto* struct_type = struct_type_or_null(mapping.table_type);
+    if (struct_type == nullptr) {
+        return std::nullopt;
+    }
+    if (selector.by_name) {
+        const auto position = struct_type->try_get_position_by_name(selector.name);
+        if (!position.has_value()) {
+            return std::nullopt;
+        }
+        return cast_set<int32_t>(*position);
+    }
+    if (selector.ordinal == 0 || selector.ordinal > struct_type->get_elements().size()) {
+        return std::nullopt;
+    }
+    return cast_set<int32_t>(selector.ordinal - 1);
+}
+
+// Get the global child index for a child mapping. If the mapping's table type is struct, resolve
+// the child index by the child mapping's table column name; otherwise, use the fallback child index.
+static int32_t child_mapping_global_index(const ColumnMapping& mapping,
+                                          const ColumnMapping& child_mapping,
+                                          size_t fallback_child_idx) {
+    const auto* struct_type = struct_type_or_null(mapping.table_type);
+    if (struct_type == nullptr) {
+        return cast_set<int32_t>(fallback_child_idx);
+    }
+    const auto position = struct_type->try_get_position_by_name(child_mapping.table_column_name);
+    DORIS_CHECK(position.has_value()) << "Cannot find child '" << child_mapping.table_column_name
+                                      << "' in table type " << mapping.table_type->get_name();
+    return cast_set<int32_t>(*position);
+}
+
+static const ColumnMapping* resolve_mapped_child(const ColumnMapping& mapping,
+                                                 int32_t global_child_index) {
+    for (size_t child_idx = 0; child_idx < mapping.child_mappings.size(); ++child_idx) {
+        const auto& child_mapping = mapping.child_mappings[child_idx];
+        if (child_mapping_global_index(mapping, child_mapping, child_idx) == global_child_index) {
+            return &child_mapping;
+        }
+    }
+    return nullptr;
+}
+
+enum class NestedProjectionResolveResult {
+    RESOLVED,
+    NOT_REPRESENTED,
+    MISSING_FILE_CHILD,
+};
+
+// Resolve a table-side nested struct path through the existing ColumnMapping tree and build the
+// corresponding file-local projection. For example, if table column `s` has children
+// `{a, renamed_b}` and file column `s` has children `{a, b}`, the filter path
+// `struct_element(s, 'renamed_b')` is resolved to the file projection `s -> b` by following the
+// child mapping instead of matching the table child name against the file schema. Return
+// MISSING_FILE_CHILD when ColumnMapping explicitly says a table child is absent from this file; in
+// that case callers must not fall back to schema-name lookup, because Iceberg can drop a field and
+// later add a different field with the same name.
+static NestedProjectionResolveResult resolve_nested_projection_with_mapping(
+        const NestedStructPath& path, const std::vector<ColumnMapping>& mappings,
+        LocalColumnIndex* root_projection) {
+    DORIS_CHECK(root_projection != nullptr);
+    *root_projection = {};
+    if (path.selectors.empty()) {
+        return NestedProjectionResolveResult::NOT_REPRESENTED;
+    }
+    const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+        return mapping.global_index == path.root_global_index;
+    });
+    if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value()) {
+        return NestedProjectionResolveResult::NOT_REPRESENTED;
+    }
+
+    *root_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id);
+    auto* current_projection = root_projection;
+    const auto* current_mapping = &*mapping_it;
+
+    // Traverse the ColumnMapping tree according to the table-side struct selectors and emit the
+    // corresponding file-local child ids. A missing child mapping means this predicate-only path
+    // may need schema fallback; an existing child mapping without a file id means the table child
+    // is genuinely absent from this file and must stay above the file reader.
+    for (size_t selector_idx = 0; selector_idx < path.selectors.size(); ++selector_idx) {
+        const auto global_child_index =
+                struct_child_index(*current_mapping, path.selectors[selector_idx]);
+        if (!global_child_index.has_value()) {
+            *root_projection = {};
+            return NestedProjectionResolveResult::NOT_REPRESENTED;
+        }
+        const auto* child_mapping = resolve_mapped_child(*current_mapping, *global_child_index);
+        if (child_mapping == nullptr) {
+            *root_projection = {};
+            return NestedProjectionResolveResult::NOT_REPRESENTED;
+        }
+        if (!child_mapping->file_local_id.has_value()) {
+            *root_projection = {};
+            return NestedProjectionResolveResult::MISSING_FILE_CHILD;
+        }
+
+        auto child_projection = LocalColumnIndex::partial_local(*child_mapping->file_local_id);
+        child_projection.project_all_children = selector_idx + 1 == path.selectors.size();
+        current_projection->children.push_back(std::move(child_projection));
+        current_projection = &current_projection->children.back();
+        current_mapping = child_mapping;
+    }
+    return NestedProjectionResolveResult::RESOLVED;
+}
+
+static bool table_root_is_struct(const ColumnMapping& mapping) {
+    return struct_type_or_null(mapping.table_type) != nullptr;
+}
+
+static const std::vector<ColumnDefinition>& scan_file_children(const ColumnMapping& mapping) {
+    return !mapping.projected_file_children.empty() ? mapping.projected_file_children
+                                                    : mapping.original_file_children;
+}
+
+static const ColumnDefinition* resolve_file_leaf_from_projection(
+        const std::vector<ColumnDefinition>& children, const LocalColumnIndex& projection) {
+    const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) {
+        return child.file_local_id() == projection.local_id();
+    });
+    if (child_it == children.end()) {
+        return nullptr;
+    }
+    if (projection.children.empty()) {
+        return &*child_it;
+    }
+    if (projection.children.size() != 1) {
+        return nullptr;
+    }
+    return resolve_file_leaf_from_projection(child_it->children, projection.children[0]);
+}
+
+static bool collect_file_child_names_from_projection(const std::vector<ColumnDefinition>& children,
+                                                     const LocalColumnIndex& projection,
+                                                     std::vector<std::string>* file_child_names,
+                                                     std::vector<DataTypePtr>* file_child_types) {
+    DORIS_CHECK(file_child_names != nullptr);
+    DORIS_CHECK(file_child_types != nullptr);
+    const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) {
+        return child.file_local_id() == projection.local_id();
+    });
+    if (child_it == children.end()) {
+        return false;
+    }
+    file_child_names->push_back(child_it->name);
+    file_child_types->push_back(child_it->type);
+    if (projection.children.empty()) {
+        return true;
+    }
+    if (projection.children.size() != 1) {
+        return false;
+    }
+    return collect_file_child_names_from_projection(child_it->children, projection.children[0],
+                                                    file_child_names, file_child_types);
+}
+
+struct NestedPredicateTarget {
+    LocalColumnIndex file_projection;
+    FileNestedPredicateTarget file_target;
+    std::string leaf_name;
+    DataTypePtr leaf_type;
+};
+
+static std::unique_ptr<FileStructPredicateTarget> build_struct_predicate_target_from_projection(
+        const std::vector<ColumnDefinition>& children, const LocalColumnIndex& projection) {
+    const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) {
+        return child.file_local_id() == projection.local_id();
+    });
+    if (child_it == children.end()) {
+        return nullptr;
+    }
+    std::unique_ptr<FileStructPredicateTarget> nested_child;
+    if (!projection.children.empty()) {
+        if (projection.children.size() != 1) {
+            return nullptr;
+        }
+        nested_child = build_struct_predicate_target_from_projection(child_it->children,
+                                                                     projection.children[0]);
+        if (nested_child == nullptr) {
+            return nullptr;
+        }
+    }
+    return std::make_unique<FileStructPredicateTarget>(child_it->file_local_id(), child_it->name,
+                                                       std::move(nested_child));
+}
+
+static bool build_struct_predicate_target(const ColumnMapping& root_mapping,
+                                          const LocalColumnIndex& root_projection,
+                                          FileNestedPredicateTarget* file_target) {
+    DORIS_CHECK(file_target != nullptr);
+    if (!root_projection.column_id().is_valid() || root_projection.children.size() != 1) {
+        return false;
+    }
+    auto struct_target = build_struct_predicate_target_from_projection(
+            root_mapping.original_file_children, root_projection.children[0]);
+    if (struct_target == nullptr) {
+        return false;
+    }
+    *file_target = FileNestedPredicateTarget(root_projection.column_id(), std::move(struct_target));
+    return true;
+}
+
+static bool resolve_nested_predicate_target(const NestedStructPath& path,
+                                            const std::vector<ColumnMapping>& mappings,
+                                            NestedPredicateTarget* target) {
+    DORIS_CHECK(target != nullptr);
+    ResolvedNestedStructPath resolved;
+    if (!resolve_nested_struct_path_for_file(path, mappings, &resolved)) {
+        return false;
+    }
+
+    const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+        return mapping.global_index == path.root_global_index;
+    });
+    if (mapping_it == mappings.end() || resolved.file_projection.children.size() != 1) {
+        return false;
+    }
+    const auto* file_leaf = resolve_file_leaf_from_projection(mapping_it->original_file_children,
+                                                              resolved.file_projection.children[0]);
+    if (file_leaf == nullptr || file_leaf->type == nullptr) {
+        return false;
+    }
+    target->leaf_type = remove_nullable(file_leaf->type);
+    if (is_complex_type(target->leaf_type->get_primitive_type())) {
+        return false;
+    }
+    target->leaf_name = file_leaf->name;
+    target->file_projection = std::move(resolved.file_projection);
+    if (!build_struct_predicate_target(*mapping_it, target->file_projection,
+                                       &target->file_target)) {
+        return false;
+    }
+    return true;
+}
+
+static VExprSPtr original_table_literal_for_nested_predicate(const VExprSPtr& literal_expr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* rewritten_literal = dynamic_cast<const SplitLocalFileLiteral*>(literal_expr.get());
+    if (rewritten_literal == nullptr) {
+        return literal_expr;
+    }
+    return VLiteral::create_shared(rewritten_literal->original_type(),
+                                   rewritten_literal->original_field());
+}
+
+static std::optional<PredicateType> to_column_predicate_type(TExprOpcode::type opcode) {
+    switch (opcode) {
+    case TExprOpcode::EQ:
+        return PredicateType::EQ;
+    case TExprOpcode::NE:
+        return PredicateType::NE;
+    case TExprOpcode::GT:
+        return PredicateType::GT;
+    case TExprOpcode::GE:
+        return PredicateType::GE;
+    case TExprOpcode::LT:
+        return PredicateType::LT;
+    case TExprOpcode::LE:
+        return PredicateType::LE;
+    default:
+        return std::nullopt;
+    }
+}
+
+static TExprOpcode::type reverse_comparison_opcode(TExprOpcode::type opcode) {
+    switch (opcode) {
+    case TExprOpcode::GT:
+        return TExprOpcode::LT;
+    case TExprOpcode::GE:
+        return TExprOpcode::LE;
+    case TExprOpcode::LT:
+        return TExprOpcode::GT;
+    case TExprOpcode::LE:
+        return TExprOpcode::GE;
+    default:
+        return opcode;
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> create_comparison_column_predicate(
+        PredicateType predicate_type, uint32_t column_id, const std::string& column_name,
+        const DataTypePtr& data_type, const Field& value) {
+    switch (predicate_type) {
+    case PredicateType::EQ:
+        return create_comparison_predicate<PredicateType::EQ>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::NE:
+        return create_comparison_predicate<PredicateType::NE>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::GT:
+        return create_comparison_predicate<PredicateType::GT>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::GE:
+        return create_comparison_predicate<PredicateType::GE>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::LT:
+        return create_comparison_predicate<PredicateType::LT>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::LE:
+        return create_comparison_predicate<PredicateType::LE>(column_id, column_name, data_type,
+                                                              value, false);
+    default:
+        return nullptr;
+    }
+}
+
+static bool extract_child_id_path_from_projection(const LocalColumnIndex& root_projection,
+                                                  std::vector<int32_t>* file_child_id_path) {
+    DORIS_CHECK(file_child_id_path != nullptr);
+    file_child_id_path->clear();
+    const auto* current_projection = &root_projection;
+    while (!current_projection->children.empty()) {
+        if (current_projection->children.size() != 1) {
+            file_child_id_path->clear();
+            return false;
+        }
+        current_projection = &current_projection->children[0];
+        file_child_id_path->push_back(current_projection->local_id());
+    }
+    return !file_child_id_path->empty();
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_comparison_predicate(
+        const VExprSPtr& literal_expr, TExprOpcode::type opcode, LocalColumnId root_file_column_id,
+        const std::string& leaf_name, const DataTypePtr& file_leaf_type) {
+    if (literal_expr == nullptr || !literal_expr->is_literal() || file_leaf_type == nullptr) {
+        return nullptr;
+    }
+    const auto predicate_type = to_column_predicate_type(opcode);
+    if (!predicate_type.has_value()) {
+        return nullptr;
+    }
+    const auto original_literal = original_table_literal_for_nested_predicate(literal_expr);
+    const Field original_field = literal_field(original_literal);
+    Field file_field;
+    try {
+        convert_field_to_type(original_field, *file_leaf_type, &file_field,
+                              original_literal->data_type().get());
+    } catch (const Exception&) {
+        return nullptr;
+    }
+    if (file_field.is_null()) {
+        return nullptr;
+    }
+    try {
+        return create_comparison_column_predicate(*predicate_type,
+                                                  cast_set<uint32_t>(root_file_column_id.value()),
+                                                  leaf_name, file_leaf_type, file_field);
+    } catch (const Exception&) {
+        return nullptr;
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_in_list_predicate(
+        const VExprSPtrs& literal_exprs, LocalColumnId root_file_column_id,
+        const std::string& leaf_name, const DataTypePtr& file_leaf_type) {
+    if (literal_exprs.empty() || file_leaf_type == nullptr) {
+        return nullptr;
+    }
+
+    auto value_column = file_leaf_type->create_column();
+    for (const auto& literal_expr : literal_exprs) {
+        if (literal_expr == nullptr || !literal_expr->is_literal()) {
+            return nullptr;
+        }
+        const auto original_literal = original_table_literal_for_nested_predicate(literal_expr);
+        const Field original_field = literal_field(original_literal);
+        Field file_field;
+        try {
+            convert_field_to_type(original_field, *file_leaf_type, &file_field,
+                                  original_literal->data_type().get());
+        } catch (const Exception&) {
+            return nullptr;
+        }
+        if (file_field.is_null()) {
+            return nullptr;
+        }
+        value_column->insert(file_field);
+    }
+
+    std::shared_ptr<HybridSetBase> values;
+    try {
+        values.reset(create_set(file_leaf_type->get_primitive_type(), literal_exprs.size(), false));
+        ColumnPtr value_column_ptr = std::move(value_column);
+        values->insert_range_from(value_column_ptr, 0, value_column_ptr->size());
+        return create_in_list_predicate<PredicateType::IN_LIST>(
+                cast_set<uint32_t>(root_file_column_id.value()), leaf_name, file_leaf_type, values,
+                false);
+    } catch (const Exception&) {
+        return nullptr;
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_null_predicate(
+        bool is_null, LocalColumnId root_file_column_id, const std::string& leaf_name,
+        const DataTypePtr& file_leaf_type) {
+    if (file_leaf_type == nullptr) {
+        return nullptr;
+    }
+    const auto leaf_primitive_type = remove_nullable(file_leaf_type)->get_primitive_type();
+    return NullPredicate::create_shared(cast_set<uint32_t>(root_file_column_id.value()), leaf_name,
+                                        is_null, leaf_primitive_type);
+}
+
+static bool set_nested_column_filter_target(const NestedPredicateTarget& target,
+                                            FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    std::vector<int32_t> file_child_id_path;
+    if (!extract_child_id_path_from_projection(target.file_projection, &file_child_id_path)) {
+        return false;
+    }
+    column_filter->file_column_id = target.file_projection.column_id();
+    column_filter->file_child_id_path = std::move(file_child_id_path);
+    column_filter->target = target.file_target;
+    return true;
+}
+
+static bool extract_nested_binary_comparison_filter(const VExprSPtr& expr,
+                                                    const std::vector<ColumnMapping>& mappings,
+                                                    FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    if (!is_binary_comparison_predicate(expr)) {
+        return false;
+    }
+    NestedStructPath path;
+    VExprSPtr literal_expr;
+    TExprOpcode::type opcode = expr->op();
+    if (extract_nested_struct_path_for_pruning(expr->children()[0], &path) &&
+        expr->children()[1]->is_literal()) {
+        literal_expr = expr->children()[1];
+    } else if (extract_nested_struct_path_for_pruning(expr->children()[1], &path) &&
+               expr->children()[0]->is_literal()) {
+        literal_expr = expr->children()[0];
+        opcode = reverse_comparison_opcode(opcode);
+    } else {
+        return false;
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_comparison_predicate(literal_expr, opcode,
+                                                       target.file_projection.column_id(),
+                                                       target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false;
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+static bool extract_nested_in_list_filter(const VExprSPtr& expr,
+                                          const std::vector<ColumnMapping>& mappings,
+                                          FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    if (expr == nullptr || expr->node_type() != TExprNodeType::IN_PRED ||
+        expr->get_num_children() < 2) {
+        return false;
+    }
+    if (const auto* in_predicate = dynamic_cast<const VInPredicate*>(expr.get());
+        in_predicate != nullptr && in_predicate->is_not_in()) {
+        return false;
+    }
+
+    NestedStructPath path;
+    if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) {
+        return false;
+    }
+
+    VExprSPtrs literal_exprs;
+    literal_exprs.reserve(expr->get_num_children() - 1);
+    for (size_t child_idx = 1; child_idx < expr->children().size(); ++child_idx) {
+        if (!expr->children()[child_idx]->is_literal()) {
+            return false;
+        }
+        literal_exprs.push_back(expr->children()[child_idx]);
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_in_list_predicate(
+            literal_exprs, target.file_projection.column_id(), target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false;
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+static bool extract_nested_null_filter(const VExprSPtr& expr,
+                                       const std::vector<ColumnMapping>& mappings,
+                                       FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    bool is_null = false;
+    if (!is_null_predicate_function(expr, &is_null)) {
+        return false;
+    }
+
+    NestedStructPath path;
+    if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) {
+        return false;
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_null_predicate(is_null, target.file_projection.column_id(),
+                                                 target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false;
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+} // namespace
+
+SplitLocalFileLiteral::SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field,
+                                             DataTypePtr original_type, Field original_field)
+        : VLiteral(file_type, file_field),
+          _original_type(std::move(original_type)),
+          _original_field(std::move(original_field)) {}
+
+GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref) {
+    DORIS_CHECK(slot_ref.column_id() >= 0);
+    return GlobalIndex(cast_set<size_t>(slot_ref.column_id()));
+}
+
+bool is_struct_element_expr(const VExprSPtr& expr) {
+    if (expr == nullptr || expr->get_num_children() != 2) {
+        return false;
+    }
+    const auto& function_name = expr->fn().name.function_name;
+    if (function_name == "struct_element") {
+        return true;
+    }
+    if (function_name != "element_at") {
+        return false;
+    }
+    const auto& parent_type = expr->children()[0]->data_type();
+    return parent_type != nullptr &&
+           remove_nullable(parent_type)->get_primitive_type() == TYPE_STRUCT;
+}
+
+Field literal_field(const VExprSPtr& literal_expr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* literal = dynamic_cast<const VLiteral*>(literal_expr.get());
+    DORIS_CHECK(literal != nullptr);
+    Field field;
+    literal->get_column_ptr()->get(0, field);
+    return field;
+}
+
+bool resolve_nested_struct_path_for_file(const NestedStructPath& path,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved,
+                                         bool require_scan_projection) {
+    DORIS_CHECK(resolved != nullptr);
+    *resolved = {};
+    const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+        return mapping.global_index == path.root_global_index;
+    });
+    if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() ||
+        path.selectors.empty()) {
+        return false;
+    }
+
+    // Prefer ColumnMapping over schema-name lookup. This is the only path that can correctly
+    // localize renamed Iceberg fields: a table filter `element_at(s, 'renamed_b')` must become a
+    // file filter on physical child `b`, even if the old file type is `STRUCT<b ...>`.
+    const auto mapping_result =
+            resolve_nested_projection_with_mapping(path, mappings, &resolved->file_projection);
+    if (mapping_result == NestedProjectionResolveResult::MISSING_FILE_CHILD) {
+        return false;
+    }
+    if (mapping_result == NestedProjectionResolveResult::NOT_REPRESENTED) {
+        if (!table_root_is_struct(*mapping_it)) {
+            return false;
+        }
+        LocalColumnIndex child_projection;
+        if (!build_file_child_projection_from_schema(mapping_it->original_file_children,
+                                                     path.selectors, &child_projection)
+                     .ok() ||
+            child_projection.local_id() < 0) {
+            return false;
+        }
+        resolved->file_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id);
+        resolved->file_projection.children.push_back(std::move(child_projection));
+    }
+
+    if (resolved->file_projection.children.size() != 1) {
+        *resolved = {};
+        return false;
+    }
+    // When rewriting the final localized element_at chain, it executes on the file column produced
+    // by this scan, so the intermediate return types must match the projected file shape, not the
+    // full historical file schema. Example:
+    //   SELECT s.c WHERE element_at(element_at(s, 'b'), 'cc') LIKE 'NestedC%'
+    // reads only b.cc and c; the inner element_at(s, 'b') returns Struct(cc), not
+    // Struct(cc, new_dd).
+    //
+    // Earlier projection collection also calls this resolver before filter-only children have been
+    // merged into the scan projection. That phase only needs the file path, so it still resolves
+    // names/types from the original file schema.
+    const auto& child_source = require_scan_projection ? scan_file_children(*mapping_it)
+                                                       : mapping_it->original_file_children;
+    if (!collect_file_child_names_from_projection(
+                child_source, resolved->file_projection.children[0], &resolved->file_child_names,
+                &resolved->file_child_types) ||
+        resolved->file_child_names.size() != path.selectors.size() ||
+        resolved->file_child_types.size() != path.selectors.size()) {
+        *resolved = {};
+        return false;
+    }
+    return true;
+}
+
+bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved) {
+    DORIS_CHECK(resolved != nullptr);
+    NestedStructPath path;
+    if (!extract_nested_struct_path(expr, &path)) {
+        *resolved = {};
+        return false;
+    }
+    return resolve_nested_struct_path_for_file(path, mappings, resolved, true);
+}
+
+// Collect nested struct leaf references that can be turned into file-reader projections and
+// primitive pruning predicates. For example, from `s.a > 1 AND element_at(s, 'b') = 2`, this
+// records two paths rooted at `s`: `s -> a` and `s -> b`. Non-struct expressions are traversed
+// recursively, while a recognized struct path is emitted once so the caller can merge it into the
+// scan projection for that top-level file column.
+void collect_nested_struct_paths(const VExprSPtr& expr, std::vector<NestedStructPath>* paths) {
+    DORIS_CHECK(paths != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    NestedStructPath path;
+    if (extract_nested_struct_path_for_pruning(expr, &path)) {
+        paths->push_back(std::move(path));
+        return;
+    }
+    for (const auto& child : expr->children()) {
+        collect_nested_struct_paths(child, paths);
+    }
+}
+
+std::vector<const ColumnMapping*> present_child_mappings_in_file_order(
+        const std::vector<ColumnMapping>& child_mappings) {
+    std::vector<const ColumnMapping*> result;
+    result.reserve(child_mappings.size());
+    for (const auto& child_mapping : child_mappings) {
+        if (child_mapping.file_local_id.has_value()) {
+            result.push_back(&child_mapping);
+        }
+    }
+    std::ranges::sort(result, [](const ColumnMapping* lhs, const ColumnMapping* rhs) {
+        DORIS_CHECK(lhs->file_local_id.has_value());
+        DORIS_CHECK(rhs->file_local_id.has_value());
+        return *lhs->file_local_id < *rhs->file_local_id;
+    });
+    return result;
+}
+
+// Build the nested child projection under a top-level file column by walking file schema children
+// directly. The returned projection does not include the root column id; callers attach it under a
+// `LocalColumnIndex::partial_local(root_id)` when merging into the scan request.
+Status build_file_child_projection_from_schema(const std::vector<ColumnDefinition>& children,
+                                               std::span<const StructChildSelector> selectors,
+                                               LocalColumnIndex* projection) {
+    DORIS_CHECK(projection != nullptr);
+    if (selectors.empty()) {
+        return Status::InvalidArgument("Nested struct selector path is empty");
+    }
+    const auto* child = resolve_file_child(children, selectors.front());
+    if (child == nullptr) {
+        return Status::OK();
+    }
+    *projection = LocalColumnIndex::local(child->file_local_id());
+    projection->project_all_children = selectors.size() == 1;
+    projection->children.clear();
+    if (selectors.size() == 1) {
+        return Status::OK();
+    }
+    if (child->children.empty() ||
+        remove_nullable(child->type)->get_primitive_type() != TYPE_STRUCT) {
+        *projection = LocalColumnIndex {};
+        return Status::OK();
+    }
+    LocalColumnIndex child_projection;
+    RETURN_IF_ERROR(build_file_child_projection_from_schema(child->children, selectors.subspan(1),
+                                                            &child_projection));
+    if (child_projection.local_id() < 0) {
+        *projection = LocalColumnIndex {};
+        return Status::OK();
+    }
+    projection->children.push_back(std::move(child_projection));
+    return Status::OK();
+}
+
+// Merge predicates that target the same physical file column or nested leaf. For example,
+// `WHERE s.b > 1 AND s.b < 10` produces two predicates for the same target `s -> b`; keeping them
+// in one FileColumnPredicateFilter lets the file reader apply both pruning checks to the same leaf
+// instead of carrying duplicate target entries.
+void merge_column_predicate_filter(FileColumnPredicateFilter column_filter,
+                                   std::vector<FileColumnPredicateFilter>* filters) {
+    DORIS_CHECK(filters != nullptr);
+    auto existing_filter_it = std::ranges::find_if(*filters, [&](const auto& existing_filter) {
+        return existing_filter.same_target_as(column_filter);
+    });
+    if (existing_filter_it == filters->end()) {
+        filters->push_back(std::move(column_filter));
+        return;
+    }
+    existing_filter_it->predicates.insert(existing_filter_it->predicates.end(),
+                                          column_filter.predicates.begin(),
+                                          column_filter.predicates.end());
+}
+
+// Extract file-column pruning predicates from localized row-level conjuncts that reference nested
+// struct leaves. This is separate from file_request->conjuncts: conjuncts do row filtering, while
+// FileColumnPredicateFilter carries primitive leaf predicates for file/page/statistics pruning.
+//
+// Example: for `WHERE s.b.c > 10 AND element_at(s, 'd') IS NOT NULL`, this function emits pruning
+// filters for the nested targets `s -> b -> c` and `s -> d`. The caller only invokes it after
+// table_filter_has_only_local_entries() succeeds, so each root slot already has a file-local scan
+// source in _filter_entries.
+void collect_nested_column_predicate_filters(const VExprSPtr& expr,
+                                             const std::vector<ColumnMapping>& mappings,
+                                             std::vector<FileColumnPredicateFilter>* filters) {
+    DORIS_CHECK(filters != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    if (expr->node_type() == TExprNodeType::COMPOUND_PRED &&
+        expr->op() == TExprOpcode::COMPOUND_AND) {
+        for (const auto& child : expr->children()) {
+            collect_nested_column_predicate_filters(child, mappings, filters);
+        }
+        return;
+    }
+    FileColumnPredicateFilter column_filter;
+    if (extract_nested_binary_comparison_filter(expr, mappings, &column_filter) ||
+        extract_nested_in_list_filter(expr, mappings, &column_filter) ||
+        extract_nested_null_filter(expr, mappings, &column_filter)) {
+        merge_column_predicate_filter(std::move(column_filter), filters);
+    }
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper_nested.h b/be/src/format_v2/column_mapper_nested.h
new file mode 100644
index 00000000000000..b8b3f1f3334a8f
--- /dev/null
+++ b/be/src/format_v2/column_mapper_nested.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <span>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/file_reader.h"
+
+namespace doris::format {
+
+struct StructChildSelector {
+    bool by_name = true;
+    std::string name;
+    size_t ordinal = 0;
+};
+
+struct NestedStructPath {
+    GlobalIndex root_global_index;
+    std::vector<StructChildSelector> selectors;
+};
+
+struct ResolvedNestedStructPath {
+    LocalColumnIndex file_projection;
+    std::vector<std::string> file_child_names;
+    std::vector<DataTypePtr> file_child_types;
+};
+
+// A split-local literal produced by slot-literal predicate localization. This wrapper keeps the
+// original table literal so a cloned conjunct can be localized again for another split.
+class SplitLocalFileLiteral final : public VLiteral {
+public:
+    SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field,
+                          DataTypePtr original_type, Field original_field);
+
+    const DataTypePtr& original_type() const { return _original_type; }
+    const Field& original_field() const { return _original_field; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        Field file_field;
+        get_column_ptr()->get(0, file_field);
+        *cloned_expr = std::make_shared<SplitLocalFileLiteral>(_data_type, file_field,
+                                                               _original_type, _original_field);
+        return Status::OK();
+    }
+
+private:
+    DataTypePtr _original_type;
+    Field _original_field;
+};
+
+GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref);
+bool is_struct_element_expr(const VExprSPtr& expr);
+Field literal_field(const VExprSPtr& literal_expr);
+
+bool resolve_nested_struct_path_for_file(const NestedStructPath& path,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved,
+                                         bool require_scan_projection = false);
+
+bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved);
+
+void collect_nested_struct_paths(const VExprSPtr& expr, std::vector<NestedStructPath>* paths);
+
+std::vector<const ColumnMapping*> present_child_mappings_in_file_order(
+        const std::vector<ColumnMapping>& child_mappings);
+
+Status build_file_child_projection_from_schema(const std::vector<ColumnDefinition>& children,
+                                               std::span<const StructChildSelector> selectors,
+                                               LocalColumnIndex* projection);
+
+void merge_column_predicate_filter(FileColumnPredicateFilter column_filter,
+                                   std::vector<FileColumnPredicateFilter>* filters);
+
+void collect_nested_column_predicate_filters(const VExprSPtr& expr,
+                                             const std::vector<ColumnMapping>& mappings,
+                                             std::vector<FileColumnPredicateFilter>* filters);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/csv_reader.cpp b/be/src/format_v2/delimited_text/csv_reader.cpp
new file mode 100644
index 00000000000000..711146a9880479
--- /dev/null
+++ b/be/src/format_v2/delimited_text/csv_reader.cpp
@@ -0,0 +1,295 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/csv_reader.h"
+
+#include <cstring>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type_serde/data_type_string_serde.h"
+#include "format/file_reader/new_plain_binary_line_reader.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "gen_cpp/internal_service.pb.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/utf8_check.h"
+
+namespace doris::format::csv {
+namespace {
+
+bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) {
+    return !needle.empty() && pos + needle.size() <= line.size &&
+           std::memcmp(line.data + pos, needle.data(), needle.size()) == 0;
+}
+
+bool is_csv_text_format(TFileFormatType::type format_type) {
+    switch (format_type) {
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+        return true;
+    default:
+        return false;
+    }
+}
+
+} // namespace
+
+CsvReader::CsvReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                     std::unique_ptr<io::FileDescription>& file_description,
+                     std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                     const TFileScanRangeParams* scan_params,
+                     const std::vector<SlotDescriptor*>& file_slot_descs,
+                     TFileCompressType::type range_compress_type,
+                     std::optional<TUniqueId> stream_load_id)
+        : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile,
+                              scan_params, file_slot_descs, range_compress_type,
+                              std::move(stream_load_id), "CSV") {}
+
+CsvReader::~CsvReader() = default;
+
+Status CsvReader::_init_format_state() {
+    _file_format_type = _scan_params->format_type;
+    _file_compress_type =
+            _range_compress_type != TFileCompressType::UNKNOWN
+                    ? _range_compress_type
+                    : (_scan_params->__isset.compress_type ? _scan_params->compress_type
+                                                           : TFileCompressType::UNKNOWN);
+    if (_file_compress_type == TFileCompressType::UNKNOWN &&
+        _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN) {
+        // FORMAT_CSV_PLAIN is an uncompressed byte stream even when FE does not fill
+        // compress_type. Non-first splits rely on this normalization; otherwise UNKNOWN would be
+        // rejected by the split-compressed-file guard in the shared reader base.
+        _file_compress_type = TFileCompressType::PLAIN;
+    }
+
+    const auto& text_params = _scan_params->file_attributes.text_params;
+    _value_separator = text_params.column_separator;
+    _line_delimiter = text_params.line_delimiter;
+    if (text_params.__isset.enclose) {
+        _enclose = text_params.enclose;
+    }
+    if (text_params.__isset.escape) {
+        _escape = text_params.escape;
+    }
+    _trim_tailing_spaces = _runtime_state != nullptr &&
+                           _runtime_state->trim_tailing_spaces_for_external_table_query();
+    _options.escape_char = _escape;
+    _options.quote_char = _enclose;
+    _options.collection_delim =
+            text_params.collection_delimiter.empty() ? ',' : text_params.collection_delimiter[0];
+    _options.map_key_delim =
+            text_params.mapkv_delimiter.empty() ? ':' : text_params.mapkv_delimiter[0];
+    if (text_params.__isset.null_format) {
+        _options.null_format = text_params.null_format.data();
+        _options.null_len = text_params.null_format.length();
+    }
+    if (_scan_params->file_attributes.__isset.trim_double_quotes) {
+        _trim_double_quotes = _scan_params->file_attributes.trim_double_quotes;
+    }
+    _options.converted_from_string = _trim_double_quotes;
+    if (_runtime_state != nullptr) {
+        _keep_cr = _runtime_state->query_options().keep_carriage_return;
+    }
+    if (text_params.__isset.empty_field_as_null) {
+        _empty_field_as_null = text_params.empty_field_as_null;
+    }
+    return Status::OK();
+}
+
+Status CsvReader::_create_decompressor() {
+    if (_file_compress_type != TFileCompressType::UNKNOWN) {
+        return Decompressor::create_decompressor(_file_compress_type, &_decompressor);
+    }
+    return Decompressor::create_decompressor(_file_format_type, &_decompressor);
+}
+
+Status CsvReader::_create_line_reader() {
+    if (is_csv_text_format(_file_format_type)) {
+        std::shared_ptr<TextLineReaderContextIf> text_line_reader_ctx;
+        if (_enclose == 0) {
+            text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
+                    _line_delimiter, _line_delimiter.size(), _keep_cr);
+        } else {
+            // The enclosed-line context finds logical records that may span physical newlines.
+            // Field slicing still happens in `_split_line()` because the v2 scan request may ask
+            // for CSV ordinals in a different order from the physical file.
+            const size_t col_sep_num =
+                    _source_file_slot_descs.size() > 1 ? _source_file_slot_descs.size() - 1 : 0;
+            text_line_reader_ctx = std::make_shared<EncloseCsvLineReaderCtx>(
+                    _line_delimiter, _line_delimiter.size(), _value_separator,
+                    _value_separator.size(), col_sep_num, _enclose, _escape, _keep_cr);
+        }
+        _line_reader = NewPlainTextLineReader::create_unique(
+                _profile, _file_reader, _decompressor.get(), std::move(text_line_reader_ctx), _size,
+                _start_offset);
+        return Status::OK();
+    }
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO) {
+        _line_reader = NewPlainBinaryLineReader::create_unique(_file_reader);
+        return Status::OK();
+    }
+    return Status::InternalError<false>("Unknown CSV format type {}", _file_format_type);
+}
+
+Status CsvReader::_validate_line(const Slice& line) {
+    if (_file_format_type != TFileFormatType::FORMAT_PROTO && _enable_text_validate_utf8 &&
+        !validate_utf8(line.data, line.size)) {
+        return Status::InternalError<false>("Only support csv data in utf8 codec");
+    }
+    return Status::OK();
+}
+
+void CsvReader::_split_line(const Slice& line) {
+    _split_values.clear();
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO) {
+        auto** row_ptr = reinterpret_cast<PDataRow**>(line.data);
+        PDataRow* row = *row_ptr;
+        for (const PDataColumn& col : row->col()) {
+            _split_values.emplace_back(col.value());
+        }
+        return;
+    }
+
+    // The text line reader is responsible for split boundaries and multi-line quoted fields.
+    // Field slicing still happens here because FileScannerV2 asks columns by file-local id, so we
+    // must be able to materialize only the requested CSV ordinals without building a row object.
+    // Example: for `1,"a,b",10` and column separator `,`, this loop returns three slices:
+    // `1`, `a,b`, and `10`; the comma inside quotes does not create an extra field.
+    bool in_quote = false;
+    bool escaped = false;
+    size_t start = 0;
+    size_t i = 0;
+    while (i < line.size) {
+        const char ch = line.data[i];
+        if (_enclose != 0) {
+            if (escaped) {
+                escaped = false;
+                ++i;
+                continue;
+            }
+            if (_escape != 0 && ch == _escape) {
+                escaped = true;
+                ++i;
+                continue;
+            }
+            if (ch == _enclose) {
+                if (in_quote && i + 1 < line.size && line.data[i + 1] == _enclose) {
+                    i += 2;
+                    continue;
+                }
+                in_quote = !in_quote;
+                ++i;
+                continue;
+            }
+        }
+        if (!in_quote && starts_with_at(line, i, _value_separator)) {
+            size_t value_start = start;
+            size_t value_len = i - start;
+            while (_trim_tailing_spaces && value_len > 0 &&
+                   line.data[value_start + value_len - 1] == ' ') {
+                --value_len;
+            }
+            if (_trim_double_quotes && value_len > 1 && line.data[value_start] == '"' &&
+                line.data[value_start + value_len - 1] == '"') {
+                ++value_start;
+                value_len -= 2;
+            } else if (_enclose != 0 && value_len > 1 && line.data[value_start] == _enclose &&
+                       line.data[value_start + value_len - 1] == _enclose) {
+                ++value_start;
+                value_len -= 2;
+            }
+            _split_values.emplace_back(line.data + value_start, value_len);
+            i += _value_separator.size();
+            start = i;
+            continue;
+        }
+        ++i;
+    }
+
+    size_t value_start = start;
+    size_t value_len = line.size - start;
+    while (_trim_tailing_spaces && value_len > 0 && line.data[value_start + value_len - 1] == ' ') {
+        --value_len;
+    }
+    if (_trim_double_quotes && value_len > 1 && line.data[value_start] == '"' &&
+        line.data[value_start + value_len - 1] == '"') {
+        ++value_start;
+        value_len -= 2;
+    } else if (_enclose != 0 && value_len > 1 && line.data[value_start] == _enclose &&
+               line.data[value_start + value_len - 1] == _enclose) {
+        ++value_start;
+        value_len -= 2;
+    }
+    _split_values.emplace_back(line.data + value_start, value_len);
+}
+
+Status CsvReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                        Slice value) {
+    DORIS_CHECK(output != nullptr);
+    if (column.nullable_string_fast_path) {
+        auto& null_column = assert_cast<ColumnNullable&>(*output);
+        // String is the hottest CSV type. Avoid the generic nullable serde wrapper here:
+        // deserialize directly into the nested string column and append the null map bit ourselves.
+        if (_empty_field_as_null && value.size == 0) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        // CSV keeps empty-field handling separate from null_format matching. An empty
+        // null_format must not turn every empty CSV field into NULL unless FE explicitly sets
+        // empty_field_as_null; OpenCSV-compatible tables expect empty fields to stay empty strings.
+        if (_options.null_len > 0 && value.size == _options.null_len &&
+            std::memcmp(value.data, _options.null_format, value.size) == 0) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        static DataTypeStringSerDe string_serde(TYPE_STRING);
+        auto status = string_serde.deserialize_one_cell_from_csv(null_column.get_nested_column(),
+                                                                 value, _options);
+        if (!status.ok()) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        null_column.get_null_map_data().push_back(0);
+        return Status::OK();
+    }
+    return column.serde->deserialize_one_cell_from_csv(*output, value, _options);
+}
+
+Slice CsvReader::_normalize_value(Slice value) const {
+    if (_empty_field_as_null && value.size == 0) {
+        return Slice(_options.null_format, _options.null_len);
+    }
+    return value;
+}
+
+bool CsvReader::_can_split() const {
+    return (_file_compress_type == TFileCompressType::PLAIN) ||
+           (_file_compress_type == TFileCompressType::UNKNOWN &&
+            _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN);
+}
+
+} // namespace doris::format::csv
diff --git a/be/src/format_v2/delimited_text/csv_reader.h b/be/src/format_v2/delimited_text/csv_reader.h
new file mode 100644
index 00000000000000..e5d1ce25a74f40
--- /dev/null
+++ b/be/src/format_v2/delimited_text/csv_reader.h
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "util/slice.h"
+
+namespace doris {
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format::csv {
+
+// FileScannerV2 CSV reader.
+//
+// CSV files do not carry a physical schema. FE provides the table slot descriptors plus
+// TFileScanRangeParams::column_idxs, where each file slot maps to a CSV field ordinal. This reader
+// exposes that information as a v2 file-local schema and implements CSV parsing directly in the v2
+// FileReader contract.
+class CsvReader final : public ::doris::format::DelimitedTextReader {
+public:
+    // `file_slot_descs` must contain only columns physically readable from the CSV payload.
+    // Partition/default/virtual columns are materialized by TableReader after this reader returns
+    // a file-local block. Keeping that boundary is important because CSV has no embedded schema
+    // from which those non-file columns could be derived.
+    CsvReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+              std::unique_ptr<io::FileDescription>& file_description,
+              std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+              const TFileScanRangeParams* scan_params,
+              const std::vector<SlotDescriptor*>& file_slot_descs,
+              TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+              std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~CsvReader() override;
+
+private:
+    Status _init_format_state() override;
+    Status _create_decompressor() override;
+    Status _create_line_reader() override;
+    Status _validate_line(const Slice& line) override;
+    void _split_line(const Slice& line) override;
+    Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                 Slice value) override;
+    Slice _normalize_value(Slice value) const override;
+    bool _can_split() const override;
+
+    TFileFormatType::type _file_format_type = TFileFormatType::FORMAT_CSV_PLAIN;
+    char _enclose = 0;
+    bool _trim_double_quotes = false;
+    bool _trim_tailing_spaces = false;
+    bool _empty_field_as_null = false;
+    bool _keep_cr = false;
+};
+
+} // namespace doris::format::csv
diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.cpp b/be/src/format_v2/delimited_text/delimited_text_reader.cpp
new file mode 100644
index 00000000000000..ba4986ee531740
--- /dev/null
+++ b/be/src/format_v2/delimited_text/delimited_text_reader.cpp
@@ -0,0 +1,666 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr_context.h"
+#include "format/line_reader.h"
+#include "format_v2/column_mapper.h"
+#include "io/file_factory.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+constexpr const char* DELIMITED_TEXT_PROFILE = "DelimitedTextReader";
+
+void update_counter(RuntimeProfile::Counter* counter, int64_t value) {
+    if (counter != nullptr) {
+        COUNTER_UPDATE(counter, value);
+    }
+}
+
+DataTypePtr nullable_type(DataTypePtr type) {
+    return type != nullptr && type->is_nullable() ? std::move(type)
+                                                  : make_nullable(std::move(type));
+}
+
+DataTypePtr delimited_file_type_from_slot_type(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+
+    const bool is_nullable = type->is_nullable();
+    const auto nested_type = remove_nullable(type);
+    DataTypePtr file_type;
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+        // Delimited text files do not carry CHAR/VARCHAR length metadata. FE slot types describe
+        // the table target type, not a bounded physical file type. Expose bounded strings as
+        // unbounded STRING on the file side so TableReader can later enforce the table length.
+        // Example: a TEXT field "hangzhou" mapped to table CHAR(3) must be read as STRING and
+        // truncated to "han" during table materialization.
+        file_type = std::make_shared<DataTypeString>();
+        break;
+    case TYPE_ARRAY: {
+        const auto* array_type = assert_cast<const DataTypeArray*>(nested_type.get());
+        file_type = std::make_shared<DataTypeArray>(
+                delimited_file_type_from_slot_type(array_type->get_nested_type()));
+        break;
+    }
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        file_type = std::make_shared<DataTypeMap>(
+                delimited_file_type_from_slot_type(map_type->get_key_type()),
+                delimited_file_type_from_slot_type(map_type->get_value_type()));
+        break;
+    }
+    case TYPE_STRUCT: {
+        const auto* struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
+        DataTypes file_children;
+        file_children.reserve(struct_type->get_elements().size());
+        for (const auto& child_type : struct_type->get_elements()) {
+            file_children.push_back(delimited_file_type_from_slot_type(child_type));
+        }
+        file_type =
+                std::make_shared<DataTypeStruct>(file_children, struct_type->get_element_names());
+        break;
+    }
+    default:
+        file_type = nested_type;
+        break;
+    }
+
+    return is_nullable ? make_nullable(file_type) : file_type;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id);
+
+std::vector<ColumnDefinition> synthesize_file_children_from_type(const DataTypePtr& type) {
+    std::vector<ColumnDefinition> children;
+    if (type == nullptr) {
+        return children;
+    }
+    const auto nested_type = remove_nullable(type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_ARRAY: {
+        const auto* array_type = assert_cast<const DataTypeArray*>(nested_type.get());
+        children.push_back(synthetic_file_child("element", array_type->get_nested_type(), 0));
+        break;
+    }
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        children.push_back(synthetic_file_child("key", map_type->get_key_type(), 0));
+        children.push_back(synthetic_file_child("value", map_type->get_value_type(), 1));
+        break;
+    }
+    case TYPE_STRUCT: {
+        const auto* struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
+        children.reserve(struct_type->get_elements().size());
+        for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) {
+            children.push_back(synthetic_file_child(struct_type->get_element_name(idx),
+                                                    struct_type->get_element(idx),
+                                                    cast_set<int32_t>(idx)));
+        }
+        break;
+    }
+    default:
+        break;
+    }
+    return children;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) {
+    ColumnDefinition child;
+    child.identifier = Field::create_field<TYPE_STRING>(name);
+    child.local_id = local_id;
+    child.name = name;
+    child.type = std::move(type);
+    child.children = synthesize_file_children_from_type(child.type);
+    return child;
+}
+
+} // namespace
+
+DelimitedTextReader::DelimitedTextReader(
+        std::shared_ptr<io::FileSystemProperties>& system_properties,
+        std::unique_ptr<io::FileDescription>& file_description,
+        std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+        const TFileScanRangeParams* scan_params,
+        const std::vector<SlotDescriptor*>& file_slot_descs,
+        TFileCompressType::type range_compress_type, std::optional<TUniqueId> stream_load_id,
+        std::string reader_name)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile),
+          _scan_params(scan_params),
+          _source_file_slot_descs(file_slot_descs),
+          _range_compress_type(range_compress_type),
+          _stream_load_id(std::move(stream_load_id)),
+          _reader_name(std::move(reader_name)) {}
+
+DelimitedTextReader::~DelimitedTextReader() {
+    static_cast<void>(close());
+}
+
+void DelimitedTextReader::_init_profile() {
+    if (_profile == nullptr || _text_profile.raw_lines_read != nullptr) {
+        return;
+    }
+
+    ADD_TIMER_WITH_LEVEL(_profile, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.open_file_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "OpenFileTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.create_line_reader_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "CreateLineReaderTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.read_line_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadLineTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.split_line_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "SplitLineTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.deserialize_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DeserializeTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.conjunct_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.delete_conjunct_filter_time = ADD_CHILD_TIMER_WITH_LEVEL(
+            _profile, "DeleteConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.raw_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RawLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_read_before_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsReadBeforeFilter", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsFilteredByConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_filtered_by_delete_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsFilteredByDeleteConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_returned = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsReturned", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.empty_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "EmptyLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.skipped_lines = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "SkippedLines", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.cells_deserialized = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "CellsDeserialized", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+}
+
+Status DelimitedTextReader::init(RuntimeState* state) {
+    _init_profile();
+    _runtime_state = state;
+    if (_scan_params == nullptr) {
+        return Status::InvalidArgument("{} v2 reader requires scan params", _reader_name);
+    }
+    if (_file_description == nullptr) {
+        return Status::InvalidArgument("{} v2 reader requires file description", _reader_name);
+    }
+    if (!_scan_params->__isset.file_attributes ||
+        !_scan_params->file_attributes.__isset.text_params) {
+        return Status::InvalidArgument("{} v2 reader requires text file attributes", _reader_name);
+    }
+    _enable_text_validate_utf8 = !_scan_params->file_attributes.__isset.enable_text_validate_utf8 ||
+                                 _scan_params->file_attributes.enable_text_validate_utf8;
+
+    RETURN_IF_ERROR(_init_format_state());
+
+    // Delimited text files have no physical column ids. FE sends `column_idxs` to describe how
+    // each physical file slot maps to a field ordinal in the text row. The local id exposed in the
+    // file schema is therefore the text-field ordinal, not the slot vector position.
+    _source_column_idxs.clear();
+    if (_scan_params->__isset.column_idxs && !_scan_params->column_idxs.empty()) {
+        if (_scan_params->column_idxs.size() != _source_file_slot_descs.size()) {
+            return Status::InvalidArgument(
+                    "{} v2 reader column_idxs size {} does not match file slot size {}",
+                    _reader_name, _scan_params->column_idxs.size(), _source_file_slot_descs.size());
+        }
+        _source_column_idxs.reserve(_scan_params->column_idxs.size());
+        for (const auto column_idx : _scan_params->column_idxs) {
+            _source_column_idxs.push_back(column_idx);
+        }
+    } else {
+        _source_column_idxs.reserve(_source_file_slot_descs.size());
+        for (size_t i = 0; i < _source_file_slot_descs.size(); ++i) {
+            _source_column_idxs.push_back(static_cast<int32_t>(i));
+        }
+    }
+
+    _source_serdes = create_data_type_serdes(_source_file_slot_descs);
+    _file_schema.clear();
+    _file_schema.reserve(_source_file_slot_descs.size());
+    for (size_t i = 0; i < _source_file_slot_descs.size(); ++i) {
+        const auto* slot = _source_file_slot_descs[i];
+        DORIS_CHECK(slot != nullptr);
+        ColumnDefinition field;
+        field.identifier = Field::create_field<TYPE_STRING>(slot->col_name());
+        field.local_id = _source_column_idxs[i];
+        field.name = slot->col_name();
+        field.type = nullable_type(delimited_file_type_from_slot_type(slot->get_data_type_ptr()));
+        // Delimited text stores a complex value in one top-level text field, but TableColumnMapper
+        // still needs semantic children to localize nested projections and predicates. Expose
+        // ARRAY element, MAP key/value, and STRUCT fields as file-schema children while keeping the
+        // top-level local id as the physical text field ordinal from column_idxs.
+        field.children = synthesize_file_children_from_type(field.type);
+        _file_schema.push_back(std::move(field));
+    }
+    _eof = false;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    if (file_schema == nullptr) {
+        return Status::InvalidArgument("{} v2 file_schema is null", _reader_name);
+    }
+    *file_schema = _file_schema;
+    return Status::OK();
+}
+
+std::unique_ptr<TableColumnMapper> DelimitedTextReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    return std::make_unique<MaterializedColumnMapper>(std::move(options));
+}
+
+Status DelimitedTextReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    DORIS_CHECK(_request != nullptr);
+    RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns));
+    {
+        SCOPED_TIMER(_text_profile.open_file_time);
+        RETURN_IF_ERROR(_open_file());
+    }
+    RETURN_IF_ERROR(_create_decompressor());
+    {
+        SCOPED_TIMER(_text_profile.create_line_reader_time);
+        RETURN_IF_ERROR(_create_line_reader());
+    }
+    _line_reader_eof = false;
+    _bom_removed = false;
+    _eof = false;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_line_reader == nullptr) {
+        return Status::InternalError("{} v2 reader is not open", _reader_name);
+    }
+
+    const auto batch_size = _runtime_state != nullptr ? _runtime_state->batch_size() : 4096;
+    const auto max_block_bytes = _runtime_state != nullptr
+                                         ? _runtime_state->preferred_block_size_bytes()
+                                         : std::numeric_limits<size_t>::max();
+    *rows = 0;
+    *eof = false;
+
+    {
+        auto columns_guard = file_block->mutate_columns_scoped();
+        auto& columns = columns_guard.mutable_columns();
+        // Delimited text readers are column-pruned but not lazy materialized: all file-local
+        // columns requested by TableReader are decoded before file-local conjuncts are evaluated.
+        while (*rows < batch_size && !_line_reader_eof &&
+               Block::columns_byte_size(columns) < max_block_bytes) {
+            Slice line;
+            bool line_eof = false;
+            RETURN_IF_ERROR(_read_next_line(&line, &line_eof));
+            if (line_eof) {
+                break;
+            }
+            RETURN_IF_ERROR(_fill_columns_from_line(line, &columns, rows));
+        }
+    }
+
+    const size_t rows_before_filter = *rows;
+    update_counter(_text_profile.rows_read_before_filter, rows_before_filter);
+
+    size_t rows_after_delete_filter = rows_before_filter;
+    if (_request != nullptr && rows_before_filter > 0 && !_request->delete_conjuncts.empty()) {
+        {
+            SCOPED_TIMER(_text_profile.delete_conjunct_filter_time);
+            RETURN_IF_ERROR(VExprContext::filter_block(_request->delete_conjuncts, file_block,
+                                                       file_block->columns()));
+        }
+        rows_after_delete_filter =
+                file_block->columns() == 0 ? rows_before_filter : file_block->rows();
+        update_counter(_text_profile.rows_filtered_by_delete_conjunct,
+                       rows_before_filter - rows_after_delete_filter);
+    }
+
+    size_t rows_after_filter = rows_after_delete_filter;
+    if (_request != nullptr && rows_after_delete_filter > 0 && !_request->conjuncts.empty()) {
+        {
+            SCOPED_TIMER(_text_profile.conjunct_filter_time);
+            RETURN_IF_ERROR(VExprContext::filter_block(_request->conjuncts, file_block,
+                                                       file_block->columns()));
+        }
+        rows_after_filter =
+                file_block->columns() == 0 ? rows_after_delete_filter : file_block->rows();
+        const auto rows_filtered_by_conjunct = rows_after_delete_filter - rows_after_filter;
+        update_counter(_text_profile.rows_filtered_by_conjunct, rows_filtered_by_conjunct);
+        if (_io_ctx != nullptr) {
+            _io_ctx->predicate_filtered_rows += rows_filtered_by_conjunct;
+        }
+    }
+
+    *rows = rows_after_filter;
+    update_counter(_text_profile.rows_returned, *rows);
+    _reader_statistics.read_rows += *rows;
+    *eof = _line_reader_eof && *rows == 0;
+    _eof = *eof;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_aggregate_result(const FileAggregateRequest& request,
+                                                 FileAggregateResult* result) {
+    DORIS_CHECK(result != nullptr);
+    if (request.agg_type != TPushAggOp::type::COUNT) {
+        return Status::NotSupported("{} v2 reader only supports COUNT aggregate pushdown",
+                                    _reader_name);
+    }
+    if (_line_reader == nullptr) {
+        return Status::InternalError("{} v2 reader is not open", _reader_name);
+    }
+
+    int64_t count = 0;
+    while (!_line_reader_eof) {
+        Slice line;
+        bool line_eof = false;
+        RETURN_IF_ERROR(_read_next_line(&line, &line_eof));
+        if (line_eof) {
+            break;
+        }
+        if (line.size == 0) {
+            update_counter(_text_profile.empty_lines_read, 1);
+            if (_empty_line_as_record() ||
+                (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null())) {
+                ++count;
+            }
+            continue;
+        }
+        RETURN_IF_ERROR(_validate_line(line));
+        ++count;
+    }
+    result->count = count;
+    result->columns.clear();
+    update_counter(_text_profile.rows_read_before_filter, count);
+    update_counter(_text_profile.rows_returned, count);
+    _reader_statistics.read_rows += count;
+    _eof = true;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::close() {
+    if (_line_reader != nullptr) {
+        _line_reader->close();
+        _line_reader.reset();
+    }
+    _decompressor.reset();
+    _file_reader.reset();
+    _tracing_file_reader.reset();
+    _requested_columns.clear();
+    return Status::OK();
+}
+
+bool DelimitedTextReader::_is_null_format(Slice value) const {
+    if (value.size != _options.null_len) {
+        return false;
+    }
+    if (_options.null_len == 0) {
+        return true;
+    }
+    return std::memcmp(value.data, _options.null_format, value.size) == 0;
+}
+
+Status DelimitedTextReader::_build_requested_columns(const FileScanRequest& request,
+                                                     std::vector<RequestedColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+
+    // `request.local_positions` is keyed by FileReader schema local id. For delimited text readers
+    // that local id is the field ordinal from column_idxs, so reverse-map it to the source slot
+    // descriptor before choosing the serde.
+    std::vector<RequestedColumn> by_position(request.local_positions.size());
+    for (const auto& [file_column_id, block_position] : request.local_positions) {
+        const auto source_it = std::find(_source_column_idxs.begin(), _source_column_idxs.end(),
+                                         file_column_id.value());
+        if (source_it == _source_column_idxs.end()) {
+            return Status::InvalidArgument("{} v2 request references unknown local column id {}",
+                                           _reader_name, file_column_id.value());
+        }
+        const auto source_index = std::distance(_source_column_idxs.begin(), source_it);
+        DORIS_CHECK(source_index >= 0 &&
+                    static_cast<size_t>(source_index) < _source_file_slot_descs.size());
+        if (block_position.value() >= by_position.size()) {
+            return Status::InvalidArgument("{} v2 request has invalid block position {}",
+                                           _reader_name, block_position.value());
+        }
+        const auto* slot = _source_file_slot_descs[source_index];
+        const auto type = slot->get_data_type_ptr();
+        RequestedColumn requested_column;
+        requested_column.file_column_id = file_column_id;
+        requested_column.block_position = block_position;
+        requested_column.slot_desc = slot;
+        requested_column.serde = _source_serdes[source_index];
+        requested_column.nullable_string_fast_path =
+                type->is_nullable() && is_string_type(type->get_primitive_type());
+        by_position[block_position.value()] = std::move(requested_column);
+    }
+
+    for (size_t i = 0; i < by_position.size(); ++i) {
+        if (!by_position[i].file_column_id.is_valid()) {
+            return Status::InvalidArgument("{} v2 request misses block position {}", _reader_name,
+                                           i);
+        }
+    }
+    *columns = std::move(by_position);
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_open_file() {
+    _start_offset = _file_description->range_start_offset;
+    _size = _file_description->range_size;
+    // Some callers, especially stream-load/http_stream, do not know the total length up front.
+    // For a first split this is fine: NewPlainTextLineReader can read until the underlying reader
+    // returns EOF. For non-first splits we still need a concrete range so the pre-read/skip-one-line
+    // boundary logic does not read an unbounded stream.
+    if (_size <= 0 && _file_description->file_size >= 0) {
+        _size = _file_description->file_size - _start_offset;
+    }
+    if (_size < 0 && _start_offset > 0) {
+        return Status::InvalidArgument("{} v2 reader requires a valid split size for {}",
+                                       _reader_name, _file_description->path);
+    }
+    _skip_lines = 0;
+    if (_start_offset == 0) {
+        if (_scan_params->file_attributes.__isset.header_type &&
+            !_scan_params->file_attributes.header_type.empty()) {
+            const auto header_type = to_lower(_scan_params->file_attributes.header_type);
+            if (header_type == BeConsts::CSV_WITH_NAMES) {
+                _skip_lines = 1;
+            } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) {
+                _skip_lines = 2;
+            }
+        } else if (_scan_params->file_attributes.__isset.skip_lines) {
+            _skip_lines = _scan_params->file_attributes.skip_lines;
+        }
+    } else {
+        if (!_can_split()) {
+            return Status::InternalError<false>("For now we do not support split compressed file");
+        }
+        // Non-first splits normally start in the middle of a record. Pre-read at most one line
+        // delimiter byte range, then skip one line in `_read_next_line()`, so the first returned
+        // row is always complete. Example with '\n':
+        //   file bytes:  "1,a\n2,b\n"
+        //   split start:     ^
+        //   pre-read:     ^
+        //   skipped line: "a"
+        //   returned row: "2,b"
+        const int64_t pre_read_len =
+                std::min(static_cast<int64_t>(_line_delimiter.size()), _start_offset);
+        _start_offset -= pre_read_len;
+        _size += pre_read_len;
+        _skip_lines = 1;
+    }
+
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        if (!_stream_load_id.has_value()) {
+            return Status::InvalidArgument("{} v2 stream reader requires load id", _reader_name);
+        }
+        // Stream load/http_stream data lives in NewLoadStreamMgr rather than a filesystem. The
+        // generic FileFactory path only supports real file systems, so FILE_STREAM must use the
+        // same pipe-reader lookup as the old CSV reader.
+        RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_file_reader,
+                                                        _runtime_state, /*need_schema=*/false));
+    } else {
+        auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(),
+                                                              *_file_description);
+        auto file_reader = DORIS_TRY(FileFactory::create_file_reader(
+                *_system_properties, *_file_description, reader_options, _profile));
+        _file_reader = _io_ctx && _io_ctx->file_reader_stats
+                               ? std::make_shared<io::TracingFileReader>(std::move(file_reader),
+                                                                         _io_ctx->file_reader_stats)
+                               : file_reader;
+    }
+    if (_file_reader->size() == 0 && _scan_params->file_type != TFileType::FILE_STREAM &&
+        _scan_params->file_type != TFileType::FILE_BROKER) {
+        return Status::EndOfFile("init reader failed, empty {} file: {}", _reader_name,
+                                 _file_description->path);
+    }
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_read_next_line(Slice* line, bool* eof) {
+    DORIS_CHECK(line != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    while (true) {
+        const uint8_t* ptr = nullptr;
+        size_t size = 0;
+        {
+            SCOPED_TIMER(_text_profile.read_line_time);
+            RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx.get()));
+        }
+        if (_line_reader_eof && size == 0) {
+            *eof = true;
+            return Status::OK();
+        }
+        if (_skip_lines == 0 && !_bom_removed) {
+            // BOM is stripped only from the first logical data line. Header lines are skipped
+            // before this branch, so a BOM inside a skipped header does not leak into user data.
+            ptr = _remove_bom(ptr, &size);
+            _bom_removed = true;
+        }
+        if (_skip_lines > 0) {
+            --_skip_lines;
+            _bom_removed = true;
+            update_counter(_text_profile.skipped_lines, 1);
+            continue;
+        }
+        *line = Slice(ptr, size);
+        *eof = false;
+        update_counter(_text_profile.raw_lines_read, 1);
+        return Status::OK();
+    }
+}
+
+Status DelimitedTextReader::_fill_columns_from_line(const Slice& line,
+                                                    std::vector<MutableColumnPtr>* columns,
+                                                    size_t* rows) {
+    DORIS_CHECK(columns != nullptr);
+    if (line.size == 0) {
+        update_counter(_text_profile.empty_lines_read, 1);
+        if (!_empty_line_as_record()) {
+            if (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null()) {
+                for (const auto& column : _requested_columns) {
+                    RETURN_IF_ERROR(_append_null((*columns)[column.block_position.value()].get()));
+                    update_counter(_text_profile.cells_deserialized, 1);
+                }
+                ++(*rows);
+            }
+            return Status::OK();
+        }
+    }
+    RETURN_IF_ERROR(_validate_line(line));
+
+    {
+        SCOPED_TIMER(_text_profile.split_line_time);
+        _split_line(line);
+    }
+    SCOPED_TIMER(_text_profile.deserialize_time);
+    for (const auto& column : _requested_columns) {
+        auto* output = (*columns)[column.block_position.value()].get();
+        const int32_t field_index = column.file_column_id.value();
+        // Missing trailing fields are query-compatible with the old readers: they become NULL
+        // rather than shifting subsequent projected columns or rejecting the row.
+        Slice value = field_index >= 0 && static_cast<size_t>(field_index) < _split_values.size()
+                              ? _split_values[field_index]
+                              : Slice(_options.null_format, _options.null_len);
+        RETURN_IF_ERROR(_deserialize_one_cell(column, output, _normalize_value(value)));
+        update_counter(_text_profile.cells_deserialized, 1);
+    }
+    ++(*rows);
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_validate_line(const Slice& line) {
+    (void)line;
+    return Status::OK();
+}
+
+Slice DelimitedTextReader::_normalize_value(Slice value) const {
+    return value;
+}
+
+bool DelimitedTextReader::_empty_line_as_record() const {
+    return false;
+}
+
+bool DelimitedTextReader::_can_split() const {
+    return _file_compress_type == TFileCompressType::PLAIN;
+}
+
+Status DelimitedTextReader::_append_null(IColumn* output) {
+    DORIS_CHECK(output != nullptr);
+    auto* nullable = assert_cast<ColumnNullable*>(output);
+    nullable->insert_data(nullptr, 0);
+    return Status::OK();
+}
+
+const uint8_t* DelimitedTextReader::_remove_bom(const uint8_t* ptr, size_t* size) {
+    DORIS_CHECK(size != nullptr);
+    if (ptr != nullptr && *size >= 3 && static_cast<uint8_t>(ptr[0]) == 0xEF &&
+        static_cast<uint8_t>(ptr[1]) == 0xBB && static_cast<uint8_t>(ptr[2]) == 0xBF) {
+        *size -= 3;
+        return ptr + 3;
+    }
+    return ptr;
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.h b/be/src/format_v2/delimited_text/delimited_text_reader.h
new file mode 100644
index 00000000000000..06cb93dd7f7b65
--- /dev/null
+++ b/be/src/format_v2/delimited_text/delimited_text_reader.h
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/data_type_serde/data_type_serde.h"
+#include "format_v2/file_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+#include "util/slice.h"
+
+namespace doris {
+class Decompressor;
+class LineReader;
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format {
+
+// Shared FileReader implementation for delimited text-like formats in FileScannerV2.
+//
+// CSV and Hive text have different row parsing and cell serde rules, but their v2 FileReader
+// control flow is the same: expose a file-local schema from FE slot descriptors, resolve
+// FileScanRequest local positions, read physical lines, materialize requested columns, apply
+// file-local conjuncts, and optionally count rows by scanning. This base keeps that contract in one
+// place while derived readers provide only format-specific hooks.
+class DelimitedTextReader : public FileReader {
+public:
+    ~DelimitedTextReader() override;
+
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const override;
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status get_aggregate_result(const FileAggregateRequest& request,
+                                FileAggregateResult* result) override;
+    Status close() override;
+
+protected:
+    struct DelimitedTextProfile {
+        RuntimeProfile::Counter* open_file_time = nullptr;
+        RuntimeProfile::Counter* create_line_reader_time = nullptr;
+        RuntimeProfile::Counter* read_line_time = nullptr;
+        RuntimeProfile::Counter* split_line_time = nullptr;
+        RuntimeProfile::Counter* deserialize_time = nullptr;
+        RuntimeProfile::Counter* conjunct_filter_time = nullptr;
+        RuntimeProfile::Counter* delete_conjunct_filter_time = nullptr;
+        RuntimeProfile::Counter* raw_lines_read = nullptr;
+        RuntimeProfile::Counter* rows_read_before_filter = nullptr;
+        RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr;
+        RuntimeProfile::Counter* rows_filtered_by_delete_conjunct = nullptr;
+        RuntimeProfile::Counter* rows_returned = nullptr;
+        RuntimeProfile::Counter* empty_lines_read = nullptr;
+        RuntimeProfile::Counter* skipped_lines = nullptr;
+        RuntimeProfile::Counter* cells_deserialized = nullptr;
+    };
+
+    struct RequestedColumn {
+        LocalColumnId file_column_id = LocalColumnId::invalid();
+        LocalIndex block_position;
+        const SlotDescriptor* slot_desc = nullptr;
+        DataTypeSerDeSPtr serde;
+        bool nullable_string_fast_path = false;
+    };
+
+    DelimitedTextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                        std::unique_ptr<io::FileDescription>& file_description,
+                        std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                        const TFileScanRangeParams* scan_params,
+                        const std::vector<SlotDescriptor*>& file_slot_descs,
+                        TFileCompressType::type range_compress_type,
+                        std::optional<TUniqueId> stream_load_id, std::string reader_name);
+
+    // Initialize format-specific options after the common init path has validated scan params and
+    // runtime state. Implementations must fill `_value_separator`, `_line_delimiter`,
+    // `_file_compress_type`, `_options`, and any parser-specific state before the common schema
+    // construction reads column_idxs.
+    virtual Status _init_format_state() = 0;
+    // Create the decompressor used by the line reader. CSV may infer compression from the file
+    // format enum, while Hive text uses only the explicit compress_type.
+    virtual Status _create_decompressor() = 0;
+    // Create the physical line reader. Implementations choose plain/enclosed/binary line contexts,
+    // but must store the result in `_line_reader` for the common get_block/count paths.
+    virtual Status _create_line_reader() = 0;
+    // Validate one logical line before splitting. CSV enforces UTF-8 for query reads; Hive text
+    // deliberately accepts arbitrary bytes and uses the default OK implementation.
+    virtual Status _validate_line(const Slice& line);
+    // Split one logical line into `_split_values`. The common materialization path then resolves
+    // requested field ordinals against `_split_values`.
+    virtual void _split_line(const Slice& line) = 0;
+    // Deserialize a single normalized field into the requested output column using the
+    // format-specific serde API.
+    virtual Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                         Slice value) = 0;
+    // Let formats rewrite a raw field before serde. CSV uses this for empty_field_as_null; Hive
+    // text keeps the raw field because empty string and NULL are distinct unless null_format
+    // matches exactly.
+    virtual Slice _normalize_value(Slice value) const;
+    // Whether an empty physical line is one logical record. CSV keeps the existing default
+    // skip behavior, while Hive TEXTFILE treats an empty line as a record with one empty field.
+    virtual bool _empty_line_as_record() const;
+    // Whether this file can start at a non-zero split offset. Compressed delimited files cannot be
+    // split because the decompressor needs the stream from the beginning.
+    virtual bool _can_split() const;
+
+    Status _append_null(IColumn* output);
+    // Match the generic nullable serde semantics exactly: a field is NULL when its raw slice is
+    // byte-for-byte equal to null_format. This also covers Hive tables that set
+    // serialization.null.format to the empty string.
+    bool _is_null_format(Slice value) const;
+    const uint8_t* _remove_bom(const uint8_t* ptr, size_t* size);
+    void _init_profile() override;
+
+    const TFileScanRangeParams* _scan_params = nullptr;
+    std::vector<SlotDescriptor*> _source_file_slot_descs;
+    std::vector<int32_t> _source_column_idxs;
+    DataTypeSerDeSPtrs _source_serdes;
+    std::vector<ColumnDefinition> _file_schema;
+    RuntimeState* _runtime_state = nullptr;
+
+    std::vector<RequestedColumn> _requested_columns;
+    std::unique_ptr<Decompressor> _decompressor;
+    std::unique_ptr<LineReader> _line_reader;
+    std::vector<Slice> _split_values;
+    DataTypeSerDe::FormatOptions _options;
+
+    std::string _value_separator;
+    std::string _line_delimiter;
+    TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN;
+    TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _stream_load_id;
+    int64_t _start_offset = 0;
+    int64_t _size = -1;
+    int _skip_lines = 0;
+    char _escape = 0;
+    bool _line_reader_eof = false;
+    bool _bom_removed = false;
+    // FE exposes this as an optional text-file attribute. Keep the default strict so missing thrift
+    // fields do not accidentally accept arbitrary bytes; CSV can still opt out through the session
+    // variable or TVF/file-format property `enable_text_validate_utf8=false`.
+    bool _enable_text_validate_utf8 = true;
+    DelimitedTextProfile _text_profile;
+
+private:
+    Status _build_requested_columns(const FileScanRequest& request,
+                                    std::vector<RequestedColumn>* columns) const;
+    Status _open_file();
+    Status _read_next_line(Slice* line, bool* eof);
+    Status _fill_columns_from_line(const Slice& line, std::vector<MutableColumnPtr>* columns,
+                                   size_t* rows);
+
+    std::string _reader_name;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/text_reader.cpp b/be/src/format_v2/delimited_text/text_reader.cpp
new file mode 100644
index 00000000000000..930052a14f1229
--- /dev/null
+++ b/be/src/format_v2/delimited_text/text_reader.cpp
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/text_reader.h"
+
+#include <cstring>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type_serde/data_type_string_serde.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "runtime/descriptors.h"
+#include "util/decompressor.h"
+
+namespace doris::format::text {
+namespace {
+
+bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) {
+    return !needle.empty() && pos + needle.size() <= line.size &&
+           std::memcmp(line.data + pos, needle.data(), needle.size()) == 0;
+}
+
+} // namespace
+
+TextReader::TextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                       std::unique_ptr<io::FileDescription>& file_description,
+                       std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                       const TFileScanRangeParams* scan_params,
+                       const std::vector<SlotDescriptor*>& file_slot_descs,
+                       TFileCompressType::type range_compress_type,
+                       std::optional<TUniqueId> stream_load_id)
+        : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile,
+                              scan_params, file_slot_descs, range_compress_type,
+                              std::move(stream_load_id), "Text") {}
+
+TextReader::~TextReader() = default;
+
+Status TextReader::_init_format_state() {
+    _file_compress_type =
+            _range_compress_type != TFileCompressType::UNKNOWN
+                    ? _range_compress_type
+                    : (_scan_params->__isset.compress_type ? _scan_params->compress_type
+                                                           : TFileCompressType::PLAIN);
+
+    const auto& text_params = _scan_params->file_attributes.text_params;
+    _value_separator = text_params.column_separator;
+    _line_delimiter = text_params.line_delimiter;
+    if (text_params.__isset.escape) {
+        _escape = text_params.escape;
+    }
+    _options.escape_char = _escape;
+    _options.collection_delim =
+            text_params.collection_delimiter.empty() ? ',' : text_params.collection_delimiter[0];
+    _options.map_key_delim =
+            text_params.mapkv_delimiter.empty() ? ':' : text_params.mapkv_delimiter[0];
+    if (text_params.__isset.null_format) {
+        _options.null_format = text_params.null_format.data();
+        _options.null_len = text_params.null_format.length();
+    }
+    return Status::OK();
+}
+
+Status TextReader::_create_decompressor() {
+    return Decompressor::create_decompressor(_file_compress_type, &_decompressor);
+}
+
+Status TextReader::_create_line_reader() {
+    auto text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
+            _line_delimiter, _line_delimiter.size(), false);
+    _line_reader = NewPlainTextLineReader::create_unique(
+            _profile, _file_reader, _decompressor.get(), std::move(text_line_reader_ctx), _size,
+            _start_offset);
+    return Status::OK();
+}
+
+void TextReader::_split_line(const Slice& line) {
+    _split_values.clear();
+    if (_value_separator.size() == 1) {
+        _split_line_single_char(line);
+    } else {
+        _split_line_multi_char(line);
+    }
+}
+
+void TextReader::_split_line_single_char(const Slice& line) {
+    size_t value_start = 0;
+    for (size_t i = 0; i < line.size; ++i) {
+        if (line.data[i] == _value_separator[0]) {
+            // Hive text lets a string escape the field separator. The backslash remains in the
+            // field slice so deserialize_one_cell_from_hive_text() can unescape the final value.
+            if (_escape != 0 && i > 0 && line.data[i - 1] == _escape) {
+                continue;
+            }
+            _split_values.emplace_back(line.data + value_start, i - value_start);
+            value_start = i + _value_separator.size();
+        }
+    }
+    _split_values.emplace_back(line.data + value_start, line.size - value_start);
+}
+
+void TextReader::_split_line_multi_char(const Slice& line) {
+    size_t value_start = 0;
+    size_t i = 0;
+    while (i < line.size) {
+        if (starts_with_at(line, i, _value_separator)) {
+            if (_escape != 0 && i > 0 && line.data[i - 1] == _escape) {
+                ++i;
+                continue;
+            }
+            _split_values.emplace_back(line.data + value_start, i - value_start);
+            i += _value_separator.size();
+            value_start = i;
+            continue;
+        }
+        ++i;
+    }
+    _split_values.emplace_back(line.data + value_start, line.size - value_start);
+}
+
+Status TextReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                         Slice value) {
+    DORIS_CHECK(output != nullptr);
+    if (column.nullable_string_fast_path) {
+        auto& null_column = assert_cast<ColumnNullable&>(*output);
+        if (_is_null_format(value)) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        static DataTypeStringSerDe string_serde(TYPE_STRING);
+        auto status = string_serde.deserialize_one_cell_from_hive_text(
+                null_column.get_nested_column(), value, _options);
+        if (!status.ok()) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        null_column.get_null_map_data().push_back(0);
+        return Status::OK();
+    }
+    return column.serde->deserialize_one_cell_from_hive_text(*output, value, _options);
+}
+
+bool TextReader::_empty_line_as_record() const {
+    // Hive TEXTFILE treats an empty physical line as a record. The splitter maps it
+    // to one empty field and missing trailing fields are filled with null_format.
+    return true;
+}
+
+} // namespace doris::format::text
diff --git a/be/src/format_v2/delimited_text/text_reader.h b/be/src/format_v2/delimited_text/text_reader.h
new file mode 100644
index 00000000000000..8efbfe359c7e64
--- /dev/null
+++ b/be/src/format_v2/delimited_text/text_reader.h
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "util/slice.h"
+
+namespace doris {
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format::text {
+
+// FileScannerV2 Hive text reader.
+//
+// Text files do not have embedded schema, so FE-provided file slots and column_idxs are converted
+// into a file-local schema in the same way as CSV v2. The row parser is intentionally different
+// from CSV: field splitting follows Hive text escaping rules and cells are deserialized through
+// deserialize_one_cell_from_hive_text().
+class TextReader final : public ::doris::format::DelimitedTextReader {
+public:
+    TextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+               const TFileScanRangeParams* scan_params,
+               const std::vector<SlotDescriptor*>& file_slot_descs,
+               TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+               std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~TextReader() override;
+
+private:
+    Status _init_format_state() override;
+    Status _create_decompressor() override;
+    Status _create_line_reader() override;
+    void _split_line(const Slice& line) override;
+    void _split_line_single_char(const Slice& line);
+    void _split_line_multi_char(const Slice& line);
+    Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                 Slice value) override;
+    bool _empty_line_as_record() const override;
+};
+
+} // namespace doris::format::text
diff --git a/be/src/format_v2/expr/cast.cpp b/be/src/format_v2/expr/cast.cpp
new file mode 100644
index 00000000000000..efeb9d851deb22
--- /dev/null
+++ b/be/src/format_v2/expr/cast.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/cast.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+
+namespace doris::format {
+
+Status Cast::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(_children.size());
+    if (_children[0]->is_literal()) {
+        // For some functions, he needs some literal columns to derive the return type.
+        auto literal_node = std::dynamic_pointer_cast<VLiteral>(_children[0]);
+        argument_template.emplace_back(literal_node->get_column_ptr(), _children[0]->data_type(),
+                                       _children[0]->expr_name());
+    } else {
+        argument_template.emplace_back(nullptr, _children[0]->data_type(),
+                                       _children[0]->expr_name());
+    }
+
+    _expr_name = fmt::format("CAST(arguments={},return={})", _children[0]->data_type()->get_name(),
+                             _data_type->get_name());
+    // get the function. won't prepare function.
+    _function = SimpleFunctionFactory::instance().get_function(
+            "CAST", argument_template, _data_type,
+            {.new_version_unix_timestamp = state->query_options().new_version_unix_timestamp},
+            state->be_exec_version());
+    if (_function == nullptr) {
+        return Status::InternalError("Could not find function {} ", _expr_name);
+    }
+    VExpr::register_function_context(state, context);
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status Cast::open(RuntimeState* state, VExprContext* context,
+                  FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    for (auto& i : _children) {
+        RETURN_IF_ERROR(i->open(state, context, scope));
+    }
+    RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function));
+    if (scope == FunctionContext::FRAGMENT_LOCAL) {
+        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+    }
+    _open_finished = true;
+    return Status::OK();
+}
+
+void Cast::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
+    VExpr::close_function_context(context, scope, _function);
+    VExpr::close(context, scope);
+}
+
+Status Cast::execute_column_impl(VExprContext* context, const Block* block,
+                                 const Selector* selector, size_t count,
+                                 ColumnPtr& result_column) const {
+    return _do_execute(context, block, selector, count, result_column);
+}
+
+std::string Cast::debug_string() const {
+    return _expr_name;
+}
+
+Status Cast::_do_execute(VExprContext* context, const Block* block, const Selector* selector,
+                         size_t count, ColumnPtr& result_column) const {
+    DCHECK(_open_finished || block == nullptr) << debug_string();
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    if (is_const_and_have_executed()) { // const have executed in open function
+        result_column = get_result_from_const(count);
+        return Status::OK();
+    }
+
+    Block temp_block;
+    ColumnNumbers args(1);
+
+    ColumnPtr tmp_arg_column;
+    RETURN_IF_ERROR(_children[0]->execute_column(context, block, selector, count, tmp_arg_column));
+    auto arg_type = _children[0]->execute_type(block);
+    temp_block.insert({tmp_arg_column, arg_type, _children[0]->expr_name()});
+    args[0] = 0;
+
+    uint32_t num_columns_without_result = temp_block.columns();
+    // prepare a column to save result
+    temp_block.insert({nullptr, _data_type, _expr_name});
+
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), temp_block, args,
+                                       num_columns_without_result, count));
+    result_column = temp_block.get_by_position(num_columns_without_result).column;
+    DCHECK_EQ(result_column->size(), count);
+    RETURN_IF_ERROR(result_column->column_self_check());
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/cast.h b/be/src/format_v2/expr/cast.h
new file mode 100644
index 00000000000000..1dc06bcf07f2bc
--- /dev/null
+++ b/be/src/format_v2/expr/cast.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "common/object_pool.h"
+#include "common/status.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+class Block;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class Cast final : public VExpr {
+    ENABLE_FACTORY_CREATOR(Cast);
+
+public:
+    Cast(const DataTypePtr& type) {
+        _node_type = TExprNodeType::CAST_EXPR;
+        _opcode = TExprOpcode::CAST;
+        _data_type = type;
+    }
+    ~Cast() override = default;
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; }
+    const std::string& expr_name() const override { return _expr_name; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = Cast::create_shared(_data_type);
+        return Status::OK();
+    }
+
+private:
+    Status _do_execute(VExprContext* context, const Block* block, const Selector* selector,
+                       size_t count, ColumnPtr& result_column) const;
+    std::string _expr_name;
+    FunctionBasePtr _function;
+};
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/delete_predicate.cpp b/be/src/format_v2/expr/delete_predicate.cpp
new file mode 100644
index 00000000000000..9ab1090247c15a
--- /dev/null
+++ b/be/src/format_v2/expr/delete_predicate.cpp
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/delete_predicate.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_numbers.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+
+namespace doris::format {
+
+DeletePredicate::DeletePredicate(const std::vector<int64_t>& deleted_rows)
+        : VExpr(), _deleted_rows(deleted_rows) {
+    _node_type = TExprNodeType::PREDICATE;
+    _opcode = TExprOpcode::DELETE;
+    _data_type = std::make_shared<DataTypeBool>();
+}
+
+Status DeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                                VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    _expr_name = "DeletePredicate";
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status DeletePredicate::open(RuntimeState* state, VExprContext* context,
+                             FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::open(state, context, scope));
+    _open_finished = true;
+    return Status::OK();
+}
+
+void DeletePredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
+    VExpr::close(context, scope);
+}
+
+/**
+ * DeletePredicate is derived from 2 cases:
+ * 1. All row IDs indicates deleted rows. (e.g. Delete rows with row_id in (1, 2, 3))
+ * 2. Bit vector indicates whether each row is deleted or not. (e.g. Bit vector[0,1,0,0,1] indicates row 1 and row 4 are deleted)
+ *
+ * So DeletePredicate should have exactly 1 child expr, which is the slot of row id.
+ * Row IDs should be generated by file reader as a virtual column in `block`.
+ **/
+Status DeletePredicate::execute(VExprContext* context, Block* block, int* result_column_id) const {
+    if (_children.size() != 1) {
+        return Status::InternalError(fmt::format(
+                "DeletePredicate should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    int slot = -1;
+    RETURN_IF_ERROR(_children[0]->execute(context, block, &slot));
+    if (slot < 0 || static_cast<size_t>(slot) >= block->columns()) {
+        return Status::InternalError(
+                "DeletePredicate row id child returned invalid column id {}, block has {} columns",
+                slot, block->columns());
+    }
+    const auto& row_ids =
+            assert_cast<const ColumnInt64&>(*block->get_by_position(slot).column).get_data();
+    const auto count = row_ids.size();
+    auto res_col = ColumnBool::create(count, 0);
+    if (_deleted_rows.empty()) {
+        block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+        *result_column_id = static_cast<int>(block->get_columns().size() - 1);
+        return Status::OK();
+    }
+    if (count == 0) {
+        block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+        *result_column_id = static_cast<int>(block->get_columns().size() - 1);
+        return Status::OK();
+    }
+    const int64_t* delete_rows = _deleted_rows.data();
+    const int64_t* delete_rows_end = delete_rows + _deleted_rows.size();
+    const int64_t* start_pos = std::lower_bound(delete_rows, delete_rows_end, row_ids[0]);
+    int64_t start_index = start_pos - delete_rows;
+    const int64_t* end_pos = std::upper_bound(start_pos, delete_rows_end, row_ids[count - 1]);
+    const int64_t end_index = end_pos - delete_rows;
+
+    while (start_index < end_index) {
+        int64_t delete_row = delete_rows[start_index];
+        if (const auto it = std::ranges::lower_bound(row_ids, delete_row);
+            it != row_ids.end() && *it == delete_row) {
+            const size_t index = it - row_ids.begin();
+            res_col->get_data()[index] = true;
+        }
+        ++start_index;
+    }
+    block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+    *result_column_id = static_cast<int>(block->get_columns().size() - 1);
+    return Status::OK();
+}
+
+std::string DeletePredicate::debug_string() const {
+    return _expr_name;
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/delete_predicate.h b/be/src/format_v2/expr/delete_predicate.h
new file mode 100644
index 00000000000000..dce2de3edf278e
--- /dev/null
+++ b/be/src/format_v2/expr/delete_predicate.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "common/object_pool.h"
+#include "common/status.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+class Block;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class DeletePredicate final : public VExpr {
+    ENABLE_FACTORY_CREATOR(DeletePredicate);
+
+public:
+    DeletePredicate(const std::vector<int64_t>& deleted_rows);
+    ~DeletePredicate() override = default;
+    Status execute(VExprContext* context, Block* block, int* result_column_id) const override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        return Status::InternalError("Not implement DeletePredicate::execute_column_impl");
+    }
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; }
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    std::string _expr_name;
+    const std::vector<int64_t>& _deleted_rows;
+};
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/equality_delete_predicate.cpp b/be/src/format_v2/expr/equality_delete_predicate.cpp
new file mode 100644
index 00000000000000..13454e3b22f116
--- /dev/null
+++ b/be/src/format_v2/expr/equality_delete_predicate.cpp
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/equality_delete_predicate.h"
+
+#include <gen_cpp/Exprs_types.h>
+
+#include <utility>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+
+namespace doris::format {
+namespace {
+
+bool column_value_equal(const ColumnPtr& lhs, size_t lhs_row, const ColumnPtr& rhs,
+                        size_t rhs_row) {
+    if (lhs->is_nullable() && rhs->is_nullable()) {
+        return lhs->compare_at(lhs_row, rhs_row, *rhs, -1) == 0;
+    }
+    if (lhs->is_nullable()) {
+        const auto& nullable_lhs = assert_cast<const ColumnNullable&>(*lhs);
+        return !nullable_lhs.is_null_at(lhs_row) &&
+               nullable_lhs.get_nested_column().compare_at(lhs_row, rhs_row, *rhs, -1) == 0;
+    }
+    if (rhs->is_nullable()) {
+        const auto& nullable_rhs = assert_cast<const ColumnNullable&>(*rhs);
+        return !nullable_rhs.is_null_at(rhs_row) &&
+               lhs->compare_at(lhs_row, rhs_row, nullable_rhs.get_nested_column(), -1) == 0;
+    }
+    return lhs->compare_at(lhs_row, rhs_row, *rhs, -1) == 0;
+}
+
+} // namespace
+
+EqualityDeletePredicate::EqualityDeletePredicate(Block delete_block, std::vector<int> field_ids)
+        : VExpr(), _delete_block(std::move(delete_block)), _field_ids(std::move(field_ids)) {
+    _node_type = TExprNodeType::PREDICATE;
+    _opcode = TExprOpcode::DELETE;
+    _data_type = std::make_shared<DataTypeBool>();
+    _expr_name = "EqualityDeletePredicate";
+    DCHECK_EQ(_delete_block.columns(), _field_ids.size());
+    _delete_hashes = _build_hashes(_delete_block);
+    for (size_t row = 0; row < _delete_hashes.size(); ++row) {
+        _delete_hash_map.emplace(_delete_hashes[row], row);
+    }
+}
+
+Status EqualityDeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                                        VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    _expr_name = "EqualityDeletePredicate";
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status EqualityDeletePredicate::open(RuntimeState* state, VExprContext* context,
+                                     FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    for (auto& child : _children) {
+        RETURN_IF_ERROR(child->open(state, context, scope));
+    }
+    if (scope == FunctionContext::FRAGMENT_LOCAL) {
+        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+    }
+    _open_finished = true;
+    return Status::OK();
+}
+
+void EqualityDeletePredicate::close(VExprContext* context,
+                                    FunctionContext::FunctionStateScope scope) {
+    VExpr::close(context, scope);
+}
+
+Status EqualityDeletePredicate::execute(VExprContext* context, Block* block,
+                                        int* result_column_id) const {
+    if (_children.size() != _field_ids.size()) {
+        return Status::InternalError(
+                "EqualityDeletePredicate should have {} child exprs, but got {}", _field_ids.size(),
+                _children.size());
+    }
+
+    Block data_key_block;
+    for (const auto& child : _children) {
+        Block eval_block = *block;
+        int slot = -1;
+        RETURN_IF_ERROR(child->execute(context, &eval_block, &slot));
+        const auto& key_column = eval_block.get_by_position(slot);
+        data_key_block.insert({key_column.column, key_column.type, key_column.name});
+    }
+
+    const auto rows = data_key_block.rows();
+    auto res_col = ColumnBool::create(rows, 0);
+    if (_delete_hash_map.empty() || rows == 0) {
+        block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+        *result_column_id = static_cast<int>(block->columns() - 1);
+        return Status::OK();
+    }
+
+    auto data_hashes = _build_hashes(data_key_block);
+    auto& result_data = res_col->get_data();
+    for (size_t row = 0; row < rows; ++row) {
+        const auto range = _delete_hash_map.equal_range(data_hashes[row]);
+        for (auto it = range.first; it != range.second; ++it) {
+            if (_equal(data_key_block, row, it->second)) {
+                result_data[row] = true;
+                break;
+            }
+        }
+    }
+
+    block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+    *result_column_id = static_cast<int>(block->columns() - 1);
+    return Status::OK();
+}
+
+std::vector<uint64_t> EqualityDeletePredicate::_build_hashes(const Block& block) {
+    std::vector<uint64_t> hashes(block.rows(), 0);
+    for (const auto& column : block.get_columns()) {
+        column->update_hashes_with_value(hashes.data(), nullptr);
+    }
+    return hashes;
+}
+
+bool EqualityDeletePredicate::_equal(const Block& data_block, size_t data_row,
+                                     size_t delete_row) const {
+    for (size_t column_idx = 0; column_idx < _delete_block.columns(); ++column_idx) {
+        const auto& data_column = data_block.get_by_position(column_idx).column;
+        const auto& delete_column = _delete_block.get_by_position(column_idx).column;
+        if (!column_value_equal(data_column, data_row, delete_column, delete_row)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+std::string EqualityDeletePredicate::debug_string() const {
+    return _expr_name;
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/equality_delete_predicate.h b/be/src/format_v2/expr/equality_delete_predicate.h
new file mode 100644
index 00000000000000..cad16ca387ccd8
--- /dev/null
+++ b/be/src/format_v2/expr/equality_delete_predicate.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class EqualityDeletePredicate final : public VExpr {
+    ENABLE_FACTORY_CREATOR(EqualityDeletePredicate);
+
+public:
+    EqualityDeletePredicate(Block delete_block, std::vector<int> field_ids);
+    ~EqualityDeletePredicate() override = default;
+
+    Status execute(VExprContext* context, Block* block, int* result_column_id) const override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        return Status::InternalError("Not implement EqualityDeletePredicate::execute_column_impl");
+    }
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; }
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    static std::vector<uint64_t> _build_hashes(const Block& block);
+    bool _equal(const Block& data_block, size_t data_row, size_t delete_row) const;
+
+    std::string _expr_name;
+    Block _delete_block;
+    std::vector<int> _field_ids;
+    std::vector<uint64_t> _delete_hashes;
+    std::multimap<uint64_t, size_t> _delete_hash_map;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/file_reader.cpp b/be/src/format_v2/file_reader.cpp
new file mode 100644
index 00000000000000..31b3f27c69797d
--- /dev/null
+++ b/be/src/format_v2/file_reader.cpp
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/file_reader.h"
+
+#include <sstream>
+
+#include "format_v2/column_mapper.h"
+#include "io/fs/buffered_reader.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+std::unique_ptr<FileStructPredicateTarget> clone_struct_predicate_target(
+        const std::unique_ptr<FileStructPredicateTarget>& target) {
+    return target == nullptr ? nullptr : std::make_unique<FileStructPredicateTarget>(*target);
+}
+
+template <typename T, typename Formatter>
+std::string join_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t i = 0; i < values.size(); ++i) {
+        if (i > 0) {
+            out << ", ";
+        }
+        out << formatter(values[i]);
+    }
+    out << "]";
+    return out.str();
+}
+
+std::string int_vector_debug_string(const std::vector<int32_t>& values) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t i = 0; i < values.size(); ++i) {
+        if (i > 0) {
+            out << ", ";
+        }
+        out << values[i];
+    }
+    out << "]";
+    return out.str();
+}
+
+void append_struct_predicate_path(const FileStructPredicateTarget* target,
+                                  std::vector<int32_t>* path) {
+    DORIS_CHECK(path != nullptr);
+    for (const auto* current = target; current != nullptr; current = current->child.get()) {
+        path->push_back(current->file_local_id);
+    }
+}
+
+std::string struct_predicate_target_debug_string(const FileStructPredicateTarget* target) {
+    if (target == nullptr) {
+        return "null";
+    }
+    std::ostringstream out;
+    out << "{file_local_id=" << target->file_local_id
+        << ", file_child_name=" << target->file_child_name
+        << ", child=" << struct_predicate_target_debug_string(target->child.get()) << "}";
+    return out.str();
+}
+
+bool struct_predicate_targets_equal(const FileStructPredicateTarget* lhs,
+                                    const FileStructPredicateTarget* rhs) {
+    while (lhs != nullptr && rhs != nullptr) {
+        if (lhs->file_local_id != rhs->file_local_id) {
+            return false;
+        }
+        lhs = lhs->child.get();
+        rhs = rhs->child.get();
+    }
+    return lhs == nullptr && rhs == nullptr;
+}
+
+} // namespace
+
+FileStructPredicateTarget::FileStructPredicateTarget(const FileStructPredicateTarget& other)
+        : file_local_id(other.file_local_id),
+          file_child_name(other.file_child_name),
+          child(clone_struct_predicate_target(other.child)) {}
+
+FileStructPredicateTarget& FileStructPredicateTarget::operator=(
+        const FileStructPredicateTarget& other) {
+    if (this == &other) {
+        return *this;
+    }
+    file_local_id = other.file_local_id;
+    file_child_name = other.file_child_name;
+    child = clone_struct_predicate_target(other.child);
+    return *this;
+}
+
+FileNestedPredicateTarget::FileNestedPredicateTarget(const FileNestedPredicateTarget& other)
+        : file_column_id(other.file_column_id),
+          struct_target(clone_struct_predicate_target(other.struct_target)) {}
+
+FileNestedPredicateTarget& FileNestedPredicateTarget::operator=(
+        const FileNestedPredicateTarget& other) {
+    if (this == &other) {
+        return *this;
+    }
+    file_column_id = other.file_column_id;
+    struct_target = clone_struct_predicate_target(other.struct_target);
+    return *this;
+}
+
+LocalColumnId FileColumnPredicateFilter::effective_file_column_id() const {
+    return target.is_valid() ? target.file_column_id : file_column_id;
+}
+
+std::vector<int32_t> FileColumnPredicateFilter::effective_file_child_id_path() const {
+    if (!target.is_valid()) {
+        return file_child_id_path;
+    }
+    std::vector<int32_t> path;
+    append_struct_predicate_path(target.struct_target.get(), &path);
+    return path;
+}
+
+bool FileColumnPredicateFilter::same_target_as(const FileColumnPredicateFilter& other) const {
+    if (target.is_valid() && other.target.is_valid()) {
+        return target.file_column_id == other.target.file_column_id &&
+               struct_predicate_targets_equal(target.struct_target.get(),
+                                              other.target.struct_target.get());
+    }
+    return effective_file_column_id() == other.effective_file_column_id() &&
+           effective_file_child_id_path() == other.effective_file_child_id_path();
+}
+
+std::string FileColumnPredicateFilter::debug_string() const {
+    std::ostringstream out;
+    out << "FileColumnPredicateFilter{target={file_column_id=" << effective_file_column_id()
+        << ", struct_target=" << struct_predicate_target_debug_string(target.struct_target.get())
+        << "}, file_child_id_path=" << int_vector_debug_string(effective_file_child_id_path())
+        << ", predicate_count=" << predicates.size() << "}";
+    return out.str();
+}
+
+std::string FileScanRequest::debug_string() const {
+    std::ostringstream out;
+    out << "FileScanRequest{predicate_columns="
+        << join_debug_strings(
+                   predicate_columns,
+                   [](const LocalColumnIndex& projection) { return projection.debug_string(); })
+        << ", non_predicate_columns="
+        << join_debug_strings(
+                   non_predicate_columns,
+                   [](const LocalColumnIndex& projection) { return projection.debug_string(); })
+        << ", local_positions={";
+    size_t position_idx = 0;
+    for (const auto& [column_id, block_position] : local_positions) {
+        if (position_idx++ > 0) {
+            out << ", ";
+        }
+        out << column_id << ":" << block_position;
+    }
+    out << "}, conjunct_count=" << conjuncts.size()
+        << ", delete_conjunct_count=" << delete_conjuncts.size() << ", column_predicate_filters="
+        << join_debug_strings(
+                   column_predicate_filters,
+                   [](const FileColumnPredicateFilter& filter) { return filter.debug_string(); })
+        << "}";
+    return out.str();
+}
+
+Status FileReader::init(RuntimeState* state) {
+    _init_profile();
+    SCOPED_RAW_TIMER(&_reader_statistics.file_reader_create_time);
+    ++_reader_statistics.open_file_num;
+    io::FileReaderOptions reader_options =
+            FileFactory::get_reader_options(state->query_options(), *_file_description);
+    _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
+            _profile, *_system_properties, *_file_description, reader_options,
+            io::DelegateReader::AccessMode::RANDOM, _io_ctx));
+    // IOContext can be present without file_reader_stats in standalone tests or callers that only
+    // need extra IO state. TracingFileReader dereferences the stats pointer on every read, so only
+    // wrap the physical reader when stats collection is actually available.
+    _tracing_file_reader = _io_ctx && _io_ctx->file_reader_stats
+                                   ? std::make_shared<io::TracingFileReader>(
+                                             _file_reader, _io_ctx->file_reader_stats)
+                                   : _file_reader;
+    _eof = false;
+    return Status::OK();
+}
+
+std::unique_ptr<TableColumnMapper> FileReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    return std::make_unique<TableColumnMapper>(std::move(options));
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/file_reader.h b/be/src/format_v2/file_reader.h
new file mode 100644
index 00000000000000..3f192ae093a47b
--- /dev/null
+++ b/be/src/format_v2/file_reader.h
@@ -0,0 +1,414 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader_writer_fwd.h"
+
+namespace doris {
+class Block;
+class ColumnPredicate;
+struct ConditionCacheContext;
+
+namespace io {
+struct IOContext;
+} // namespace io
+} // namespace doris
+
+namespace doris::format {
+
+class TableColumnMapper;
+struct TableColumnMapperOptions;
+
+// Struct-only nested predicate target used by file-layer pruning.
+//
+// This intentionally models only a STRUCT field chain. LIST/MAP/repeated predicates need explicit
+// quantified semantics, so they must not be encoded here.
+struct FileStructPredicateTarget {
+    int32_t file_local_id = -1;
+    std::string file_child_name;
+    std::unique_ptr<FileStructPredicateTarget> child;
+
+    FileStructPredicateTarget() = default;
+    FileStructPredicateTarget(int32_t local_id, std::string child_name,
+                              std::unique_ptr<FileStructPredicateTarget> nested_child = nullptr)
+            : file_local_id(local_id),
+              file_child_name(std::move(child_name)),
+              child(std::move(nested_child)) {}
+    FileStructPredicateTarget(const FileStructPredicateTarget& other);
+    FileStructPredicateTarget& operator=(const FileStructPredicateTarget& other);
+    FileStructPredicateTarget(FileStructPredicateTarget&& other) noexcept = default;
+    FileStructPredicateTarget& operator=(FileStructPredicateTarget&& other) noexcept = default;
+};
+
+struct FileNestedPredicateTarget {
+    LocalColumnId file_column_id = LocalColumnId::invalid();
+    // Null means the predicate targets the top-level primitive column itself.
+    std::unique_ptr<FileStructPredicateTarget> struct_target;
+
+    FileNestedPredicateTarget() = default;
+    explicit FileNestedPredicateTarget(LocalColumnId column_id) : file_column_id(column_id) {}
+    FileNestedPredicateTarget(LocalColumnId column_id,
+                              std::unique_ptr<FileStructPredicateTarget> target)
+            : file_column_id(column_id), struct_target(std::move(target)) {}
+    FileNestedPredicateTarget(const FileNestedPredicateTarget& other);
+    FileNestedPredicateTarget& operator=(const FileNestedPredicateTarget& other);
+    FileNestedPredicateTarget(FileNestedPredicateTarget&& other) noexcept = default;
+    FileNestedPredicateTarget& operator=(FileNestedPredicateTarget&& other) noexcept = default;
+
+    bool is_valid() const { return file_column_id.is_valid(); }
+};
+
+// File-local single-column predicates for file-layer pruning, such as min/max, page index,
+// dictionary and bloom filter.
+//
+// Predicates must all belong to target.file_column_id. target.struct_target points to the nested
+// primitive leaf under that root; null means the top-level column itself is the primitive leaf.
+// These predicates are pruning hints only and are not row-level conjuncts.
+struct FileColumnPredicateFilter {
+    FileNestedPredicateTarget target;
+    // Compatibility fields for call sites and tests that still construct pruning filters directly.
+    // New mapper code should fill target; file readers consume target first and only fall back to
+    // these fields while the API migration is in progress.
+    LocalColumnId file_column_id = LocalColumnId::invalid();
+    std::vector<int32_t> file_child_id_path;
+    std::vector<std::shared_ptr<ColumnPredicate>> predicates;
+
+    LocalColumnId effective_file_column_id() const;
+    std::vector<int32_t> effective_file_child_id_path() const;
+    bool same_target_as(const FileColumnPredicateFilter& other) const;
+    std::string debug_string() const;
+};
+
+enum class FileFormat {
+    PARQUET,
+    ORC,
+    CSV,
+    JSON,
+    TEXT,
+    JNI,
+};
+
+// 通用文件层 scan 请求。
+// 该结构描述所有文件格式都可以共享的 file-local 读取输入。这里不出现 table/global
+// schema。所有 schema change、filter localization、default/generated/partition
+// 列都应在 table 层完成。
+struct FileScanRequest {
+    virtual ~FileScanRequest() = default;
+
+    std::string debug_string() const;
+
+    // Columns that must be read before row-level filtering. They are materialized eagerly because
+    // conjuncts/delete_conjuncts need them to decide the selected rows.
+    std::vector<LocalColumnIndex> predicate_columns;
+    // Columns read after row-level filtering. Predicate columns are also available for output and
+    // should not be duplicated here.
+    std::vector<LocalColumnIndex> non_predicate_columns;
+    // file-local column id -> file-local output block position.
+    std::map<LocalColumnId, LocalIndex> local_positions;
+    // Row-level filters converted to file-local expressions from table-level predicates.
+    VExprContextSPtrs conjuncts;
+    // Delete predicates converted to file-local expressions.
+    VExprContextSPtrs delete_conjuncts;
+    // Single-column predicates used only for file-layer pruning, such as statistics, page index,
+    // dictionary and bloom filter. They must not be used for batch row-level filtering.
+    std::vector<FileColumnPredicateFilter> column_predicate_filters;
+};
+
+// Helper for constructing the scan-column layout in FileScanRequest.
+//
+// FileScanRequest keeps predicate and non-predicate columns separate because columnar readers such
+// as Parquet can read predicate columns first, filter rows, and then lazily read the remaining
+// projected columns. The two lists still share one file-local output block, whose positions are
+// stored in local_positions. This builder centralizes the mechanical rules for that shared layout:
+// - each root file column gets one stable block position;
+// - predicate columns dominate non-predicate columns because they are already returned in the file
+//   block and can be reused for final materialization;
+// - repeated nested projections for the same root are merged instead of duplicated.
+//
+// TableColumnMapper should still own table-to-file semantic resolution. This helper only owns the
+// FileScanRequest layout contract after a file-local projection has been produced.
+class FileScanRequestBuilder {
+public:
+    explicit FileScanRequestBuilder(FileScanRequest* request) : _request(request) {
+        DORIS_CHECK(_request != nullptr);
+    }
+
+    Status add_predicate_column(LocalColumnIndex projection) {
+        return _add_column(std::move(projection), &_request->predicate_columns,
+                           /*is_predicate_column=*/true);
+    }
+
+    Status add_non_predicate_column(LocalColumnIndex projection) {
+        return _add_column(std::move(projection), &_request->non_predicate_columns,
+                           /*is_predicate_column=*/false);
+    }
+
+    Status add_predicate_column(LocalColumnId column_id) {
+        return add_predicate_column(LocalColumnIndex::top_level(column_id));
+    }
+
+    Status add_non_predicate_column(LocalColumnId column_id) {
+        return add_non_predicate_column(LocalColumnIndex::top_level(column_id));
+    }
+
+private:
+    static LocalIndex _next_block_position(const FileScanRequest& request) {
+        size_t next_position = 0;
+        for (const auto& [_, block_position] : request.local_positions) {
+            next_position = std::max(next_position, block_position.value() + 1);
+        }
+        return LocalIndex(next_position);
+    }
+
+    static void _sort_projection_children_by_file_id(LocalColumnIndex* projection) {
+        DORIS_CHECK(projection != nullptr);
+        if (projection->project_all_children) {
+            return;
+        }
+        for (auto& child : projection->children) {
+            _sort_projection_children_by_file_id(&child);
+        }
+        std::ranges::sort(projection->children,
+                          [](const LocalColumnIndex& lhs, const LocalColumnIndex& rhs) {
+                              return lhs.local_id() < rhs.local_id();
+                          });
+    }
+
+    Status _add_column(LocalColumnIndex projection, std::vector<LocalColumnIndex>* scan_columns,
+                       bool is_predicate_column) {
+        DORIS_CHECK(scan_columns != nullptr);
+        const auto file_column_id = projection.column_id();
+        DORIS_CHECK(file_column_id != LocalColumnId::invalid());
+        if (!is_predicate_column &&
+            std::ranges::find_if(_request->predicate_columns, [&](const LocalColumnIndex& p) {
+                return p.column_id() == file_column_id;
+            }) != _request->predicate_columns.end()) {
+            return Status::OK();
+        }
+        if (!_request->local_positions.contains(file_column_id)) {
+            _request->local_positions.emplace(file_column_id, _next_block_position(*_request));
+        }
+
+        _sort_projection_children_by_file_id(&projection);
+        auto existing_projection_it = std::ranges::find_if(
+                *scan_columns,
+                [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; });
+        if (existing_projection_it == scan_columns->end()) {
+            scan_columns->push_back(std::move(projection));
+        } else {
+            RETURN_IF_ERROR(merge_local_column_index(&*existing_projection_it, projection));
+            _sort_projection_children_by_file_id(&*existing_projection_it);
+        }
+
+        if (is_predicate_column) {
+            auto it = std::ranges::find_if(
+                    _request->non_predicate_columns,
+                    [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; });
+            if (it != _request->non_predicate_columns.end()) {
+                _request->non_predicate_columns.erase(it);
+            }
+        }
+        return Status::OK();
+    }
+
+    FileScanRequest* _request = nullptr;
+};
+
+struct FileAggregateRequest {
+    struct Column {
+        // File-local projection for the aggregate column. For nested MIN/MAX, this points to the
+        // single primitive leaf that can be represented by file statistics.
+        LocalColumnIndex projection;
+    };
+
+    TPushAggOp::type agg_type = TPushAggOp::type::NONE;
+    std::vector<Column> columns;
+};
+
+struct FileAggregateResult {
+    struct Column {
+        // Mirrors FileAggregateRequest::Column::projection so TableReader can put the returned
+        // aggregate value back into the matching projected nested shape.
+        LocalColumnIndex projection;
+        bool has_min = false;
+        bool has_max = false;
+        Field min_value;
+        Field max_value;
+    };
+
+    int64_t count = 0;
+    std::vector<Column> columns;
+};
+
+// 文件物理读取层通用接口。
+// 该接口只描述 file-local schema、file-local scan request 和 file-local block。
+// TableReader/IcebergTableReader 可以通过它组合不同文件格式 reader。
+/**
+ *                                +-----> get_schema() -----------------+
+ * FileReader() -----> init() ----|                                      -----> close()
+ *                                +-----> open() -----> get_block() ----+
+ */
+class FileReader {
+public:
+    struct ReaderStatistics {
+        int32_t filtered_row_groups = 0;
+        int32_t filtered_row_groups_by_min_max = 0;
+        int32_t filtered_row_groups_by_bloom_filter = 0;
+        int32_t read_row_groups = 0;
+        int64_t filtered_group_rows = 0;
+        int64_t filtered_page_rows = 0;
+        int64_t lazy_read_filtered_rows = 0;
+        int64_t read_rows = 0;
+        int64_t filtered_bytes = 0;
+        int64_t column_read_time = 0;
+        int64_t parse_meta_time = 0;
+        int64_t parse_footer_time = 0;
+        int64_t file_footer_read_calls = 0;
+        int64_t file_footer_hit_cache = 0;
+        int64_t file_reader_create_time = 0;
+        int64_t open_file_num = 0;
+        int64_t row_group_filter_time = 0;
+        int64_t page_index_filter_time = 0;
+        int64_t read_page_index_time = 0;
+        int64_t parse_page_index_time = 0;
+        int64_t predicate_filter_time = 0;
+        int64_t dict_filter_rewrite_time = 0;
+        int64_t bloom_filter_read_time = 0;
+    };
+
+    FileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile)
+            : _system_properties(system_properties),
+              _file_description(std::move(file_description)),
+              _io_ctx(io_ctx),
+              _profile(profile) {}
+    virtual ~FileReader() = default;
+
+    // Initialize file reader and parse file metadata.
+    virtual Status init(RuntimeState* state);
+
+    // Get semantic file-local schema from file metadata. The file schema is determined by file
+    // format and file content, and does not contain table/global schema semantics. A file reader may
+    // expose raw file identifiers, such as Parquet field_id, through ColumnDefinition::identifier,
+    // but it must not interpret table-format semantics such as Iceberg name mapping,
+    // default/generated columns, or partition columns. File-format physical wrappers should be
+    // normalized away before exposing this schema; for example, Parquet MAP is exposed as key/value
+    // children rather than key_value/entry.
+    //
+    // Doris plans external-table scan types as nullable, including all nested children of complex
+    // types. This protects Doris from illegal or inconsistent values produced by external systems.
+    // Therefore every ColumnDefinition::type returned here must be nullable. Complex types must
+    // also expose nullable child types recursively, even if the physical file marks those fields as
+    // required.
+    //
+    // This method can only be called after init() successfully, but does not require open() to be
+    // called.
+    virtual Status get_schema(std::vector<ColumnDefinition>* file_schema) const = 0;
+
+    // Create the mapper that matches this reader's scan-request capabilities. TableReader still
+    // owns table-format semantics such as BY_NAME/BY_FIELD_ID/BY_INDEX, partition values and
+    // default expressions; the FileReader only chooses whether file-local requests support columnar
+    // lazy materialization/pruning or must materialize one flat list of required columns.
+    virtual std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const;
+
+    // Open the file reader with file-local scan request. The file reader should initialize its internal state according to the request, but does not need to interpret table/global schema semantics. For example, all schema change, filter localization, default/generated/partition columns should be handled in table reader layer. This method can only be called after init() successfully.
+    virtual Status open(std::shared_ptr<FileScanRequest> request) {
+        _request = std::move(request);
+        return Status::OK();
+    }
+
+    // 读取下一批 file-local block。
+    // 该方法只能在 open(FileScanRequest) 成功后调用。
+    // file_block 的列顺序和类型必须遵守 FileScanRequest，而不是 table/global schema。
+    // rows 返回当前批次输出行数；eof 表示当前文件 reader 是否读完；多文件切换由
+    // TableReader 负责。
+    virtual Status get_block(Block* file_block, size_t* rows, bool* eof) {
+        // stub 默认立即 EOF。
+        if (rows != nullptr) {
+            *rows = 0;
+        }
+        if (eof != nullptr) {
+            *eof = true;
+        }
+        _eof = true;
+        return Status::OK();
+    }
+
+    virtual Status get_aggregate_result(const FileAggregateRequest& request,
+                                        FileAggregateResult* result) {
+        return Status::NotSupported("FileReader does not support aggregate pushdown");
+    }
+
+    // Condition cache is managed by TableReader and consumed by physical file readers.
+    // On cache HIT, readers may skip granules whose cached bit is false before doing column IO.
+    // On cache MISS, readers mark a granule true when row-level predicates keep at least one row
+    // in that granule. Readers that cannot map batch rows to stable file-global row ids should
+    // keep the default no-op implementation.
+    virtual void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {}
+
+    // Total rows covered by this physical reader. TableReader uses it to pre-size the miss bitmap.
+    // Readers should return 0 if the metadata is unavailable or the row coordinate is unstable.
+    virtual int64_t get_total_rows() const { return 0; }
+
+    // 关闭当前物理文件 reader 并释放文件层状态。
+    // 该方法不处理 table-level delete/finalize 状态，后者由 TableReader 子类管理。
+    virtual Status close() {
+        _file_reader.reset();
+        _tracing_file_reader.reset();
+        _io_ctx.reset();
+        _eof = true;
+        return Status::OK();
+    }
+
+protected:
+    virtual void _init_profile() {}
+
+    io::FileReaderSPtr _file_reader;
+    // _tracing_file_reader wraps _file_reader.
+    // _file_reader is original file reader.
+    // _tracing_file_reader is tracing file reader with io context.
+    // If io_ctx is null, _tracing_file_reader will be the same as file_reader.
+    io::FileReaderSPtr _tracing_file_reader = nullptr;
+    std::shared_ptr<FileScanRequest> _request;
+    bool _eof = true;
+    ReaderStatistics _reader_statistics;
+    std::shared_ptr<io::FileSystemProperties> _system_properties;
+    std::unique_ptr<io::FileDescription> _file_description;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    RuntimeProfile* _profile = nullptr;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/hudi_jni_reader.cpp b/be/src/format_v2/jni/hudi_jni_reader.cpp
new file mode 100644
index 00000000000000..3247e3c683c2de
--- /dev/null
+++ b/be/src/format_v2/jni/hudi_jni_reader.cpp
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/hudi_jni_reader.h"
+
+#include <string_view>
+
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+#include "util/string_util.h"
+#include "util/uid_util.h"
+
+namespace doris::format::hudi {
+namespace {
+
+constexpr std::string_view HOODIE_CONF_PREFIX = "hoodie.";
+constexpr std::string_view HADOOP_CONF_PREFIX = "hadoop_conf.";
+
+} // namespace
+
+Status HudiJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for hudi jni reader");
+    }
+    if (!range.table_format_params.__isset.hudi_params) {
+        return Status::InternalError("missing hudi_params for hudi jni reader");
+    }
+    const auto& hudi_params = range.table_format_params.hudi_params;
+    if (!hudi_params.__isset.base_path || hudi_params.base_path.empty()) {
+        return Status::InternalError(
+                "missing base_path for hudi jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (!hudi_params.__isset.data_file_path || hudi_params.data_file_path.empty()) {
+        return Status::InternalError(
+                "missing data_file_path for hudi jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (!hudi_params.__isset.data_file_length) {
+        return Status::InternalError(
+                "missing data_file_length for hudi jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!hudi_params.__isset.column_names) {
+        return Status::InternalError(
+                "missing column_names for hudi jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (!hudi_params.__isset.column_types) {
+        return Status::InternalError(
+                "missing column_types for hudi jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    DORIS_CHECK(hudi_params.column_names.size() == hudi_params.column_types.size());
+    if (_scan_params == nullptr) {
+        return Status::InternalError(
+                "missing scan params for hudi jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    return Status::OK();
+}
+
+std::string HudiJniReader::connector_class() const {
+    return "org/apache/doris/hudi/HadoopHudiJniScanner";
+}
+
+Status HudiJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_scan_params != nullptr);
+    params->clear();
+
+    const auto& hudi_params = _current_range.table_format_params.hudi_params;
+    (*params)["base_path"] = hudi_params.base_path;
+    (*params)["data_file_path"] = hudi_params.data_file_path;
+    (*params)["data_file_length"] = std::to_string(hudi_params.data_file_length);
+    (*params)["delta_file_paths"] = join(hudi_params.delta_logs, ",");
+    (*params)["hudi_column_names"] = join(hudi_params.column_names, ",");
+    (*params)["hudi_column_types"] = join(hudi_params.column_types, "#");
+    (*params)["instant_time"] = hudi_params.instant_time;
+    (*params)["serde"] = hudi_params.serde;
+    (*params)["input_format"] = hudi_params.input_format;
+    if (_runtime_state != nullptr) {
+        (*params)["query_id"] = print_id(_runtime_state->query_id());
+    }
+
+    for (const auto& kv : _scan_params->properties) {
+        if (kv.first.starts_with(HOODIE_CONF_PREFIX)) {
+            (*params)[kv.first] = kv.second;
+        } else {
+            (*params)[std::string(HADOOP_CONF_PREFIX) + kv.first] = kv.second;
+        }
+    }
+    return Status::OK();
+}
+
+Status HudiJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        if (table_column.is_partition_key &&
+            find_partition_value(table_column, _partition_values) != nullptr) {
+            continue;
+        }
+        columns->push_back({
+                .java_name = table_column.name,
+                .output_index = i,
+                .output_type = table_column.type,
+                .transfer_type = table_column.type,
+                .replace_type = "not_replace",
+        });
+    }
+    return Status::OK();
+}
+
+Status HudiJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto original_rows = *rows;
+
+    const auto& columns = jni_columns();
+    DORIS_CHECK(columns.size() == jni_block->columns());
+    for (size_t i = 0; i < columns.size(); ++i) {
+        const auto& column = columns[i];
+        DORIS_CHECK(column.output_index < output_block->columns());
+        output_block->get_by_position(column.output_index).type = column.output_type;
+        output_block->replace_by_position(column.output_index,
+                                          jni_block->get_by_position(i).column);
+    }
+
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        const auto* partition_value = find_partition_value(table_column, _partition_values);
+        if (!table_column.is_partition_key || partition_value == nullptr) {
+            continue;
+        }
+        output_block->get_by_position(i).type = table_column.type;
+        output_block->replace_by_position(
+                i, table_column.type->create_column_const(original_rows, *partition_value));
+    }
+    DORIS_CHECK(output_block->rows() == original_rows);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/jni/hudi_jni_reader.h b/be/src/format_v2/jni/hudi_jni_reader.h
new file mode 100644
index 00000000000000..4beb6f2d1728b6
--- /dev/null
+++ b/be/src/format_v2/jni/hudi_jni_reader.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::hudi {
+
+class HudiJniReader final : public format::JniTableReader {
+public:
+    ~HudiJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+};
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.cpp b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp
new file mode 100644
index 00000000000000..b41d505f886d31
--- /dev/null
+++ b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/iceberg_sys_table_reader.h"
+
+#include <string_view>
+
+#include "format/jni/jni_data_bridge.h"
+#include "util/string_util.h"
+
+namespace doris::format::iceberg {
+namespace {
+
+constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop.";
+
+} // namespace
+
+Status IcebergSysTableJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError(
+                "missing table_format_params for iceberg sys table jni reader");
+    }
+    if (!range.table_format_params.__isset.iceberg_params) {
+        return Status::InternalError("missing iceberg_params for iceberg sys table jni reader");
+    }
+    if (!range.table_format_params.iceberg_params.__isset.serialized_split ||
+        range.table_format_params.iceberg_params.serialized_split.empty()) {
+        return Status::InternalError(
+                "missing serialized_split for iceberg sys table jni reader, "
+                "possibly caused by FE/BE protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string IcebergSysTableJniReader::connector_class() const {
+    return "org/apache/doris/iceberg/IcebergSysTableJniScanner";
+}
+
+Status IcebergSysTableJniReader::build_scanner_params(
+        std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    params->clear();
+    params->emplace("serialized_split",
+                    _current_range.table_format_params.iceberg_params.serialized_split);
+
+    std::vector<std::string> required_types;
+    required_types.reserve(_projected_columns.size());
+    for (const auto& column : _projected_columns) {
+        required_types.emplace_back(JniDataBridge::get_jni_type_with_different_string(column.type));
+    }
+    (*params)["required_types"] = join(required_types, "#");
+
+    if (_scan_params != nullptr && _scan_params->__isset.properties &&
+        !_scan_params->properties.empty()) {
+        for (const auto& kv : _scan_params->properties) {
+            (*params)[std::string(HADOOP_OPTION_PREFIX) + kv.first] = kv.second;
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.h b/be/src/format_v2/jni/iceberg_sys_table_reader.h
new file mode 100644
index 00000000000000..be254c39f3ffb5
--- /dev/null
+++ b/be/src/format_v2/jni/iceberg_sys_table_reader.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::iceberg {
+
+class IcebergSysTableJniReader final : public format::JniTableReader {
+public:
+    ~IcebergSysTableJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+};
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/jni/jdbc_reader.cpp b/be/src/format_v2/jni/jdbc_reader.cpp
new file mode 100644
index 00000000000000..e0391f3a13a8f0
--- /dev/null
+++ b/be/src/format_v2/jni/jdbc_reader.cpp
@@ -0,0 +1,187 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/jdbc_reader.h"
+
+#include <memory>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/table_reader.h"
+#include "util/jdbc_utils.h"
+
+namespace doris::format::jdbc {
+
+std::string JdbcJniReader::connector_class() const {
+    return "org/apache/doris/jdbc/JdbcJniScanner";
+}
+
+Status JdbcJniReader::prepare_split(const format::SplitReadOptions& options) {
+    _jdbc_params.clear();
+    if (options.current_range.__isset.table_format_params &&
+        options.current_range.table_format_params.table_format_type == "jdbc") {
+        _jdbc_params = std::map<std::string, std::string>(
+                options.current_range.table_format_params.jdbc_params.begin(),
+                options.current_range.table_format_params.jdbc_params.end());
+    }
+    return format::JniTableReader::prepare_split(options);
+}
+
+// need pass to the java side, so the java scanner can parse the params and construct the JDBC connection
+Status JdbcJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    *params = _jdbc_params;
+    if (params->contains("jdbc_driver_url")) {
+        std::string resolved;
+        if (JdbcUtils::resolve_driver_url((*params)["jdbc_driver_url"], &resolved).ok()) {
+            (*params)["jdbc_driver_url"] = resolved;
+        }
+    }
+    return Status::OK();
+}
+
+Status JdbcJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        const auto primitive_type = remove_nullable(table_column.type)->get_primitive_type();
+        columns->push_back({
+                .java_name = table_column.name,
+                .output_index = i,
+                .output_type = table_column.type,
+                .transfer_type = _transfer_type_for(table_column.type),
+                .replace_type = _replace_type_for(primitive_type),
+        });
+    }
+    return Status::OK();
+}
+
+Status JdbcJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto original_rows = *rows;
+    const auto& columns = jni_columns();
+    DORIS_CHECK(columns.size() == jni_block->columns());
+
+    for (size_t i = 0; i < columns.size(); ++i) {
+        const auto& column = columns[i];
+        DORIS_CHECK(column.output_type != nullptr);
+        DORIS_CHECK(column.output_index < output_block->columns());
+        if (_is_special_type(remove_nullable(column.output_type)->get_primitive_type())) {
+            RETURN_IF_ERROR(_cast_string_to_special_type(column, jni_block, i, output_block,
+                                                         original_rows));
+            continue;
+        }
+        output_block->get_by_position(column.output_index).type = column.output_type;
+        output_block->replace_by_position(column.output_index,
+                                          jni_block->get_by_position(i).column);
+    }
+    DORIS_CHECK(output_block->rows() == original_rows);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+std::string JdbcJniReader::_replace_type_for(PrimitiveType type) const {
+    switch (type) {
+    case PrimitiveType::TYPE_BITMAP:
+        return "bitmap";
+    case PrimitiveType::TYPE_HLL:
+        return "hll";
+    case PrimitiveType::TYPE_QUANTILE_STATE:
+        return "quantile_state";
+    case PrimitiveType::TYPE_JSONB:
+        return "jsonb";
+    default:
+        return "not_replace";
+    }
+}
+
+bool JdbcJniReader::_is_special_type(PrimitiveType type) const {
+    return type == PrimitiveType::TYPE_BITMAP || type == PrimitiveType::TYPE_HLL ||
+           type == PrimitiveType::TYPE_QUANTILE_STATE || type == PrimitiveType::TYPE_JSONB;
+}
+
+DataTypePtr JdbcJniReader::_transfer_type_for(const DataTypePtr& output_type) const {
+    DORIS_CHECK(output_type != nullptr);
+    if (!_is_special_type(remove_nullable(output_type)->get_primitive_type())) {
+        return output_type;
+    }
+    DataTypePtr string_type = std::make_shared<DataTypeString>();
+    if (output_type->is_nullable()) {
+        string_type = make_nullable(string_type);
+    }
+    return string_type;
+}
+
+Status JdbcJniReader::_cast_string_to_special_type(const format::JniTableReader::JniColumn& column,
+                                                   Block* jni_block, size_t jni_column_index,
+                                                   Block* output_block, size_t rows) {
+    DORIS_CHECK(column.output_type != nullptr);
+    DORIS_CHECK(column.transfer_type != nullptr);
+    const auto target_type = column.output_type;
+    const auto target_type_name = target_type->get_name();
+
+    ColumnPtr input_column = jni_block->get_by_position(jni_column_index).column;
+    ColumnPtr cast_param = target_type->create_column_const_with_default_value(1);
+
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(2);
+    argument_template.emplace_back(std::move(input_column), column.transfer_type,
+                                   "java.sql.String");
+    argument_template.emplace_back(std::move(cast_param), target_type, target_type_name);
+
+    FunctionBasePtr cast_function = SimpleFunctionFactory::instance().get_function(
+            "CAST", argument_template, make_nullable(target_type));
+    if (cast_function == nullptr) {
+        return Status::InternalError("Failed to find CAST function for type {}", target_type_name);
+    }
+
+    Block cast_block(argument_template);
+    const auto result_idx = cast_set<uint32_t>(cast_block.columns());
+    cast_block.insert({nullptr, make_nullable(target_type), "cast_result"});
+    RETURN_IF_ERROR(
+            cast_function->execute(nullptr, cast_block, {0}, result_idx, cast_set<int>(rows)));
+
+    auto result_column = cast_block.get_by_position(result_idx).column;
+    output_block->get_by_position(column.output_index).type = target_type;
+    if (target_type->is_nullable()) {
+        output_block->replace_by_position(column.output_index, result_column);
+    } else {
+        const auto* nullable_column = assert_cast<const ColumnNullable*>(result_column.get());
+        output_block->replace_by_position(column.output_index,
+                                          nullable_column->get_nested_column_ptr());
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::jdbc
diff --git a/be/src/format_v2/jni/jdbc_reader.h b/be/src/format_v2/jni/jdbc_reader.h
new file mode 100644
index 00000000000000..91a5878cb4622f
--- /dev/null
+++ b/be/src/format_v2/jni/jdbc_reader.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/types.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::jdbc {
+
+class JdbcJniReader final : public format::JniTableReader {
+public:
+    ~JdbcJniReader() override = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+protected:
+    std::string connector_class() const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+
+private:
+    bool _is_special_type(PrimitiveType type) const;
+    std::string _replace_type_for(PrimitiveType type) const;
+    DataTypePtr _transfer_type_for(const DataTypePtr& output_type) const;
+    Status _cast_string_to_special_type(const format::JniTableReader::JniColumn& column,
+                                        Block* jni_block, size_t jni_column_index,
+                                        Block* output_block, size_t rows);
+
+    std::map<std::string, std::string> _jdbc_params;
+};
+
+} // namespace doris::format::jdbc
diff --git a/be/src/format_v2/jni/jni_table_reader.cpp b/be/src/format_v2/jni/jni_table_reader.cpp
new file mode 100644
index 00000000000000..9245b2545f9162
--- /dev/null
+++ b/be/src/format_v2/jni/jni_table_reader.cpp
@@ -0,0 +1,384 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/jni_table_reader.h"
+
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+
+Status JniTableReader::init(TableReadOptions&& options) {
+    RETURN_IF_ERROR(TableReader::init(std::move(options)));
+    _init_profile();
+
+    // JNI readers do not go through TableReader::open_reader(), where file-local filters are
+    // prepared for file readers. They execute table-level conjuncts directly on the JNI block.
+    RowDescriptor row_desc;
+    for (const auto& conjunct : _conjuncts) {
+        RETURN_IF_ERROR(conjunct->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(conjunct->open(_runtime_state));
+    }
+    return Status::OK();
+}
+
+Status JniTableReader::prepare_split(const SplitReadOptions& options) {
+    _current_range = options.current_range;
+    RETURN_IF_ERROR(validate_scan_range(options.current_range));
+    RETURN_IF_ERROR(TableReader::prepare_split(options));
+    DORIS_CHECK(!_closed);
+    DORIS_CHECK(!_scanner_opened);
+    if (_is_table_level_count_active()) {
+        return Status::OK();
+    }
+    // Subclasses populate split-specific scanner params before calling this method, so the Java
+    // scanner can be opened here instead of being lazily opened by the first get_block() call.
+    return _open_jni_scanner();
+}
+
+Status JniTableReader::get_block(Block* output_block, bool* eos) {
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(eos != nullptr);
+    DORIS_CHECK(output_block->columns() == _projected_columns.size());
+    output_block->clear_column_data(_projected_columns.size());
+    if (_is_table_level_count_active()) {
+        return _read_table_level_count(output_block, eos);
+    }
+
+    DORIS_CHECK(_scanner_opened);
+    if (_eof) {
+        *eos = true;
+        return Status::OK();
+    }
+
+    while (true) {
+        size_t current_rows = 0;
+        bool current_eof = false;
+        // get next block data from Java scanner, and fill the data to _jni_block_template
+        RETURN_IF_ERROR(_get_next_jni_block(&current_rows, &current_eof));
+        if (current_eof) {
+            _eof = true;
+            RETURN_IF_ERROR(_close_jni_scanner());
+            *eos = true;
+            return Status::OK();
+        }
+
+        RETURN_IF_ERROR(finalize_jni_block(&_jni_block_template, output_block, &current_rows));
+        if (current_rows == 0) {
+            output_block->clear_column_data(_projected_columns.size());
+            continue;
+        }
+        *eos = false;
+        return Status::OK();
+    }
+}
+
+Status JniTableReader::_get_next_jni_block(size_t* rows, bool* eof) {
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    *rows = 0;
+    _jni_block_template.clear_column_data(_jni_columns.size());
+
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    long meta_address = 0;
+    {
+        SCOPED_RAW_TIMER(&_java_scan_watcher);
+        //getNextBatchMeta function, return the meta address
+        RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_next_batch)
+                                .call(&meta_address));
+    }
+    RETURN_ERROR_IF_EXC(env);
+    if (meta_address == 0) {
+        *eof = true;
+        return Status::OK();
+    }
+
+    JniDataBridge::TableMetaAddress table_meta(meta_address);
+    const auto num_rows = table_meta.next_meta_as_long();
+    if (num_rows == 0) {
+        *eof = true;
+        return Status::OK();
+    }
+
+    *rows = cast_set<size_t>(num_rows);
+    // fill data from Java table meta to C++ block
+    RETURN_IF_ERROR(_fill_jni_block(table_meta, *rows));
+    // call releaseTable() method in JAVA side to release the Java table Heap free Memory
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call());
+    RETURN_ERROR_IF_EXC(env);
+    *eof = false;
+    return Status::OK();
+}
+
+// Java table to C++ block
+Status JniTableReader::_fill_jni_block(JniDataBridge::TableMetaAddress& table_meta,
+                                       size_t num_rows) {
+    SCOPED_RAW_TIMER(&_fill_block_watcher);
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    for (size_t i = 0; i < _jni_columns.size(); ++i) {
+        const auto& read_column = _jni_columns[i];
+        auto& column_with_type_and_name = _jni_block_template.get_by_position(i);
+        auto& column_ptr = column_with_type_and_name.column;
+        RETURN_IF_ERROR(JniDataBridge::fill_column(table_meta, column_ptr,
+                                                   read_column.transfer_type, num_rows));
+        // call releaseColumn(int columnIndex) method in JAVA side to release the Java column Heap free Memory
+        RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_column)
+                                .with_arg(cast_set<int>(i))
+                                .call());
+        RETURN_ERROR_IF_EXC(env);
+    }
+    return Status::OK();
+}
+
+Status JniTableReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(jni_block->columns() == _jni_columns.size());
+    const auto original_rows = *rows;
+    for (size_t i = 0; i < _jni_columns.size(); ++i) {
+        const auto& column = _jni_columns[i];
+        DORIS_CHECK(column.output_index < output_block->columns());
+        output_block->get_by_position(column.output_index).type = column.output_type;
+        output_block->replace_by_position(column.output_index,
+                                          jni_block->get_by_position(i).column);
+    }
+    DORIS_CHECK(output_block->rows() == original_rows);
+    // Apply conjuncts on the output block
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+Status JniTableReader::build_jni_columns(std::vector<JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        columns->push_back({
+                .java_name = table_column.name,
+                .output_index = i,
+                .output_type = table_column.type,
+                .transfer_type = table_column.type,
+                .replace_type = "not_replace",
+        });
+    }
+    return Status::OK();
+}
+
+int64_t JniTableReader::self_split_weight() const {
+    return _current_range.__isset.self_split_weight ? _current_range.self_split_weight : -1;
+}
+
+Status JniTableReader::close() {
+    if (_closed) {
+        return Status::OK();
+    }
+    _closed = true;
+    RETURN_IF_ERROR(_close_jni_scanner());
+    return TableReader::close();
+}
+
+Status JniTableReader::_close_jni_scanner() {
+    if (!_scanner_opened) {
+        JNIEnv* env = nullptr;
+        if (!_jni_scanner_obj.uninitialized()) {
+            RETURN_IF_ERROR(Jni::Env::Get(&env));
+        }
+        _reset_split_state(env);
+        return Status::OK();
+    }
+
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    if (_scanner_profile != nullptr) {
+        COUNTER_UPDATE(_open_scanner_time, _jni_scanner_open_watcher);
+        COUNTER_UPDATE(_fill_block_time, _fill_block_watcher);
+    }
+
+    RETURN_ERROR_IF_EXC(env);
+    jlong append_data_time = 0;
+    RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_append_data_time)
+                            .call(&append_data_time));
+    jlong create_vector_table_time = 0;
+    RETURN_IF_ERROR(
+            _jni_scanner_obj.call_long_method(env, _jni_scanner_get_create_vector_table_time)
+                    .call(&create_vector_table_time));
+    if (_scanner_profile != nullptr) {
+        COUNTER_UPDATE(_java_append_data_time, append_data_time);
+        COUNTER_UPDATE(_java_create_vector_table_time, create_vector_table_time);
+        COUNTER_UPDATE(_java_scan_time,
+                       _java_scan_watcher - append_data_time - create_vector_table_time);
+        _max_time_split_weight_counter->conditional_update(
+                _jni_scanner_open_watcher + _fill_block_watcher + _java_scan_watcher,
+                self_split_weight());
+    }
+
+    // _fill_jni_block may fail before releasing the current Java table. JniScanner::releaseTable()
+    // is idempotent, so closing the split always releases it.
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call());
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_close).call());
+    _reset_split_state(env);
+    return Status::OK();
+}
+
+void JniTableReader::_reset_split_state(JNIEnv* env) {
+    if (!_jni_scanner_obj.uninitialized()) {
+        DORIS_CHECK(env != nullptr);
+        _jni_scanner_obj.reset(env);
+    }
+    _scanner_opened = false;
+    _eof = false;
+    _scanner_params.clear();
+    _jni_columns.clear();
+    _jni_block_template.clear();
+    _jni_scanner_open_watcher = 0;
+    _java_scan_watcher = 0;
+    _fill_block_watcher = 0;
+}
+
+Status JniTableReader::_open_jni_scanner() {
+    // subclasses build map<string,string> _scanner_params to JAVA side
+    RETURN_IF_ERROR(build_scanner_params(&_scanner_params));
+    // subclasses build _jni_columns info to JAVA side, including column name and column type
+    RETURN_IF_ERROR(build_jni_columns(&_jni_columns));
+    // _jni_columns info is used to build Java scanner schema params and JNI block template.
+    _prepare_jni_scanner_schema();
+
+    if (_runtime_state != nullptr) {
+        _batch_size = _runtime_state->batch_size();
+        _scanner_params["time_zone"] = _runtime_state->timezone();
+    }
+
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    SCOPED_RAW_TIMER(&_jni_scanner_open_watcher);
+    RETURN_IF_ERROR(_register_jni_class_functions_once(env));
+    RETURN_IF_ERROR(_create_jni_scanner_object(env, cast_set<int>(_batch_size)));
+    // call open() method in JAVA side.
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_open).call());
+    RETURN_ERROR_IF_EXC(env);
+
+    _scanner_opened = true;
+    return Status::OK();
+}
+
+void JniTableReader::_prepare_jni_scanner_schema() {
+    std::vector<std::string> required_fields;
+    std::vector<std::string> column_types;
+    std::vector<std::string> replace_types;
+    required_fields.reserve(_jni_columns.size());
+    column_types.reserve(_jni_columns.size());
+    replace_types.reserve(_jni_columns.size());
+    _jni_block_template.clear();
+    _jni_block_template.reserve(_jni_columns.size());
+
+    bool has_replace_type = false;
+    for (const auto& column : _jni_columns) {
+        DORIS_CHECK(column.transfer_type != nullptr);
+        required_fields.push_back(column.java_name);
+        column_types.push_back(
+                JniDataBridge::get_jni_type_with_different_string(column.transfer_type));
+        replace_types.push_back(column.replace_type);
+        has_replace_type = has_replace_type || column.replace_type != "not_replace";
+        _jni_block_template.insert(
+                {column.transfer_type->create_column(), column.transfer_type, column.java_name});
+    }
+    _scanner_params["required_fields"] = join(required_fields, ",");
+    _scanner_params["columns_types"] = join(column_types, "#");
+    if (has_replace_type) {
+        _scanner_params["replace_string"] = join(replace_types, ",");
+    }
+}
+
+Status JniTableReader::_register_jni_class_functions_once(JNIEnv* env) {
+    if (!_jni_scanner_cls.uninitialized()) {
+        return Status::OK();
+    }
+
+    RETURN_IF_ERROR(
+            Jni::Util::get_jni_scanner_class(env, connector_class().c_str(), &_jni_scanner_cls));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "<init>", "(ILjava/util/Map;)V",
+                                                &_jni_scanner_constructor));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "open", "()V", &_jni_scanner_open));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getNextBatchMeta", "()J",
+                                                &_jni_scanner_get_next_batch));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getAppendDataTime", "()J",
+                                                &_jni_scanner_get_append_data_time));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getCreateVectorTableTime", "()J",
+                                                &_jni_scanner_get_create_vector_table_time));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "close", "()V", &_jni_scanner_close));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "releaseColumn", "(I)V",
+                                                &_jni_scanner_release_column));
+    RETURN_IF_ERROR(
+            _jni_scanner_cls.get_method(env, "releaseTable", "()V", &_jni_scanner_release_table));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getStatistics", "()Ljava/util/Map;",
+                                                &_jni_scanner_get_statistics));
+    RETURN_IF_ERROR(
+            _jni_scanner_cls.get_method(env, "setBatchSize", "(I)V", &_jni_scanner_set_batch_size));
+    return Status::OK();
+}
+
+Status JniTableReader::_create_jni_scanner_object(JNIEnv* env, int batch_size) {
+    DORIS_CHECK(!_jni_scanner_cls.uninitialized());
+    DORIS_CHECK(!_jni_scanner_constructor.uninitialized());
+    DORIS_CHECK(_jni_scanner_obj.uninitialized());
+    Jni::LocalObject hashmap_object;
+    RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, _scanner_params, &hashmap_object));
+    RETURN_IF_ERROR(_jni_scanner_cls.new_object(env, _jni_scanner_constructor)
+                            .with_arg(batch_size)
+                            .with_arg(hashmap_object)
+                            .call(&_jni_scanner_obj));
+    return Status::OK();
+}
+
+void JniTableReader::_init_profile() {
+    if (_scanner_profile == nullptr) {
+        return;
+    }
+    const auto connector_name = _connector_name();
+    ADD_TIMER(_scanner_profile, connector_name);
+    _open_scanner_time = ADD_CHILD_TIMER(_scanner_profile, "OpenScannerTime", connector_name);
+    _java_scan_time = ADD_CHILD_TIMER(_scanner_profile, "JavaScanTime", connector_name);
+    _java_append_data_time =
+            ADD_CHILD_TIMER(_scanner_profile, "JavaAppendDataTime", connector_name);
+    _java_create_vector_table_time =
+            ADD_CHILD_TIMER(_scanner_profile, "JavaCreateVectorTableTime", connector_name);
+    _fill_block_time = ADD_CHILD_TIMER(_scanner_profile, "FillBlockTime", connector_name);
+    _max_time_split_weight_counter = _scanner_profile->add_conditition_counter(
+            "MaxTimeSplitWeight", TUnit::UNIT, [](int64_t _c, int64_t c) { return c > _c; },
+            connector_name);
+}
+
+std::string JniTableReader::_connector_name() const {
+    const auto parts = split(connector_class(), "/");
+    return parts.empty() ? connector_class() : parts.back();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/jni_table_reader.h b/be/src/format_v2/jni/jni_table_reader.h
new file mode 100644
index 00000000000000..4d1b0f4768cf8d
--- /dev/null
+++ b/be/src/format_v2/jni/jni_table_reader.h
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "format/jni/jni_data_bridge.h"
+#include "format_v2/table_reader.h"
+#include "runtime/runtime_profile.h"
+#include "util/jni-util.h"
+
+namespace doris::format {
+
+class JniTableReader : public TableReader {
+public:
+    struct JniColumn {
+        std::string java_name;
+        // The index of the column in the output block, which is used to place the data from Java side to the correct position in the output block.
+        size_t output_index = 0;
+        // The original output type of the column, which is used for type casting after getting the data from Java side. like Bitmap column
+        // For columns without special types, the transfer_type and output_type are the same.
+        DataTypePtr output_type;
+        //Bitmap Type transfer type is String, so the Java scanner will convert the Bitmap column to String before transferring the data to C++, and then C++ side can convert the String back to Bitmap.
+        DataTypePtr transfer_type;
+        std::string replace_type = "not_replace";
+    };
+
+    ~JniTableReader() override = default;
+
+    Status init(TableReadOptions&& options) override;
+    Status prepare_split(const SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override;
+
+protected:
+    // Subclasses should implement these methods to specify the Java scanner class
+    virtual std::string connector_class() const = 0;
+    virtual Status validate_scan_range(const TFileRangeDesc&) const { return Status::OK(); }
+    // Subclasses should implement this method to build the scanner params map
+    virtual Status build_scanner_params(std::map<std::string, std::string>* params) const = 0;
+    // Subclasses can override this method when Java transfer types differ from output types.
+    virtual Status build_jni_columns(std::vector<JniColumn>* columns) const;
+    virtual Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows);
+    // used for profile
+    virtual int64_t self_split_weight() const;
+    const std::vector<JniColumn>& jni_columns() const { return _jni_columns; }
+    TFileRangeDesc _current_range;
+
+private:
+    // init
+    void _init_profile();
+    std::string _connector_name() const;
+    // open
+    Status _open_jni_scanner();
+    void _reset_split_state(JNIEnv* env);
+    void _prepare_jni_scanner_schema();
+    Status _register_jni_class_functions_once(JNIEnv* env);
+    Status _create_jni_scanner_object(JNIEnv* env, int batch_size);
+    // get_next
+    Status _get_next_jni_block(size_t* rows, bool* eof);
+    Status _fill_jni_block(JniDataBridge::TableMetaAddress& table_meta, size_t num_rows);
+
+    Status _close_jni_scanner();
+
+    std::map<std::string, std::string> _scanner_params;
+    std::vector<JniColumn> _jni_columns;
+    Block _jni_block_template;
+
+    bool _closed = false;
+    bool _scanner_opened = false;
+    bool _eof = false;
+    size_t _batch_size = 0;
+
+    RuntimeProfile::Counter* _open_scanner_time = nullptr;
+    RuntimeProfile::Counter* _java_scan_time = nullptr;
+    RuntimeProfile::Counter* _java_append_data_time = nullptr;
+    RuntimeProfile::Counter* _java_create_vector_table_time = nullptr;
+    RuntimeProfile::Counter* _fill_block_time = nullptr;
+    RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr;
+
+    int64_t _jni_scanner_open_watcher = 0;
+    int64_t _java_scan_watcher = 0;
+    int64_t _fill_block_watcher = 0;
+
+    Jni::GlobalClass _jni_scanner_cls;
+    Jni::GlobalObject _jni_scanner_obj;
+    Jni::MethodId _jni_scanner_constructor;
+    Jni::MethodId _jni_scanner_open;
+    Jni::MethodId _jni_scanner_get_append_data_time;
+    Jni::MethodId _jni_scanner_get_create_vector_table_time;
+    Jni::MethodId _jni_scanner_get_next_batch;
+    Jni::MethodId _jni_scanner_close;
+    Jni::MethodId _jni_scanner_release_column;
+    Jni::MethodId _jni_scanner_release_table;
+    Jni::MethodId _jni_scanner_get_statistics;
+    Jni::MethodId _jni_scanner_set_batch_size;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/max_compute_jni_reader.cpp b/be/src/format_v2/jni/max_compute_jni_reader.cpp
new file mode 100644
index 00000000000000..a26e9e229b5d82
--- /dev/null
+++ b/be/src/format_v2/jni/max_compute_jni_reader.cpp
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/max_compute_jni_reader.h"
+
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+
+namespace doris::format::max_compute {
+
+MaxComputeJniReader::MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc)
+        : _table_desc(table_desc) {}
+
+Status MaxComputeJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for max compute jni reader");
+    }
+    if (!range.table_format_params.__isset.max_compute_params) {
+        return Status::InternalError("missing max_compute_params for max compute jni reader");
+    }
+    const auto& max_compute_params = range.table_format_params.max_compute_params;
+    if (!max_compute_params.__isset.session_id || max_compute_params.session_id.empty()) {
+        return Status::InternalError(
+                "missing session_id for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!max_compute_params.__isset.table_batch_read_session ||
+        max_compute_params.table_batch_read_session.empty()) {
+        return Status::InternalError(
+                "missing table_batch_read_session for max compute jni reader, possibly caused "
+                "by FE/BE protocol mismatch");
+    }
+    if (!range.__isset.start_offset) {
+        return Status::InternalError(
+                "missing start_offset for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!range.__isset.size) {
+        return Status::InternalError(
+                "missing size for max compute jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (_scan_params == nullptr) {
+        return Status::InternalError(
+                "missing scan params for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string MaxComputeJniReader::connector_class() const {
+    return "org/apache/doris/maxcompute/MaxComputeJniScanner";
+}
+
+Status MaxComputeJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_table_desc != nullptr);
+    params->clear();
+
+    *params = _table_desc->properties();
+    (*params)["endpoint"] = _table_desc->endpoint();
+    (*params)["quota"] = _table_desc->quota();
+    (*params)["project"] = _table_desc->project();
+    (*params)["table"] = _table_desc->table();
+
+    const auto& max_compute_params = _current_range.table_format_params.max_compute_params;
+    (*params)["session_id"] = max_compute_params.session_id;
+    (*params)["scan_serializer"] = max_compute_params.table_batch_read_session;
+    (*params)["start_offset"] = std::to_string(_current_range.start_offset);
+    (*params)["split_size"] = std::to_string(_current_range.size);
+    (*params)["connect_timeout"] = std::to_string(max_compute_params.connect_timeout);
+    (*params)["read_timeout"] = std::to_string(max_compute_params.read_timeout);
+    (*params)["retry_count"] = std::to_string(max_compute_params.retry_times);
+    return Status::OK();
+}
+
+Status MaxComputeJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        if (table_column.is_partition_key &&
+            find_partition_value(table_column, _partition_values) != nullptr) {
+            continue;
+        }
+        columns->push_back({
+                .java_name = table_column.name,
+                .output_index = i,
+                .output_type = table_column.type,
+                .transfer_type = table_column.type,
+                .replace_type = "not_replace",
+        });
+    }
+    return Status::OK();
+}
+
+Status MaxComputeJniReader::finalize_jni_block(Block* jni_block, Block* output_block,
+                                               size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto original_rows = *rows;
+
+    const auto& columns = jni_columns();
+    DORIS_CHECK(columns.size() == jni_block->columns());
+    for (size_t i = 0; i < columns.size(); ++i) {
+        const auto& column = columns[i];
+        DORIS_CHECK(column.output_index < output_block->columns());
+        output_block->get_by_position(column.output_index).type = column.output_type;
+        output_block->replace_by_position(column.output_index,
+                                          jni_block->get_by_position(i).column);
+    }
+
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        const auto* partition_value = find_partition_value(table_column, _partition_values);
+        if (!table_column.is_partition_key || partition_value == nullptr) {
+            continue;
+        }
+        output_block->get_by_position(i).type = table_column.type;
+        output_block->replace_by_position(
+                i, table_column.type->create_column_const(original_rows, *partition_value));
+    }
+    DORIS_CHECK(output_block->rows() == original_rows);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+} // namespace doris::format::max_compute
diff --git a/be/src/format_v2/jni/max_compute_jni_reader.h b/be/src/format_v2/jni/max_compute_jni_reader.h
new file mode 100644
index 00000000000000..8addce07988e4c
--- /dev/null
+++ b/be/src/format_v2/jni/max_compute_jni_reader.h
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+class MaxComputeTableDescriptor;
+} // namespace doris
+
+namespace doris::format::max_compute {
+
+class MaxComputeJniReader final : public format::JniTableReader {
+public:
+    explicit MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc);
+    ~MaxComputeJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+
+private:
+    const doris::MaxComputeTableDescriptor* _table_desc = nullptr;
+};
+
+} // namespace doris::format::max_compute
diff --git a/be/src/format_v2/jni/paimon_jni_reader.cpp b/be/src/format_v2/jni/paimon_jni_reader.cpp
new file mode 100644
index 00000000000000..c68cc7b952a5d5
--- /dev/null
+++ b/be/src/format_v2/jni/paimon_jni_reader.cpp
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/paimon_jni_reader.h"
+
+#include <string_view>
+
+namespace doris::format::paimon {
+namespace {
+
+constexpr std::string_view PAIMON_OPTION_PREFIX = "paimon.";
+constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop.";
+
+} // namespace
+
+Status PaimonJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for paimon jni reader");
+    }
+    if (!range.table_format_params.__isset.paimon_params) {
+        return Status::InternalError("missing paimon_params for paimon jni reader");
+    }
+    if (!range.table_format_params.paimon_params.__isset.paimon_split ||
+        range.table_format_params.paimon_params.paimon_split.empty()) {
+        return Status::InternalError(
+                "missing paimon_split for paimon jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (!range.table_format_params.paimon_params.__isset.reader_type ||
+        range.table_format_params.paimon_params.reader_type != TPaimonReaderType::PAIMON_JNI) {
+        return Status::InternalError(
+                "invalid reader_type for paimon jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (_scan_params == nullptr || !_scan_params->__isset.serialized_table ||
+        _scan_params->serialized_table.empty()) {
+        return Status::InternalError(
+                "missing serialized_table for paimon jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!_scan_params->__isset.paimon_predicate || _scan_params->paimon_predicate.empty()) {
+        return Status::InternalError(
+                "missing paimon_predicate for paimon jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string PaimonJniReader::connector_class() const {
+    return "org/apache/doris/paimon/PaimonJniScanner";
+}
+
+Status PaimonJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_scan_params != nullptr);
+    params->clear();
+
+    const auto& paimon_params = _current_range.table_format_params.paimon_params;
+    (*params)["paimon_split"] = paimon_params.paimon_split;
+    (*params)["paimon_predicate"] = _scan_params->paimon_predicate;
+    (*params)["serialized_table"] = _scan_params->serialized_table;
+
+    if (_scan_params->__isset.paimon_options && !_scan_params->paimon_options.empty()) {
+        for (const auto& kv : _scan_params->paimon_options) {
+            (*params)[std::string(PAIMON_OPTION_PREFIX) + kv.first] = kv.second;
+        }
+    }
+    if (_scan_params->__isset.properties && !_scan_params->properties.empty()) {
+        for (const auto& kv : _scan_params->properties) {
+            (*params)[std::string(HADOOP_OPTION_PREFIX) + kv.first] = kv.second;
+        }
+    }
+    // TODO: Remove legacy split-level paimon_predicate, paimon_options and hadoop_conf from thrift
+    // after all readers stop using them. Format V2 Paimon JNI consumes the scan-level fields
+    // planned by current FE and intentionally does not fall back to deprecated split-level fields.
+    return Status::OK();
+}
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/jni/paimon_jni_reader.h b/be/src/format_v2/jni/paimon_jni_reader.h
new file mode 100644
index 00000000000000..f789edb0b17bd0
--- /dev/null
+++ b/be/src/format_v2/jni/paimon_jni_reader.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::paimon {
+
+class PaimonJniReader final : public format::JniTableReader {
+public:
+    ~PaimonJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+};
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.cpp b/be/src/format_v2/jni/trino_connector_jni_reader.cpp
new file mode 100644
index 00000000000000..11c9945c5dea16
--- /dev/null
+++ b/be/src/format_v2/jni/trino_connector_jni_reader.cpp
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/trino_connector_jni_reader.h"
+
+#include <string_view>
+
+#include "common/config.h"
+#include "util/jni-util.h"
+
+namespace doris::format::trino_connector {
+namespace {
+
+constexpr std::string_view TRINO_CONNECTOR_OPTION_PREFIX = "trino.";
+constexpr std::string_view TRINO_CONNECTOR_NAME = "connector.name";
+
+} // namespace
+
+Status TrinoConnectorJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for trino connector jni reader");
+    }
+    if (!range.table_format_params.__isset.trino_connector_params) {
+        return Status::InternalError(
+                "missing trino_connector_params for trino connector jni reader");
+    }
+
+    const auto& trino_params = range.table_format_params.trino_connector_params;
+    if (!trino_params.__isset.catalog_name || trino_params.catalog_name.empty()) {
+        return Status::InternalError(
+                "missing catalog_name for trino connector jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_options ||
+        !trino_params.trino_connector_options.contains(std::string(TRINO_CONNECTOR_NAME))) {
+        return Status::InternalError(
+                "missing trino connector.name option for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_split || trino_params.trino_connector_split.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_split for trino connector jni reader, possibly caused "
+                "by FE/BE protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_table_handle ||
+        trino_params.trino_connector_table_handle.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_table_handle for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_column_handles ||
+        trino_params.trino_connector_column_handles.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_column_handles for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_column_metadata ||
+        trino_params.trino_connector_column_metadata.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_column_metadata for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!trino_params.__isset.trino_connector_trascation_handle ||
+        trino_params.trino_connector_trascation_handle.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_trascation_handle for trino connector jni reader, "
+                "possibly caused by FE/BE protocol mismatch");
+    }
+    return Status::OK();
+}
+
+Status TrinoConnectorJniReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(validate_scan_range(options.current_range));
+    RETURN_IF_ERROR(_set_spi_plugins_dir());
+    return format::JniTableReader::prepare_split(options);
+}
+
+std::string TrinoConnectorJniReader::connector_class() const {
+    return "org/apache/doris/trinoconnector/TrinoConnectorJniScanner";
+}
+
+Status TrinoConnectorJniReader::build_scanner_params(
+        std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    params->clear();
+
+    const auto& trino_params = _current_range.table_format_params.trino_connector_params;
+    (*params)["catalog_name"] = trino_params.catalog_name;
+    (*params)["db_name"] = trino_params.db_name;
+    (*params)["table_name"] = trino_params.table_name;
+    (*params)["trino_connector_split"] = trino_params.trino_connector_split;
+    (*params)["trino_connector_table_handle"] = trino_params.trino_connector_table_handle;
+    (*params)["trino_connector_column_handles"] = trino_params.trino_connector_column_handles;
+    (*params)["trino_connector_column_metadata"] = trino_params.trino_connector_column_metadata;
+    (*params)["trino_connector_predicate"] = trino_params.trino_connector_predicate;
+    (*params)["trino_connector_trascation_handle"] = trino_params.trino_connector_trascation_handle;
+
+    for (const auto& kv : trino_params.trino_connector_options) {
+        (*params)[std::string(TRINO_CONNECTOR_OPTION_PREFIX) + kv.first] = kv.second;
+    }
+    return Status::OK();
+}
+
+Status TrinoConnectorJniReader::_set_spi_plugins_dir() const {
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+
+    Jni::LocalClass plugin_loader_cls;
+    const std::string plugin_loader_class =
+            "org/apache/doris/trinoconnector/TrinoConnectorPluginLoader";
+    RETURN_IF_ERROR(
+            Jni::Util::get_jni_scanner_class(env, plugin_loader_class.c_str(), &plugin_loader_cls));
+
+    Jni::MethodId set_plugins_dir_method;
+    RETURN_IF_ERROR(plugin_loader_cls.get_static_method(
+            env, "setPluginsDir", "(Ljava/lang/String;)V", &set_plugins_dir_method));
+
+    Jni::LocalString trino_connector_plugin_path;
+    RETURN_IF_ERROR(Jni::LocalString::new_string(
+            env, doris::config::trino_connector_plugin_dir.c_str(), &trino_connector_plugin_path));
+
+    return plugin_loader_cls.call_static_void_method(env, set_plugins_dir_method)
+            .with_arg(trino_connector_plugin_path)
+            .call();
+}
+
+} // namespace doris::format::trino_connector
diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.h b/be/src/format_v2/jni/trino_connector_jni_reader.h
new file mode 100644
index 00000000000000..a20c3a5f62ef96
--- /dev/null
+++ b/be/src/format_v2/jni/trino_connector_jni_reader.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::trino_connector {
+
+class TrinoConnectorJniReader final : public format::JniTableReader {
+public:
+    ~TrinoConnectorJniReader() override = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+
+private:
+    Status _set_spi_plugins_dir() const;
+};
+
+} // namespace doris::format::trino_connector
diff --git a/be/src/format_v2/json/json_reader.cpp b/be/src/format_v2/json/json_reader.cpp
new file mode 100644
index 00000000000000..04f74f52aefa9a
--- /dev/null
+++ b/be/src/format_v2/json/json_reader.cpp
@@ -0,0 +1,1145 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/json/json_reader.h"
+
+#include <rapidjson/document.h>
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <map>
+#include <string_view>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr_context.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "format_v2/column_mapper.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader.h"
+#include "io/fs/stream_load_pipe.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/slice.h"
+
+namespace doris::format::json {
+namespace {
+
+DataTypePtr json_file_type_from_slot_type(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+
+    // Text-like file readers expose CHAR/VARCHAR as STRING and let the table column mapper cast to
+    // the destination slot type. JSON follows the same file-schema convention so that v2 mapping
+    // behaves consistently across text formats.
+    const bool is_nullable = type->is_nullable();
+    const auto nested_type = remove_nullable(type);
+    DataTypePtr file_type;
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+        file_type = std::make_shared<DataTypeString>();
+        break;
+    case TYPE_ARRAY: {
+        const auto* array_type = assert_cast<const DataTypeArray*>(nested_type.get());
+        file_type = std::make_shared<DataTypeArray>(
+                json_file_type_from_slot_type(array_type->get_nested_type()));
+        break;
+    }
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        file_type = std::make_shared<DataTypeMap>(
+                json_file_type_from_slot_type(map_type->get_key_type()),
+                json_file_type_from_slot_type(map_type->get_value_type()));
+        break;
+    }
+    case TYPE_STRUCT: {
+        const auto* struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
+        DataTypes file_children;
+        file_children.reserve(struct_type->get_elements().size());
+        for (const auto& child_type : struct_type->get_elements()) {
+            file_children.push_back(json_file_type_from_slot_type(child_type));
+        }
+        file_type =
+                std::make_shared<DataTypeStruct>(file_children, struct_type->get_element_names());
+        break;
+    }
+    default:
+        file_type = nested_type;
+        break;
+    }
+
+    return is_nullable ? make_nullable(file_type) : file_type;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id);
+
+std::vector<ColumnDefinition> synthesize_file_children_from_type(const DataTypePtr& type) {
+    std::vector<ColumnDefinition> children;
+    if (type == nullptr) {
+        return children;
+    }
+    const auto nested_type = remove_nullable(type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_ARRAY: {
+        const auto* array_type = assert_cast<const DataTypeArray*>(nested_type.get());
+        children.push_back(synthetic_file_child("element", array_type->get_nested_type(), 0));
+        break;
+    }
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
+        children.push_back(synthetic_file_child("key", map_type->get_key_type(), 0));
+        children.push_back(synthetic_file_child("value", map_type->get_value_type(), 1));
+        break;
+    }
+    case TYPE_STRUCT: {
+        const auto* struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
+        children.reserve(struct_type->get_elements().size());
+        for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) {
+            children.push_back(synthetic_file_child(struct_type->get_element_name(idx),
+                                                    struct_type->get_element(idx),
+                                                    cast_set<int32_t>(idx)));
+        }
+        break;
+    }
+    default:
+        break;
+    }
+    return children;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) {
+    ColumnDefinition child;
+    child.identifier = Field::create_field<TYPE_STRING>(name);
+    child.local_id = local_id;
+    child.name = name;
+    child.type = std::move(type);
+    child.children = synthesize_file_children_from_type(child.type);
+    return child;
+}
+
+std::string lower_key(std::string_view key) {
+    std::string lowered(key.data(), key.size());
+    std::transform(lowered.begin(), lowered.end(), lowered.begin(), ::tolower);
+    return lowered;
+}
+
+} // namespace
+
+JsonReader::JsonReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                       std::unique_ptr<io::FileDescription>& file_description,
+                       std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                       const TFileScanRangeParams* scan_params, const TFileRangeDesc& range,
+                       const std::vector<SlotDescriptor*>& file_slot_descs,
+                       TFileCompressType::type range_compress_type,
+                       std::optional<TUniqueId> stream_load_id)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile),
+          _scan_params(scan_params),
+          _range(range),
+          _source_file_slot_descs(file_slot_descs),
+          _range_compress_type(range_compress_type),
+          _stream_load_id(std::move(stream_load_id)) {}
+
+JsonReader::~JsonReader() {
+    static_cast<void>(close());
+}
+
+Status JsonReader::init(RuntimeState* state) {
+    _runtime_state = state;
+    if (_scan_params == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires scan params");
+    }
+    if (_file_description == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires file description");
+    }
+    if (_runtime_state == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires runtime state");
+    }
+    if (!_scan_params->__isset.file_attributes) {
+        return Status::InvalidArgument("JSON v2 reader requires file attributes");
+    }
+
+    const auto& attributes = _scan_params->file_attributes;
+    if (attributes.__isset.text_params && attributes.text_params.__isset.line_delimiter) {
+        _line_delimiter = attributes.text_params.line_delimiter;
+    } else {
+        _line_delimiter = "\n";
+    }
+    _line_delimiter_length = _line_delimiter.size();
+    _jsonpaths = attributes.__isset.jsonpaths ? attributes.jsonpaths : "";
+    _json_root = attributes.__isset.json_root ? attributes.json_root : "";
+    _read_json_by_line = attributes.__isset.read_json_by_line && attributes.read_json_by_line;
+    _strip_outer_array = attributes.__isset.strip_outer_array && attributes.strip_outer_array;
+    _num_as_string = attributes.__isset.num_as_string && attributes.num_as_string;
+    _fuzzy_parse = attributes.__isset.fuzzy_parse && attributes.fuzzy_parse;
+    _openx_json_ignore_malformed = attributes.__isset.openx_json_ignore_malformed &&
+                                   attributes.openx_json_ignore_malformed;
+    _is_hive_table = _range.table_format_params.table_format_type == "hive";
+    _file_compress_type = _range_compress_type != TFileCompressType::UNKNOWN
+                                  ? _range_compress_type
+                                  : _scan_params->compress_type;
+
+    _source_serdes = create_data_type_serdes(_source_file_slot_descs);
+    _file_schema.clear();
+    _file_schema.reserve(_source_file_slot_descs.size());
+    // JSON has no physical footer schema. The FE file slots are therefore the authoritative schema
+    // for both field names and source local ids.
+    for (size_t idx = 0; idx < _source_file_slot_descs.size(); ++idx) {
+        const auto* slot = _source_file_slot_descs[idx];
+        DORIS_CHECK(slot != nullptr);
+        ColumnDefinition field;
+        field.identifier = Field::create_field<TYPE_STRING>(slot->col_name());
+        field.local_id = cast_set<int32_t>(idx);
+        field.name = slot->col_name();
+        field.type = json_file_type_from_slot_type(slot->get_data_type_ptr());
+        field.children = synthesize_file_children_from_type(field.type);
+        _file_schema.push_back(std::move(field));
+    }
+    _eof = false;
+    return Status::OK();
+}
+
+Status JsonReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    if (file_schema == nullptr) {
+        return Status::InvalidArgument("JSON v2 file_schema is null");
+    }
+    *file_schema = _file_schema;
+    return Status::OK();
+}
+
+std::unique_ptr<TableColumnMapper> JsonReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    return std::make_unique<MaterializedColumnMapper>(std::move(options));
+}
+
+Status JsonReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    DORIS_CHECK(_request != nullptr);
+    RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns));
+    _slot_name_to_index.clear();
+    _slot_name_to_index.reserve(_requested_columns.size());
+    for (size_t idx = 0; idx < _requested_columns.size(); ++idx) {
+        auto name = _requested_columns[idx].slot_desc->col_name();
+        _slot_name_to_index.emplace(_is_hive_table ? lower_key(name) : name, idx);
+    }
+    _previous_positions.clear();
+    _reader_range = _json_range();
+    RETURN_IF_ERROR(_open_file_reader());
+    RETURN_IF_ERROR(_create_decompressor());
+    if (_read_json_by_line) {
+        RETURN_IF_ERROR(_create_line_reader());
+    }
+    RETURN_IF_ERROR(_parse_jsonpath_and_json_root());
+    _json_parser = std::make_unique<simdjson::ondemand::parser>();
+    _padding_buffer.resize(_padded_size);
+    _reader_eof = false;
+    _single_document_read = false;
+    _eof = false;
+    return Status::OK();
+}
+
+Status JsonReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_json_parser == nullptr || _physical_file_reader == nullptr) {
+        return Status::InternalError("JSON v2 reader is not open");
+    }
+
+    const auto batch_size = _runtime_state->batch_size();
+    const auto max_block_bytes = _runtime_state->preferred_block_size_bytes();
+    *rows = 0;
+    *eof = false;
+
+    while (file_block->rows() < batch_size && !_reader_eof &&
+           file_block->bytes() < max_block_bytes) {
+        if (_read_json_by_line && _skip_first_line) {
+            size_t skipped_size = 0;
+            const uint8_t* skipped_line = nullptr;
+            RETURN_IF_ERROR(_line_reader->read_line(&skipped_line, &skipped_size, &_reader_eof,
+                                                    _io_ctx.get()));
+            _skip_first_line = false;
+            continue;
+        }
+
+        const size_t original_rows = file_block->rows();
+        size_t size = 0;
+        bool is_empty_row = false;
+        Status st = Status::OK();
+        try {
+            st = _parse_next_json(&size, &_reader_eof);
+            if (st.ok() && !_reader_eof) {
+                if (size == 0) {
+                    is_empty_row = true;
+                } else {
+                    st = _extract_json_value(size, &_reader_eof, &is_empty_row);
+                }
+            }
+            if (st.ok() && !_reader_eof && !is_empty_row) {
+                st = _append_rows_from_current_value(file_block, &is_empty_row, &_reader_eof);
+            }
+        } catch (simdjson::simdjson_error& e) {
+            st = Status::DataQualityError("Parse json data failed. code: {}, error info: {}",
+                                          e.error(), e.what());
+        }
+        if (!st.ok()) {
+            RETURN_IF_ERROR(_handle_json_error(st, file_block, original_rows, &is_empty_row));
+        }
+        // An ignored or empty JSON object can produce no row. Avoid spinning forever on a document
+        // that was consumed but produced no materialized value.
+        if (!is_empty_row && file_block->rows() == original_rows) {
+            break;
+        }
+    }
+
+    *rows = file_block->rows();
+    RETURN_IF_ERROR(_apply_filters(file_block, rows));
+    _reader_statistics.read_rows += *rows;
+    *eof = _reader_eof && *rows == 0;
+    _eof = *eof;
+    return Status::OK();
+}
+
+Status JsonReader::close() {
+    if (_line_reader != nullptr) {
+        _line_reader->close();
+        _line_reader.reset();
+    }
+    _json_parser.reset();
+    _decompressor.reset();
+    _physical_file_reader.reset();
+    _tracing_file_reader.reset();
+    _file_reader.reset();
+    _requested_columns.clear();
+    _slot_name_to_index.clear();
+    _previous_positions.clear();
+    _cached_string_values.clear();
+    return Status::OK();
+}
+
+Status JsonReader::_build_requested_columns(const FileScanRequest& request,
+                                            std::vector<RequestedColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    // FileScanRequest stores a map from file-local id to output block position. Materialization is
+    // position-driven, so normalize it into a dense vector ordered by block position while keeping
+    // the original source index for jsonpaths.
+    std::vector<RequestedColumn> by_position(request.local_positions.size());
+    for (const auto& [file_column_id, block_position] : request.local_positions) {
+        if (file_column_id.value() < 0 ||
+            static_cast<size_t>(file_column_id.value()) >= _source_file_slot_descs.size()) {
+            return Status::InvalidArgument("JSON v2 request references unknown local column id {}",
+                                           file_column_id.value());
+        }
+        if (block_position.value() >= by_position.size()) {
+            return Status::InvalidArgument("JSON v2 request has invalid block position {}",
+                                           block_position.value());
+        }
+        const auto source_index = cast_set<size_t>(file_column_id.value());
+        RequestedColumn requested_column;
+        requested_column.file_column_id = file_column_id;
+        requested_column.block_position = block_position;
+        requested_column.source_index = source_index;
+        requested_column.slot_desc = _source_file_slot_descs[source_index];
+        requested_column.serde = _source_serdes[source_index];
+        by_position[block_position.value()] = std::move(requested_column);
+    }
+    for (size_t pos = 0; pos < by_position.size(); ++pos) {
+        if (!by_position[pos].file_column_id.is_valid()) {
+            return Status::InvalidArgument("JSON v2 request misses block position {}", pos);
+        }
+    }
+    *columns = std::move(by_position);
+    return Status::OK();
+}
+
+TFileRangeDesc JsonReader::_json_range() const {
+    auto range = _range;
+    range.__set_path(_file_description->path);
+    range.__set_start_offset(_file_description->range_start_offset);
+    range.__set_size(_file_description->range_size);
+    if (_file_description->file_size >= 0) {
+        range.__set_file_size(_file_description->file_size);
+    }
+    if (!_file_description->fs_name.empty()) {
+        range.__set_fs_name(_file_description->fs_name);
+    }
+    range.__set_file_cache_admission(_file_description->file_cache_admission);
+    if (_range_compress_type != TFileCompressType::UNKNOWN) {
+        range.__set_compress_type(_range_compress_type);
+    }
+    if (_stream_load_id.has_value()) {
+        range.__set_load_id(*_stream_load_id);
+    }
+    return range;
+}
+
+Status JsonReader::_open_file_reader() {
+    _current_offset = _reader_range.start_offset;
+    if (_current_offset != 0) {
+        --_current_offset;
+    }
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        if (!_stream_load_id.has_value()) {
+            return Status::InvalidArgument("JSON v2 stream reader requires load id");
+        }
+        RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_physical_file_reader,
+                                                        _runtime_state, /*need_schema=*/false));
+    } else {
+        _file_description->mtime =
+                _reader_range.__isset.modification_time ? _reader_range.modification_time : 0;
+        auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(),
+                                                              *_file_description);
+        auto file_reader = DORIS_TRY(FileFactory::create_file_reader(
+                *_system_properties, *_file_description, reader_options, _profile));
+        _physical_file_reader =
+                _io_ctx && _io_ctx->file_reader_stats
+                        ? std::make_shared<io::TracingFileReader>(std::move(file_reader),
+                                                                  _io_ctx->file_reader_stats)
+                        : file_reader;
+    }
+    _file_reader = _physical_file_reader;
+    _tracing_file_reader = _physical_file_reader;
+    return Status::OK();
+}
+
+Status JsonReader::_create_decompressor() {
+    return Decompressor::create_decompressor(_file_compress_type, &_decompressor);
+}
+
+Status JsonReader::_create_line_reader() {
+    int64_t size = _reader_range.size;
+    if (_reader_range.start_offset != 0) {
+        // Start one byte earlier and discard the first partial line, matching split semantics used
+        // by text readers.
+        ++size;
+        _skip_first_line = true;
+    } else {
+        _skip_first_line = false;
+    }
+    _line_reader = NewPlainTextLineReader::create_unique(
+            _profile, _physical_file_reader, _decompressor.get(),
+            std::make_shared<PlainTextLineReaderCtx>(_line_delimiter, _line_delimiter_length,
+                                                     false),
+            size, _current_offset);
+    return Status::OK();
+}
+
+Status JsonReader::_parse_jsonpath_and_json_root() {
+    _parsed_jsonpaths.clear();
+    _parsed_json_root.clear();
+    if (!_jsonpaths.empty()) {
+        rapidjson::Document jsonpaths_doc;
+        if (jsonpaths_doc.Parse(_jsonpaths.c_str(), _jsonpaths.length()).HasParseError() ||
+            !jsonpaths_doc.IsArray()) {
+            return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
+        }
+        for (int i = 0; i < jsonpaths_doc.Size(); ++i) {
+            const rapidjson::Value& path = jsonpaths_doc[i];
+            if (!path.IsString()) {
+                return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
+            }
+            std::string json_path = path.GetString();
+            if (json_path.size() == 1 && json_path[0] == '$') {
+                json_path.insert(1, ".");
+            }
+            std::vector<JsonPath> parsed_paths;
+            JsonFunctions::parse_json_paths(json_path, &parsed_paths);
+            _parsed_jsonpaths.push_back(std::move(parsed_paths));
+        }
+    }
+    if (!_json_root.empty()) {
+        std::string json_root = _json_root;
+        if (json_root.size() == 1 && json_root[0] == '$') {
+            json_root.insert(1, ".");
+        }
+        JsonFunctions::parse_json_paths(json_root, &_parsed_json_root);
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_read_one_document(size_t* size, bool* eof) {
+    DORIS_CHECK(size != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    *size = 0;
+    *eof = false;
+    if (_line_reader != nullptr) {
+        const uint8_t* line = nullptr;
+        RETURN_IF_ERROR(_line_reader->read_line(&line, size, eof, _io_ctx.get()));
+        if (*eof) {
+            return Status::OK();
+        }
+        _document_buffer.assign(reinterpret_cast<const char*>(line), *size);
+        return Status::OK();
+    }
+    // Non-line mode treats the split as one JSON document. This supports a single object or an
+    // array with strip_outer_array=true.
+    if (_single_document_read) {
+        *eof = true;
+        return Status::OK();
+    }
+    _single_document_read = true;
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        return _read_one_document_from_pipe(size);
+    }
+
+    auto read_size = _reader_range.size;
+    if (read_size <= 0 && _reader_range.__isset.file_size) {
+        read_size = _reader_range.file_size - _current_offset;
+    }
+    if (read_size <= 0) {
+        *eof = true;
+        return Status::OK();
+    }
+    _document_buffer.resize(cast_set<size_t>(read_size));
+    Slice result(_document_buffer.data(), _document_buffer.size());
+    RETURN_IF_ERROR(_physical_file_reader->read_at(_current_offset, result, size, _io_ctx.get()));
+    _document_buffer.resize(*size);
+    if (*size == 0) {
+        *eof = true;
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_read_one_document_from_pipe(size_t* read_size) {
+    auto* stream_load_pipe = dynamic_cast<io::StreamLoadPipe*>(_physical_file_reader.get());
+    if (stream_load_pipe == nullptr) {
+        return Status::InternalError("JSON v2 stream reader requires StreamLoadPipe");
+    }
+    DorisUniqueBufferPtr<uint8_t> file_buf;
+    RETURN_IF_ERROR(stream_load_pipe->read_one_message(&file_buf, read_size));
+    _document_buffer.assign(reinterpret_cast<const char*>(file_buf.get()), *read_size);
+    if (!stream_load_pipe->is_chunked_transfer()) {
+        return Status::OK();
+    }
+
+    while (true) {
+        DorisUniqueBufferPtr<uint8_t> next_buf;
+        size_t next_size = 0;
+        RETURN_IF_ERROR(stream_load_pipe->read_one_message(&next_buf, &next_size));
+        if (next_size == 0) {
+            break;
+        }
+        _document_buffer.append(reinterpret_cast<const char*>(next_buf.get()), next_size);
+        *read_size += next_size;
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_parse_next_json(size_t* size, bool* eof) {
+    RETURN_IF_ERROR(_read_one_document(size, eof));
+    if (*eof || *size == 0) {
+        return Status::OK();
+    }
+    if (*size >= 3 && static_cast<unsigned char>(_document_buffer[0]) == 0xEF &&
+        static_cast<unsigned char>(_document_buffer[1]) == 0xBB &&
+        static_cast<unsigned char>(_document_buffer[2]) == 0xBF) {
+        _document_buffer.erase(0, 3);
+        *size -= 3;
+    }
+    if (*size + simdjson::SIMDJSON_PADDING > _padded_size) {
+        _padded_size = *size + simdjson::SIMDJSON_PADDING;
+        _padding_buffer.resize(_padded_size);
+    }
+    // Ondemand values reference the input buffer. Keep the padded bytes in a member buffer until the
+    // current document is fully materialized.
+    std::memcpy(_padding_buffer.data(), _document_buffer.data(), *size);
+    _original_doc_size = *size;
+    const auto error =
+            _json_parser->iterate(std::string_view(_padding_buffer.data(), *size), _padded_size)
+                    .get(_original_json_doc);
+    if (error != simdjson::error_code::SUCCESS) {
+        return Status::DataQualityError(
+                "Parse json data for JsonDoc failed. code: {}, error info: {}", error,
+                simdjson::error_message(error));
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_extract_json_value(size_t size, bool* eof, bool* is_empty_row) {
+    DORIS_CHECK(eof != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    *is_empty_row = false;
+    if (size == 0 || *eof) {
+        *is_empty_row = true;
+        return Status::OK();
+    }
+    auto type_res = _original_json_doc.type();
+    if (type_res.error() != simdjson::error_code::SUCCESS) {
+        return Status::DataQualityError(
+                "Parse json data for JsonDoc failed. code: {}, error info: {}", type_res.error(),
+                simdjson::error_message(type_res.error()));
+    }
+    const auto type = type_res.value();
+    if (type != simdjson::ondemand::json_type::object &&
+        type != simdjson::ondemand::json_type::array) {
+        return Status::DataQualityError("Not an json object or json array");
+    }
+    _parsed_from_json_root = false;
+    if (!_parsed_json_root.empty() && type == simdjson::ondemand::json_type::object) {
+        // In object mode json_root can be applied once here. In outer-array mode each array element
+        // needs its own root extraction, which is handled while iterating the array.
+        simdjson::ondemand::object object = _original_json_doc;
+        Status st = JsonFunctions::extract_from_object(object, _parsed_json_root, &_json_value);
+        if (!st.ok()) {
+            return Status::DataQualityError("{}", st.to_string());
+        }
+        _parsed_from_json_root = true;
+    } else {
+        _json_value = _original_json_doc;
+    }
+
+    const auto value_type = _json_value.type().value();
+    if (value_type == simdjson::ondemand::json_type::array && !_strip_outer_array) {
+        return Status::DataQualityError(
+                "JSON data is array-object, `strip_outer_array` must be TRUE.");
+    }
+    if (value_type != simdjson::ondemand::json_type::array && _strip_outer_array) {
+        return Status::DataQualityError(
+                "JSON data is not an array-object, `strip_outer_array` must be FALSE.");
+    }
+    if (!_parsed_jsonpaths.empty() && _strip_outer_array &&
+        _json_value.count_elements().value() == 0) {
+        *is_empty_row = true;
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof) {
+    if (_parsed_jsonpaths.empty()) {
+        return _append_simple_json_rows(block, is_empty_row, eof);
+    }
+    if (_strip_outer_array) {
+        return _append_flat_array_jsonpath_rows(block, is_empty_row, eof);
+    }
+    return _append_nested_jsonpath_row(block, is_empty_row, eof);
+}
+
+Status JsonReader::_append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    bool valid = false;
+    if (_json_value.type().value() == simdjson::ondemand::json_type::array) {
+        _array = _json_value.get_array();
+        if (_array.count_elements() == 0) {
+            *is_empty_row = true;
+            return Status::OK();
+        }
+        _array_iter = _array.begin();
+        while (_array_iter != _array.end()) {
+            simdjson::ondemand::object object_value = (*_array_iter).get_object();
+            RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid));
+            ++_array_iter;
+            if (!valid) {
+                *is_empty_row = true;
+                return Status::OK();
+            }
+        }
+    } else {
+        simdjson::ondemand::object object_value = _json_value.get_object();
+        RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid));
+        if (!valid) {
+            *is_empty_row = true;
+            return Status::OK();
+        }
+    }
+    *is_empty_row = false;
+    return Status::OK();
+}
+
+Status JsonReader::_append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    const size_t original_rows = block->rows();
+    bool valid = true;
+    _array = _json_value.get_array();
+    _array_iter = _array.begin();
+    while (_array_iter != _array.end()) {
+        simdjson::ondemand::object object_value = (*_array_iter).get_object();
+        if (!_parsed_from_json_root && !_parsed_json_root.empty()) {
+            // For strip_outer_array, json_root is evaluated against each element. Elements without
+            // the requested root do not produce rows, matching the load reader behavior.
+            simdjson::ondemand::value rooted_value;
+            Status st = JsonFunctions::extract_from_object(object_value, _parsed_json_root,
+                                                           &rooted_value);
+            if (!st.ok()) {
+                if (st.is<ErrorCode::NOT_FOUND>()) {
+                    ++_array_iter;
+                    continue;
+                }
+                return st;
+            }
+            if (rooted_value.type().value() != simdjson::ondemand::json_type::object) {
+                ++_array_iter;
+                continue;
+            }
+            object_value = rooted_value.get_object();
+        }
+        RETURN_IF_ERROR(_write_columns_by_jsonpath(&object_value, block, &valid));
+        ++_array_iter;
+    }
+    *is_empty_row = block->rows() == original_rows;
+    return Status::OK();
+}
+
+Status JsonReader::_append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_json_value.type().value() != simdjson::ondemand::json_type::object) {
+        return Status::DataQualityError("Not object item");
+    }
+    bool valid = true;
+    simdjson::ondemand::object object_value = _json_value.get_object();
+    RETURN_IF_ERROR(_write_columns_by_jsonpath(&object_value, block, &valid));
+    *is_empty_row = !valid;
+    return Status::OK();
+}
+
+Status JsonReader::_set_column_values_from_object(simdjson::ondemand::object* object_value,
+                                                  Block* block, bool* valid) {
+    DORIS_CHECK(object_value != nullptr);
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(valid != nullptr);
+    std::vector<bool> seen_columns(block->columns(), false);
+    const size_t cur_row_count = block->rows();
+    bool has_valid_value = false;
+    size_t key_index = 0;
+
+    for (auto field : *object_value) {
+        std::string_view key = field.unescaped_key().value();
+        const size_t column_index = _column_index(key, key_index++);
+        if (column_index == static_cast<size_t>(-1)) {
+            continue;
+        }
+        if (seen_columns[column_index]) {
+            if (_is_hive_table) {
+                // Hive JSON keeps the last duplicate key ignoring case. The earlier value has
+                // already been appended, so remove it before writing the replacement.
+                _pop_back_last_inserted_value(block, column_index);
+            } else {
+                continue;
+            }
+        }
+        simdjson::ondemand::value value = field.value().value();
+        const auto& requested = _requested_columns[column_index];
+        auto* column_ptr = block->get_by_position(column_index).column->assert_mutable().get();
+        RETURN_IF_ERROR(_write_data_to_column<false>(
+                value, requested.slot_desc->get_data_type_ptr(), column_ptr,
+                requested.slot_desc->col_name(), requested.serde, valid));
+        if (!*valid) {
+            return Status::OK();
+        }
+        seen_columns[column_index] = true;
+        has_valid_value = true;
+    }
+
+    for (size_t i = 0; i < _requested_columns.size(); ++i) {
+        if (seen_columns[i]) {
+            continue;
+        }
+        auto* column_ptr = block->get_by_position(i).column->assert_mutable().get();
+        RETURN_IF_ERROR(_fill_missing_column(_requested_columns[i], column_ptr, valid));
+        if (!*valid) {
+            _truncate_block_to_rows(block, cur_row_count);
+            return Status::OK();
+        }
+    }
+    *valid = true;
+    if (!has_valid_value) {
+        return Status::OK();
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_write_columns_by_jsonpath(simdjson::ondemand::object* object_value,
+                                              Block* block, bool* valid) {
+    DORIS_CHECK(object_value != nullptr);
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(valid != nullptr);
+    bool has_valid_value = false;
+    const size_t cur_row_count = block->rows();
+    _cached_string_values.clear();
+
+    for (size_t i = 0; i < _requested_columns.size(); ++i) {
+        const auto& requested = _requested_columns[i];
+        auto* column_ptr = block->get_by_position(i).column->assert_mutable().get();
+        simdjson::ondemand::value json_value;
+        Status st = Status::OK();
+        if (requested.source_index < _parsed_jsonpaths.size()) {
+            st = JsonFunctions::extract_from_object(
+                    *object_value, _parsed_jsonpaths[requested.source_index], &json_value);
+            if (!st.ok() && !st.is<ErrorCode::NOT_FOUND>()) {
+                return st;
+            }
+        }
+        if (_is_root_path_for_column(requested)) {
+            // A root jsonpath means "materialize the whole current JSON document" instead of a
+            // field under it. Use the original bytes so callers receive the same document text.
+            if (is_column_nullable(*column_ptr)) {
+                auto* nullable_column = assert_cast<ColumnNullable*>(column_ptr);
+                nullable_column->get_null_map_data().push_back(0);
+                auto* column_string =
+                        assert_cast<ColumnString*>(nullable_column->get_nested_column_ptr().get());
+                column_string->insert_data(_padding_buffer.data(), _original_doc_size);
+            } else {
+                auto* column_string = assert_cast<ColumnString*>(column_ptr);
+                column_string->insert_data(_padding_buffer.data(), _original_doc_size);
+            }
+            has_valid_value = true;
+        } else if (requested.source_index >= _parsed_jsonpaths.size() ||
+                   st.is<ErrorCode::NOT_FOUND>()) {
+            RETURN_IF_ERROR(_fill_missing_column(requested, column_ptr, valid));
+            if (!*valid) {
+                _truncate_block_to_rows(block, cur_row_count);
+                return Status::OK();
+            }
+        } else {
+            RETURN_IF_ERROR(_write_data_to_column<true>(
+                    json_value, requested.slot_desc->get_data_type_ptr(), column_ptr,
+                    requested.slot_desc->col_name(), requested.serde, valid));
+            if (!*valid) {
+                _truncate_block_to_rows(block, cur_row_count);
+                return Status::OK();
+            }
+            has_valid_value = true;
+        }
+    }
+
+    if (!has_valid_value) {
+        // jsonpaths can legally match nothing. Roll the row back so an all-missing path set does
+        // not create a synthetic row of nulls.
+        _truncate_block_to_rows(block, cur_row_count);
+        *valid = false;
+        return Status::OK();
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+template <bool use_string_cache>
+Status JsonReader::_write_data_to_column(simdjson::ondemand::value& value,
+                                         const DataTypePtr& type_desc, IColumn* column_ptr,
+                                         const std::string& column_name,
+                                         const DataTypeSerDeSPtr& serde, bool* valid) {
+    ColumnNullable* nullable_column = nullptr;
+    IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    const auto value_type = value.type().value();
+
+    if (is_column_nullable(*column_ptr)) {
+        nullable_column = assert_cast<ColumnNullable*>(column_ptr);
+        data_column_ptr = nullable_column->get_nested_column().get_ptr().get();
+        if (type_desc->is_nullable()) {
+            data_serde = serde->get_nested_serdes()[0];
+        }
+        if (value_type == simdjson::ondemand::json_type::null) {
+            nullable_column->insert_default();
+            *valid = true;
+            return Status::OK();
+        }
+    } else if (value_type == simdjson::ondemand::json_type::null) {
+        return Status::DataQualityError("Json value is null, but the column `{}` is not nullable.",
+                                        column_name);
+    }
+
+    const auto primitive_type = type_desc->get_primitive_type();
+    if (!is_complex_type(primitive_type)) {
+        if (value_type == simdjson::ondemand::json_type::string) {
+            std::string_view value_string;
+            if constexpr (use_string_cache) {
+                const auto cache_key = value.raw_json().value();
+                if (_cached_string_values.contains(cache_key)) {
+                    value_string = _cached_string_values[cache_key];
+                } else {
+                    value_string = value.get_string();
+                    _cached_string_values.emplace(cache_key, value_string);
+                }
+            } else {
+                value_string = value.get_string();
+            }
+            Slice slice {value_string.data(), value_string.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        } else if (value_type == simdjson::ondemand::json_type::boolean) {
+            const char* str_value = value.get_bool() ? "1" : "0";
+            Slice slice {str_value, 1};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        } else {
+            std::string_view json_str = simdjson::to_json_string(value);
+            Slice slice {json_str.data(), json_str.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        }
+    } else if (primitive_type == TYPE_STRUCT) {
+        if (value_type != simdjson::ondemand::json_type::object) {
+            return Status::DataQualityError(
+                    "Json value isn't object, but the column `{}` is struct.", column_name);
+        }
+        const auto* type_struct =
+                assert_cast<const DataTypeStruct*>(remove_nullable(type_desc).get());
+        auto* struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        std::map<std::string, size_t> sub_col_name_to_idx;
+        for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size();
+             ++sub_col_idx) {
+            sub_col_name_to_idx.emplace(lower_key(type_struct->get_element_name(sub_col_idx)),
+                                        sub_col_idx);
+        }
+        std::vector<bool> has_value(type_struct->get_elements().size(), false);
+        simdjson::ondemand::object struct_value = value.get_object();
+        for (auto sub : struct_value) {
+            const auto sub_key = lower_key(sub.unescaped_key().value());
+            const auto it = sub_col_name_to_idx.find(sub_key);
+            if (it == sub_col_name_to_idx.end()) {
+                continue;
+            }
+            const auto sub_column_idx = it->second;
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_column_idx).get_ptr();
+            if (has_value[sub_column_idx]) {
+                // Struct fields follow Hive-style duplicate handling: the last matching nested key
+                // wins. Remove the earlier nested value before appending the new one.
+                sub_column_ptr->pop_back(1);
+            }
+            has_value[sub_column_idx] = true;
+            auto sub_value = sub.value().value();
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    sub_value, type_struct->get_element(sub_column_idx), sub_column_ptr.get(),
+                    column_name + "." + sub_key, sub_serdes[sub_column_idx], valid));
+        }
+        for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size();
+             ++sub_col_idx) {
+            if (has_value[sub_col_idx]) {
+                continue;
+            }
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_col_idx).get_ptr();
+            if (!is_column_nullable(*sub_column_ptr)) {
+                return Status::DataQualityError(
+                        "Json file structColumn miss field {} and this column isn't nullable.",
+                        column_name + "." + type_struct->get_element_name(sub_col_idx));
+            }
+            sub_column_ptr->insert_default();
+        }
+    } else if (primitive_type == TYPE_MAP) {
+        if (value_type != simdjson::ondemand::json_type::object) {
+            return Status::DataQualityError("Json value isn't object, but the column `{}` is map.",
+                                            column_name);
+        }
+        const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(type_desc).get());
+        auto* map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        size_t field_count = 0;
+        simdjson::ondemand::object object_value = value.get_object();
+        for (auto member_value : object_value) {
+            auto* key_column = map_column_ptr->get_keys_ptr()->assert_mutable()->get_ptr().get();
+            auto key_serde = sub_serdes[0];
+            if (is_column_nullable(*key_column)) {
+                auto* nullable_key = assert_cast<ColumnNullable*>(key_column);
+                nullable_key->get_null_map_data().push_back(0);
+                key_column = nullable_key->get_nested_column().get_ptr().get();
+                if (map_type->get_key_type()->is_nullable()) {
+                    key_serde = key_serde->get_nested_serdes()[0];
+                }
+            }
+            std::string_view key_view = member_value.unescaped_key().value();
+            Slice key_slice(key_view.data(), key_view.size());
+            RETURN_IF_ERROR(key_serde->deserialize_one_cell_from_json(*key_column, key_slice,
+                                                                      _serde_options));
+            simdjson::ondemand::value field_value = member_value.value().value();
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    field_value, map_type->get_value_type(),
+                    map_column_ptr->get_values_ptr()->assert_mutable()->get_ptr().get(),
+                    column_name + ".value", sub_serdes[1], valid));
+            ++field_count;
+        }
+        auto& offsets = map_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+    } else if (primitive_type == TYPE_ARRAY) {
+        if (value_type != simdjson::ondemand::json_type::array) {
+            return Status::DataQualityError("Json value isn't array, but the column `{}` is array.",
+                                            column_name);
+        }
+        const auto* array_type =
+                assert_cast<const DataTypeArray*>(remove_nullable(type_desc).get());
+        auto* array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        size_t field_count = 0;
+        simdjson::ondemand::array array_value = value.get_array();
+        for (simdjson::ondemand::value sub_value : array_value) {
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    sub_value, array_type->get_nested_type(),
+                    array_column_ptr->get_data().get_ptr().get(), column_name + ".element",
+                    sub_serdes[0], valid));
+            ++field_count;
+        }
+        auto& offsets = array_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+    } else {
+        return Status::InternalError("Not support JSON value to complex column");
+    }
+
+    if (nullable_column && value_type != simdjson::ondemand::json_type::null) {
+        nullable_column->get_null_map_data().push_back(0);
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+Status JsonReader::_fill_missing_column(const RequestedColumn& column, IColumn* column_ptr,
+                                        bool* valid) {
+    if (column.slot_desc->is_nullable()) {
+        auto* nullable_column = assert_cast<ColumnNullable*>(column_ptr);
+        nullable_column->insert_default();
+        *valid = true;
+        return Status::OK();
+    }
+    return Status::DataQualityError(
+            "The column `{}` is not nullable, but it's not found in jsondata.",
+            column.slot_desc->col_name());
+}
+
+Status JsonReader::_append_null_for_malformed_json(Block* block) {
+    DORIS_CHECK(block != nullptr);
+    for (int i = 0; i < block->columns(); ++i) {
+        auto& column_with_type = block->get_by_position(i);
+        if (!is_column_nullable(*column_with_type.column)) {
+            return Status::DataQualityError("malformed json, but the column `{}` is not nullable.",
+                                            column_with_type.column->get_name());
+        }
+        auto column = IColumn::mutate(std::move(column_with_type.column));
+        assert_cast<ColumnNullable*>(column.get())->insert_default();
+        column_with_type.column = std::move(column);
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_handle_json_error(const Status& status, Block* block, size_t original_rows,
+                                      bool* is_empty_row) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    // Deserialization can fail after several columns have already appended data. Always restore the
+    // block to the row count before this document before either surfacing the error or appending
+    // the ignore-malformed null row.
+    _truncate_block_to_rows(block, original_rows);
+    if (_openx_json_ignore_malformed && status.is<ErrorCode::DATA_QUALITY_ERROR>()) {
+        RETURN_IF_ERROR(_append_null_for_malformed_json(block));
+        *is_empty_row = false;
+        return Status::OK();
+    }
+    return status;
+}
+
+Status JsonReader::_apply_filters(Block* file_block, size_t* rows) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const size_t rows_before_filter = *rows;
+    size_t rows_after_delete_filter = rows_before_filter;
+    if (_request != nullptr && rows_before_filter > 0 && !_request->delete_conjuncts.empty()) {
+        RETURN_IF_ERROR(VExprContext::filter_block(_request->delete_conjuncts, file_block,
+                                                   file_block->columns()));
+        rows_after_delete_filter =
+                file_block->columns() == 0 ? rows_before_filter : file_block->rows();
+    }
+
+    size_t rows_after_filter = rows_after_delete_filter;
+    if (_request != nullptr && rows_after_delete_filter > 0 && !_request->conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_request->conjuncts, file_block, file_block->columns()));
+        rows_after_filter =
+                file_block->columns() == 0 ? rows_after_delete_filter : file_block->rows();
+        if (_io_ctx != nullptr) {
+            _io_ctx->predicate_filtered_rows += rows_after_delete_filter - rows_after_filter;
+        }
+    }
+    *rows = rows_after_filter;
+    return Status::OK();
+}
+
+void JsonReader::_truncate_block_to_rows(Block* block, size_t num_rows) {
+    DORIS_CHECK(block != nullptr);
+    for (int i = 0; i < block->columns(); ++i) {
+        auto& column_with_type = block->get_by_position(i);
+        auto column = IColumn::mutate(std::move(column_with_type.column));
+        if (column->size() > num_rows) {
+            column->pop_back(column->size() - num_rows);
+        }
+        column_with_type.column = std::move(column);
+    }
+}
+
+void JsonReader::_pop_back_last_inserted_value(Block* block, size_t column_index) {
+    DORIS_CHECK(block != nullptr);
+    auto& column = block->get_by_position(column_index).column;
+    auto mutable_column = IColumn::mutate(std::move(column));
+    mutable_column->pop_back(1);
+    column = std::move(mutable_column);
+}
+
+size_t JsonReader::_column_index(std::string_view key, size_t key_index) {
+    std::string hive_key;
+    std::string_view lookup_key = key;
+    if (_is_hive_table) {
+        hive_key = lower_key(key);
+        lookup_key = hive_key;
+    }
+    if (key_index < _previous_positions.size()) {
+        // Most JSON lines share field order. Reuse the previous line's key-position mapping before
+        // falling back to the hash table lookup.
+        const auto previous = _previous_positions[key_index];
+        if (previous < _requested_columns.size()) {
+            const auto previous_name = _requested_columns[previous].slot_desc->col_name();
+            if ((_is_hive_table ? lower_key(previous_name) : previous_name) == lookup_key) {
+                return previous;
+            }
+        }
+    }
+    const auto it = _slot_name_to_index.find(std::string(lookup_key));
+    if (it == _slot_name_to_index.end()) {
+        return static_cast<size_t>(-1);
+    }
+    if (key_index >= _previous_positions.size()) {
+        _previous_positions.resize(key_index + 1, static_cast<size_t>(-1));
+    }
+    _previous_positions[key_index] = it->second;
+    return it->second;
+}
+
+bool JsonReader::_is_root_path_for_column(const RequestedColumn& column) const {
+    return column.source_index < _parsed_jsonpaths.size() &&
+           JsonFunctions::is_root_path(_parsed_jsonpaths[column.source_index]);
+}
+
+} // namespace doris::format::json
diff --git a/be/src/format_v2/json/json_reader.h b/be/src/format_v2/json/json_reader.h
new file mode 100644
index 00000000000000..52cdfad6728d64
--- /dev/null
+++ b/be/src/format_v2/json/json_reader.h
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <simdjson/simdjson.h> // IWYU pragma: keep
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "core/custom_allocator.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "exprs/json_functions.h"
+#include "format_v2/file_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris {
+class Decompressor;
+class LineReader;
+class SlotDescriptor;
+class IColumn;
+} // namespace doris
+
+namespace doris::format::json {
+
+// FileScannerV2 JSON reader.
+//
+// JSON files do not carry an embedded physical schema. The v2 table layer still needs a
+// file-local schema and FileScanRequest contract, so this reader exposes FE-provided file slots as
+// v2 file-local columns and performs JSON parsing/materialization directly in the v2 path.
+class JsonReader final : public FileReader {
+public:
+    // `file_slot_descs` is the FE-planned file schema. JSON has no physical schema, so the reader
+    // exposes these slots as synthetic file-local columns and materializes only the columns
+    // requested by FileScanRequest.
+    JsonReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+               const TFileScanRangeParams* scan_params, const TFileRangeDesc& range,
+               const std::vector<SlotDescriptor*>& file_slot_descs,
+               TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+               std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~JsonReader() override;
+
+    // Initializes scan attributes and builds the synthetic schema from FE slots.
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const override;
+    // Opens the underlying file or stream and binds requested local column ids to output block
+    // positions. After this call, `get_block` can be called until it returns eof.
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    // Appends rows into `file_block` according to the FileScanRequest order. The block must already
+    // contain columns matching the requested positions.
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status close() override;
+
+private:
+    // A requested column keeps both identities:
+    // - `source_index`: index in FE file slots, used for jsonpaths and SerDe lookup.
+    // - `block_position`: index in the caller's output block, used for materialization.
+    struct RequestedColumn {
+        LocalColumnId file_column_id = LocalColumnId::invalid();
+        LocalIndex block_position;
+        size_t source_index = 0;
+        SlotDescriptor* slot_desc = nullptr;
+        DataTypeSerDeSPtr serde;
+    };
+
+    Status _build_requested_columns(const FileScanRequest& request,
+                                    std::vector<RequestedColumn>* columns) const;
+    // Reconciles TableReader's split/range descriptor with FileReader's concrete file description.
+    TFileRangeDesc _json_range() const;
+    Status _open_file_reader();
+    Status _create_decompressor();
+    Status _create_line_reader();
+    Status _parse_jsonpath_and_json_root();
+    // Reads one logical JSON document: one line for JSON Lines, or the whole range/pipe payload for
+    // single-document mode.
+    Status _read_one_document(size_t* size, bool* eof);
+    Status _read_one_document_from_pipe(size_t* read_size);
+    // Moves the logical document into a simdjson-padded buffer and creates an ondemand document.
+    Status _parse_next_json(size_t* size, bool* eof);
+    // Applies json_root and validates the object/array shape required by strip_outer_array.
+    Status _extract_json_value(size_t size, bool* eof, bool* is_empty_row);
+    Status _append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof);
+    Status _set_column_values_from_object(simdjson::ondemand::object* object_value, Block* block,
+                                          bool* valid);
+    Status _write_columns_by_jsonpath(simdjson::ondemand::object* object_value, Block* block,
+                                      bool* valid);
+    template <bool use_string_cache>
+    Status _write_data_to_column(simdjson::ondemand::value& value, const DataTypePtr& type_desc,
+                                 IColumn* column_ptr, const std::string& column_name,
+                                 const DataTypeSerDeSPtr& serde, bool* valid);
+    Status _fill_missing_column(const RequestedColumn& column, IColumn* column_ptr, bool* valid);
+    Status _append_null_for_malformed_json(Block* block);
+    Status _handle_json_error(const Status& status, Block* block, size_t original_rows,
+                              bool* is_empty_row);
+    Status _apply_filters(Block* file_block, size_t* rows);
+    void _truncate_block_to_rows(Block* block, size_t num_rows);
+    void _pop_back_last_inserted_value(Block* block, size_t column_index);
+    size_t _column_index(std::string_view key, size_t key_index);
+    bool _is_root_path_for_column(const RequestedColumn& column) const;
+
+    const TFileScanRangeParams* _scan_params = nullptr;
+    TFileRangeDesc _range;
+    TFileRangeDesc _reader_range;
+    std::vector<SlotDescriptor*> _source_file_slot_descs;
+    DataTypeSerDeSPtrs _source_serdes;
+    std::vector<ColumnDefinition> _file_schema;
+    RuntimeState* _runtime_state = nullptr;
+    TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _stream_load_id;
+    std::vector<RequestedColumn> _requested_columns;
+    std::unordered_map<std::string, size_t> _slot_name_to_index;
+    std::vector<size_t> _previous_positions;
+
+    io::FileReaderSPtr _physical_file_reader;
+    std::unique_ptr<Decompressor> _decompressor;
+    std::unique_ptr<LineReader> _line_reader;
+    int64_t _current_offset = 0;
+    bool _reader_eof = false;
+    bool _skip_first_line = false;
+    bool _single_document_read = false;
+
+    std::string _line_delimiter;
+    size_t _line_delimiter_length = 0;
+    std::string _jsonpaths;
+    std::string _json_root;
+    bool _read_json_by_line = false;
+    bool _strip_outer_array = false;
+    bool _num_as_string = false;
+    bool _fuzzy_parse = false;
+    bool _is_hive_table = false;
+    bool _openx_json_ignore_malformed = false;
+    TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN;
+
+    std::vector<std::vector<JsonPath>> _parsed_jsonpaths;
+    std::vector<JsonPath> _parsed_json_root;
+    bool _parsed_from_json_root = false;
+    DataTypeSerDe::FormatOptions _serde_options;
+
+    // simdjson ondemand values point into `_padding_buffer`, so the buffer must outlive all values
+    // created from the current document.
+    std::unique_ptr<simdjson::ondemand::parser> _json_parser;
+    simdjson::ondemand::document _original_json_doc;
+    simdjson::ondemand::value _json_value;
+    simdjson::ondemand::array _array;
+    simdjson::ondemand::array_iterator _array_iter;
+    std::string _document_buffer;
+    std::string _padding_buffer;
+    size_t _original_doc_size = 0;
+    size_t _padded_size = 1024 * 1024 * 8 + simdjson::SIMDJSON_PADDING;
+    std::unordered_map<std::string_view, std::string_view> _cached_string_values;
+};
+
+} // namespace doris::format::json
diff --git a/be/src/format_v2/parquet/parquet_column_schema.cpp b/be/src/format_v2/parquet/parquet_column_schema.cpp
new file mode 100644
index 00000000000000..e73624f3828d62
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_column_schema.cpp
@@ -0,0 +1,501 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_column_schema.h"
+
+#include <parquet/api/schema.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// Schema 构建过程中的上下文，携带逐层累加的 Dremel level 状态。
+// child_context() 在递归过程中根据子节点的 optional/repeated 属性递增对应的 level。
+struct SchemaBuildContext {
+    int32_t local_id = -1;                          // 父节点内的 child ordinal
+    int16_t definition_level = 0;                   // 累计 optional/repeated 数
+    int16_t repetition_level = 0;                   // 累计 repeated 数
+    int16_t nullable_definition_level = 0;          // 最近 optional 节点的 def level
+    int16_t repeated_repetition_level = 0;          // 最近 repeated 节点的 rep level
+    int16_t repeated_ancestor_definition_level = 0; // 最近 repeated 节点的 def level
+};
+
+enum class SchemaBuildMode {
+    // Normal recursive schema build. Bare repeated fields are exposed as Doris ARRAY for
+    // protobuf/legacy Parquet compatibility, while repeated LIST/MAP annotated groups are rejected
+    // because Parquet LIST/MAP outer groups are not allowed to be repeated at a top-level or struct
+    // field boundary.
+    NORMAL,
+    // Build the current repeated node as the already-selected element of an enclosing LIST. This
+    // is the compatibility path for Arrow/parquet-format legacy two-level LIST encodings where the
+    // repeated node itself is the array element instead of a wrapper that should be stripped.
+    REPEATED_NODE_AS_LIST_ELEMENT,
+    // Build the current repeated group as a STRUCT element of an enclosing LIST, ignoring LIST/MAP
+    // annotations on the repeated group itself. This keeps compatibility with the old Doris
+    // Parquet schema parser for Hive/legacy wrappers named "array" or "<list_name>_tuple".
+    REPEATED_NODE_AS_STRUCT_ELEMENT,
+};
+
+// Result of applying Parquet LIST backward compatibility rules to the single repeated child of a
+// LIST-annotated group. The repeated child can either be a physical wrapper whose only child is the
+// element, or the element node itself.
+struct ListElementResolution {
+    // Parquet node that should be exposed as Doris ARRAY element.
+    const ::parquet::schema::Node* element_node = nullptr;
+    // Level state after consuming the LIST repeated child. The parent ARRAY schema keeps this state
+    // to materialize offsets, empty arrays and null arrays.
+    SchemaBuildContext repeated_context;
+    // Level state used to build element_node. This equals repeated_context when the repeated child
+    // itself is the element, and includes the wrapper's only child when standard 3-level LIST
+    // encoding is stripped.
+    SchemaBuildContext element_context;
+    // Build mode for element_node. Non-NORMAL modes mean element_node is the repeated child itself,
+    // and the repeated level must not be interpreted as a second unrelated array at the same
+    // boundary.
+    SchemaBuildMode element_build_mode = SchemaBuildMode::NORMAL;
+};
+
+// Resolved repeated entry group of a MAP-annotated group. The entry wrapper is a physical Parquet
+// encoding detail; Doris folds it into the parent MAP schema and exposes only direct [key, value]
+// children.
+struct MapEntryResolution {
+    const ::parquet::schema::GroupNode* entry_group = nullptr;
+    // Level state after consuming the repeated entry group. The parent MAP schema keeps this state
+    // to materialize offsets, empty maps and null maps.
+    SchemaBuildContext entry_context;
+};
+
+bool is_list_node(const ::parquet::schema::Node& node) {
+    const auto& logical_type = node.logical_type();
+    return node.converted_type() == ::parquet::ConvertedType::LIST ||
+           (logical_type != nullptr && logical_type->is_valid() && logical_type->is_list());
+}
+
+bool is_map_node(const ::parquet::schema::Node& node) {
+    const auto& logical_type = node.logical_type();
+    return node.converted_type() == ::parquet::ConvertedType::MAP ||
+           node.converted_type() == ::parquet::ConvertedType::MAP_KEY_VALUE ||
+           (logical_type != nullptr && logical_type->is_valid() && logical_type->is_map());
+}
+
+bool has_logical_annotation(const ::parquet::schema::Node& node) {
+    const auto& logical_type = node.logical_type();
+    return (node.converted_type() != ::parquet::ConvertedType::NONE &&
+            node.converted_type() != ::parquet::ConvertedType::UNDEFINED) ||
+           (logical_type != nullptr && logical_type->is_valid() && !logical_type->is_none());
+}
+
+bool has_structural_list_name(const std::string& list_name, const std::string& repeated_name) {
+    return repeated_name == "array" || repeated_name == list_name + "_tuple";
+}
+
+bool should_build_repeated_field_as_list(const ::parquet::schema::Node& node) {
+    return node.is_repeated() && !is_list_node(node) && !is_map_node(node);
+}
+
+DataTypePtr nullable_if_needed(DataTypePtr type, const ::parquet::schema::Node& node) {
+    return node.is_optional() ? make_nullable(type) : type;
+}
+
+void inherit_common_schema_state(const ::parquet::schema::Node& node,
+                                 const SchemaBuildContext& context,
+                                 ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    column_schema->local_id = context.local_id;
+    column_schema->parquet_field_id = node.field_id();
+    column_schema->name = node.name();
+    column_schema->max_definition_level = context.definition_level;
+    column_schema->max_repetition_level = context.repetition_level;
+    column_schema->nullable_definition_level = context.nullable_definition_level;
+    column_schema->definition_level = context.definition_level;
+    column_schema->repetition_level = context.repetition_level;
+    column_schema->repeated_ancestor_definition_level = context.repeated_ancestor_definition_level;
+    column_schema->repeated_repetition_level = context.repeated_repetition_level;
+}
+
+SchemaBuildContext child_context(const SchemaBuildContext& parent,
+                                 const ::parquet::schema::Node& child_node, int32_t child_idx) {
+    SchemaBuildContext result = parent;
+    result.local_id = child_idx;
+    if (child_node.repetition() == ::parquet::Repetition::OPTIONAL) {
+        result.definition_level++;
+        result.nullable_definition_level = result.definition_level;
+    }
+    if (child_node.is_repeated()) {
+        result.repetition_level++;
+        result.definition_level++;
+        result.repeated_repetition_level = result.repetition_level;
+        result.repeated_ancestor_definition_level = result.definition_level;
+    }
+    return result;
+}
+
+void propagate_child_levels(ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    for (const auto& child : column_schema->children) {
+        column_schema->max_definition_level =
+                std::max(column_schema->max_definition_level, child->max_definition_level);
+        column_schema->max_repetition_level =
+                std::max(column_schema->max_repetition_level, child->max_repetition_level);
+    }
+}
+
+// Mirrors Arrow's ResolveList() compatibility rules, but only decides which Parquet node is the
+// logical LIST element. The caller still builds Doris' semantic LIST->[element] schema tree.
+//
+// Important cases:
+// - repeated primitive: the primitive itself is the element (legacy two-level LIST).
+// - repeated group with multiple children: the group itself is a STRUCT element.
+// - repeated group named "array" or "<list_name>_tuple": the group itself is a STRUCT element per
+//   Parquet backward compatibility rules, even when it has one child or its own logical annotation.
+//   This also keeps v2 file-local schema aligned with Doris' old schema parser used by HDFS TVF.
+// - other repeated group with a logical annotation, or whose only child is repeated: the group
+//   itself is the element. This preserves nested LIST/MAP and repeated fields inside struct
+//   elements.
+// - otherwise, strip the one-child repeated wrapper as standard three-level LIST encoding.
+Status resolve_list_element_node(const ::parquet::schema::GroupNode& list_group,
+                                 const SchemaBuildContext& list_context,
+                                 ListElementResolution* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    if (list_group.field_count() != 1) {
+        return Status::NotSupported("Unsupported parquet LIST encoding for column {}",
+                                    list_group.name());
+    }
+    const auto& repeated_node = *list_group.field(0);
+    if (!repeated_node.is_repeated()) {
+        return Status::NotSupported("Unsupported parquet LIST encoding for column {}",
+                                    list_group.name());
+    }
+    result->repeated_context = child_context(list_context, repeated_node, 0);
+    if (repeated_node.is_primitive()) {
+        result->element_node = &repeated_node;
+        result->element_context = result->repeated_context;
+        result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT;
+        return Status::OK();
+    }
+
+    const auto& repeated_group = static_cast<const ::parquet::schema::GroupNode&>(repeated_node);
+    if (repeated_group.field_count() == 0) {
+        return Status::NotSupported("Unsupported parquet LIST element layout for column {}",
+                                    list_group.name());
+    }
+    const bool repeated_group_has_logical_annotation = has_logical_annotation(repeated_group);
+    if (repeated_group.field_count() > 1 ||
+        has_structural_list_name(list_group.name(), repeated_group.name())) {
+        result->element_node = &repeated_node;
+        result->element_context = result->repeated_context;
+        result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT;
+        return Status::OK();
+    }
+    if (repeated_group_has_logical_annotation) {
+        result->element_node = &repeated_node;
+        result->element_context = result->repeated_context;
+        result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT;
+        return Status::OK();
+    }
+
+    const auto& only_child = *repeated_group.field(0);
+    if (only_child.is_repeated()) {
+        result->element_node = &repeated_node;
+        result->element_context = result->repeated_context;
+        result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT;
+        return Status::OK();
+    }
+
+    result->element_node = &only_child;
+    result->element_context = child_context(result->repeated_context, only_child, 0);
+    return Status::OK();
+}
+
+// Resolves the repeated entry group of a MAP/MAP_KEY_VALUE node. Unlike LIST, MAP has no supported
+// two-level form in this reader: Doris requires a repeated group with exactly key and value
+// children, then folds that physical entry group out of ParquetColumnSchema. Some external writers
+// emit optional MAP keys even though standard Parquet MAP keys are required; keep the key's
+// definition levels and expose it as nullable for compatibility with the old reader.
+Status resolve_map_entry_group(const ::parquet::schema::GroupNode& map_group,
+                               const SchemaBuildContext& map_context, MapEntryResolution* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    if (map_group.field_count() != 1) {
+        return Status::NotSupported("Unsupported parquet MAP encoding for column {}",
+                                    map_group.name());
+    }
+    const auto& entry_node = *map_group.field(0);
+    if (!entry_node.is_repeated()) {
+        return Status::NotSupported("Unsupported parquet MAP encoding for column {}",
+                                    map_group.name());
+    }
+    if (entry_node.is_primitive()) {
+        return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}",
+                                    map_group.name());
+    }
+    const auto& entry_group = static_cast<const ::parquet::schema::GroupNode&>(entry_node);
+    if (entry_group.field_count() != 2) {
+        return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}",
+                                    map_group.name());
+    }
+    // The Parquet logical MAP spec requires key to be REQUIRED. Some legacy/Hive-written files
+    // still mark the key field OPTIONAL even when all actual keys are non-null, for example:
+    //
+    //   optional group t_map_varchar (MAP) {
+    //     repeated group key_value {
+    //       optional binary key (STRING);
+    //       optional binary value (STRING);
+    //     }
+    //   }
+    //
+    // Accept that schema here so compatible files can be read. MapColumnReader validates the
+    // materialized key column and rejects data that really contains null map keys.
+    result->entry_group = &entry_group;
+    result->entry_context = child_context(map_context, entry_node, 0);
+    return Status::OK();
+}
+
+Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema,
+                                   const ::parquet::schema::Node& node,
+                                   const SchemaBuildContext& context,
+                                   std::unique_ptr<ParquetColumnSchema>* result,
+                                   SchemaBuildMode mode);
+
+// Builds a semantic ARRAY schema for a bare repeated field. Arrow handles this in
+// NodeToSchemaField()/GroupToSchemaField(); Doris needs the same compatibility behavior because
+// protobuf and old parquet writers often encode repeated fields without a LIST annotation.
+//
+// Example:
+//   optional group event {
+//     repeated group links {
+//       optional binary url (UTF8);
+//       optional int32 rank;
+//     }
+//   }
+// Doris exposes event.links as ARRAY<STRUCT<url, rank>>, not STRUCT<url, rank>. This keeps v2's
+// file-local schema aligned with the old schema parser used by HDFS TVF schema fetching.
+//
+// When the repeated field appears inside an already resolved LIST element, only the nested repeated
+// child should be wrapped:
+//   optional group a (LIST) {
+//     repeated group element {
+//       repeated int32 items;
+//     }
+//   }
+// The outer LIST element is the repeated "element" group, and its repeated "items" child should be
+// represented as a field of type ARRAY<INT> inside the struct element.
+Status build_repeated_field_as_list_schema(const ::parquet::SchemaDescriptor& schema,
+                                           const ::parquet::schema::Node& repeated_node,
+                                           const SchemaBuildContext& repeated_context,
+                                           std::unique_ptr<ParquetColumnSchema>* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    auto list_schema = std::make_unique<ParquetColumnSchema>();
+    inherit_common_schema_state(repeated_node, repeated_context, list_schema.get());
+    list_schema->kind = ParquetColumnSchemaKind::LIST;
+    list_schema->definition_level = repeated_context.definition_level;
+    list_schema->repetition_level = repeated_context.repetition_level;
+    list_schema->repeated_repetition_level = repeated_context.repeated_repetition_level;
+
+    std::unique_ptr<ParquetColumnSchema> element_child;
+    RETURN_IF_ERROR(build_node_schema_with_mode(schema, repeated_node, repeated_context,
+                                                &element_child,
+                                                SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT));
+    element_child->name = "element";
+    list_schema->type = std::make_shared<DataTypeArray>(element_child->type);
+    list_schema->children.push_back(std::move(element_child));
+    propagate_child_levels(list_schema.get());
+    *result = std::move(list_schema);
+    return Status::OK();
+}
+
+// Recursively builds ParquetColumnSchema for the given schema node and its children in Parquet
+// file's metadata. NORMAL mode exposes bare repeated fields as ARRAY for legacy compatibility.
+// REPEATED_NODE_AS_LIST_ELEMENT mode means the current repeated node was already selected as an
+// enclosing LIST element, so only its nested bare repeated children should be wrapped.
+Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema,
+                                   const ::parquet::schema::Node& node,
+                                   const SchemaBuildContext& context,
+                                   std::unique_ptr<ParquetColumnSchema>* result,
+                                   SchemaBuildMode mode) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    if (mode == SchemaBuildMode::NORMAL && should_build_repeated_field_as_list(node)) {
+        return build_repeated_field_as_list_schema(schema, node, context, result);
+    }
+
+    auto column_schema = std::make_unique<ParquetColumnSchema>();
+    inherit_common_schema_state(node, context, column_schema.get());
+
+    if (node.is_primitive()) {
+        const int leaf_column_id = schema.ColumnIndex(node);
+        if (leaf_column_id < 0) {
+            return Status::InvalidArgument("Cannot find leaf column id for parquet column {}",
+                                           node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::PRIMITIVE;
+        column_schema->leaf_column_id = leaf_column_id;
+        column_schema->descriptor = schema.Column(leaf_column_id);
+        if (column_schema->descriptor != nullptr) {
+            column_schema->max_definition_level = column_schema->descriptor->max_definition_level();
+            column_schema->max_repetition_level = column_schema->descriptor->max_repetition_level();
+        }
+        column_schema->type_descriptor = resolve_parquet_type(column_schema->descriptor);
+        column_schema->type = column_schema->type_descriptor.doris_type;
+        if (column_schema->type == nullptr) {
+            if (!column_schema->type_descriptor.unsupported_reason.empty()) {
+                return Status::NotSupported("Unsupported parquet column '{}': {}", node.name(),
+                                            column_schema->type_descriptor.unsupported_reason);
+            }
+            return Status::NotSupported("Unsupported parquet column type for column {}",
+                                        node.name());
+        }
+        column_schema->type = node.is_optional()
+                                      ? make_nullable(remove_nullable(column_schema->type))
+                                      : remove_nullable(column_schema->type);
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+
+    const auto& group = static_cast<const ::parquet::schema::GroupNode&>(node);
+    if (is_list_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) {
+        if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) {
+            return Status::NotSupported("Unsupported repeated parquet LIST column {}", node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::LIST;
+        ListElementResolution list_element;
+        RETURN_IF_ERROR(resolve_list_element_node(group, context, &list_element));
+        column_schema->definition_level = list_element.repeated_context.definition_level;
+        column_schema->repetition_level = list_element.repeated_context.repetition_level;
+        column_schema->repeated_repetition_level =
+                list_element.repeated_context.repeated_repetition_level;
+        std::unique_ptr<ParquetColumnSchema> child;
+        RETURN_IF_ERROR(build_node_schema_with_mode(schema, *list_element.element_node,
+                                                    list_element.element_context, &child,
+                                                    list_element.element_build_mode));
+        child->name = "element";
+        column_schema->type =
+                nullable_if_needed(std::make_shared<DataTypeArray>(child->type), node);
+        column_schema->children.push_back(std::move(child));
+        propagate_child_levels(column_schema.get());
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+
+    if (is_map_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) {
+        if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) {
+            return Status::NotSupported("Unsupported repeated parquet MAP column {}", node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::MAP;
+        MapEntryResolution map_entry;
+        RETURN_IF_ERROR(resolve_map_entry_group(group, context, &map_entry));
+        column_schema->definition_level = map_entry.entry_context.definition_level;
+        column_schema->repetition_level = map_entry.entry_context.repetition_level;
+        column_schema->repeated_repetition_level =
+                map_entry.entry_context.repeated_repetition_level;
+        for (int child_idx = 0; child_idx < map_entry.entry_group->field_count(); ++child_idx) {
+            std::unique_ptr<ParquetColumnSchema> child;
+            RETURN_IF_ERROR(build_node_schema_with_mode(
+                    schema, *map_entry.entry_group->field(child_idx),
+                    child_context(map_entry.entry_context, *map_entry.entry_group->field(child_idx),
+                                  child_idx),
+                    &child, SchemaBuildMode::NORMAL));
+            child->name = child_idx == 0 ? "key" : "value";
+            column_schema->children.push_back(std::move(child));
+        }
+        if (column_schema->children.size() != 2) {
+            return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}",
+                                        node.name());
+        }
+        auto key_type = make_nullable(column_schema->children[0]->type);
+        auto value_type = make_nullable(column_schema->children[1]->type);
+        column_schema->type =
+                nullable_if_needed(std::make_shared<DataTypeMap>(key_type, value_type), node);
+        propagate_child_levels(column_schema.get());
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+
+    column_schema->kind = ParquetColumnSchemaKind::STRUCT;
+    DataTypes child_types;
+    Strings child_names;
+    child_types.reserve(group.field_count());
+    child_names.reserve(group.field_count());
+    for (int child_idx = 0; child_idx < group.field_count(); ++child_idx) {
+        const auto& child_node = *group.field(child_idx);
+        std::unique_ptr<ParquetColumnSchema> child;
+        const auto child_ctx = child_context(context, child_node, child_idx);
+        if (should_build_repeated_field_as_list(child_node)) {
+            RETURN_IF_ERROR(
+                    build_repeated_field_as_list_schema(schema, child_node, child_ctx, &child));
+        } else {
+            RETURN_IF_ERROR(build_node_schema_with_mode(schema, child_node, child_ctx, &child,
+                                                        SchemaBuildMode::NORMAL));
+        }
+        child_types.push_back(make_nullable(child->type));
+        child_names.push_back(child->name);
+        column_schema->children.push_back(std::move(child));
+    }
+    column_schema->type =
+            nullable_if_needed(std::make_shared<DataTypeStruct>(child_types, child_names), node);
+    propagate_child_levels(column_schema.get());
+    *result = std::move(column_schema);
+    return Status::OK();
+}
+
+Status build_node_schema(const ::parquet::SchemaDescriptor& schema,
+                         const ::parquet::schema::Node& node, const SchemaBuildContext& context,
+                         std::unique_ptr<ParquetColumnSchema>* result) {
+    return build_node_schema_with_mode(schema, node, context, result, SchemaBuildMode::NORMAL);
+}
+
+} // namespace
+
+Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema,
+                                   std::vector<std::unique_ptr<ParquetColumnSchema>>* fields) {
+    if (fields == nullptr) {
+        return Status::InvalidArgument("fields is null");
+    }
+    fields->clear();
+    const auto* root = schema.group_node();
+    if (root == nullptr) {
+        return Status::InvalidArgument("Parquet schema root is null");
+    }
+    fields->reserve(root->field_count());
+    for (int field_idx = 0; field_idx < root->field_count(); ++field_idx) {
+        std::unique_ptr<ParquetColumnSchema> field;
+        SchemaBuildContext context;
+        RETURN_IF_ERROR(build_node_schema(
+                schema, *root->field(field_idx),
+                child_context(context, *root->field(field_idx), field_idx), &field));
+        fields->push_back(std::move(field));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_column_schema.h b/be/src/format_v2/parquet/parquet_column_schema.h
new file mode 100644
index 00000000000000..637930fb498ecd
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_column_schema.h
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace parquet {
+class ColumnDescriptor;
+class SchemaDescriptor;
+} // namespace parquet
+
+namespace doris::format::parquet {
+
+// Schema 节点类型枚举，决定 ParquetColumnReaderFactory 创建哪种 Reader。
+enum class ParquetColumnSchemaKind {
+    PRIMITIVE, // 基本类型叶子 → ScalarColumnReader
+    STRUCT,    // 结构体 → StructColumnReader
+    LIST,      // 数组 → ListColumnReader
+    MAP,       // 字典 → MapColumnReader
+};
+
+// ============================================================================
+// Parquet 文件的 file-local schema 树 — build_parquet_column_schema() 的输出
+// ============================================================================
+//
+// 该树描述 Parquet 逻辑字段及其到物理 leaf column 的映射，将 Arrow 的物理 schema
+// （含 wrapper groups、Dremel levels）转换为 Doris reader 可以直接消费的语义化 schema。
+//
+// 关键设计决策：
+// - LIST/MAP 的物理 wrapper group 在构建时被折叠，children 直接是 [element] 或 [key, value]。
+//   wrapper 的 repeated 属性转为父节点的 level 字段，reader 通过 levels 重建嵌套结构。
+// - 所有类型统一 nullable（Doris external table 的策略：外部数据不可信）。
+// - Dremel levels 在 child_context() 中逐层累加，复杂 reader 用它们从 leaf 的 level 流中
+//   重建嵌套容器（offsets + null_map）。
+// ============================================================================
+struct ParquetColumnSchema {
+    // ======== 标识 ========
+
+    // 在父节点 children 中的序号。顶层字段使用 root field ordinal。
+    // ParquetColumnReaderFactory 通过 LocalColumnIndex 携带的 local_id 路径递归定位 reader。
+    int local_id = -1;
+
+    // Parquet 序列化 schema 中的 field_id attribute（-1 表示文件未定义）。
+    // 仅用于 Iceberg 等 table format 的 schema matching 标识，不用于 reader 寻址。
+    int parquet_field_id = -1;
+
+    std::string name;
+
+    // ======== 类型 ========
+
+    // Doris DataType。复杂类型的 children 已递归 nullable。
+    DataTypePtr type = nullptr;
+
+    // Parquet 物理 leaf column 序号。
+    // PRIMITIVE 节点才有有效值，用于访问 ColumnDescriptor、RecordReader、ColumnChunk、Statistics。
+    // 复杂类型节点本身不是物理列，值为 -1。
+    int leaf_column_id = -1;
+
+    // 从 leaf ColumnDescriptor 解析出的类型编码信息。仅 PRIMITIVE 节点有效。
+    ParquetTypeDescriptor type_descriptor {};
+
+    ParquetColumnSchemaKind kind = ParquetColumnSchemaKind::PRIMITIVE;
+
+    // Arrow ColumnDescriptor 指针。仅 PRIMITIVE 节点有效，复杂节点为 nullptr。
+    const ::parquet::ColumnDescriptor* descriptor = nullptr;
+
+    // ======== Dremel Levels ========
+
+    // 该子树中的最大 def/rep level。PRIMITIVE 从 ColumnDescriptor 获取，复杂节点从 children 上报。
+    int16_t max_definition_level = 0;
+    int16_t max_repetition_level = 0;
+
+    // 使本节点自身变为 nullable 的 def level 阈值。
+    // 复杂 reader 用此值区分"我的值是 NULL"（def < threshold）和"我有值但内容为空"（def >= threshold）。
+    int16_t nullable_definition_level = 0;
+
+    // 从 root 到本节点的累计 def/rep level。在 child_context() 中逐层 +1。
+    int16_t definition_level = 0;
+    int16_t repetition_level = 0;
+
+    // 最近 repeated 祖先的 def level。
+    int16_t repeated_ancestor_definition_level = 0;
+
+    // 最近 repeated 祖先的 rep level。
+    // LIST/MAP reader 用此值从孩子 rep level 流中判断"新元素开始"（rep_level >= threshold）。
+    int16_t repeated_repetition_level = 0;
+
+    // ======== 子树 ========
+    // LIST: [element]，MAP: [key, value]，STRUCT: 直接孩子，PRIMITIVE: 空
+    std::vector<std::unique_ptr<ParquetColumnSchema>> children {};
+};
+
+// 从 Arrow Parquet SchemaDescriptor 构造 file-local schema tree。
+// 这是 init() 阶段最重要的转换：物理 schema（含 wrapper groups）→ 语义化 schema（wrapper 已折叠）。
+Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema,
+                                   std::vector<std::unique_ptr<ParquetColumnSchema>>* fields);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_file_context.cpp b/be/src/format_v2/parquet/parquet_file_context.cpp
new file mode 100644
index 00000000000000..60e48c947bd3e2
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_file_context.cpp
@@ -0,0 +1,168 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_file_context.h"
+
+#include <arrow/buffer.h>
+#include <arrow/result.h>
+#include <parquet/exception.h>
+
+#include <exception>
+#include <utility>
+
+#include "io/fs/file_reader.h"
+#include "util/slice.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// 将 Doris 的 io::FileReader 适配为 Arrow 的 RandomAccessFile 接口。
+//
+// ParquetFileReader::Open() 要求一个 Arrow::RandomAccessFile，
+// 本适配器将 Doris 的 read_at() / size() 等接口映射为 Arrow 的 ReadAt() / GetSize()。
+// Seek() 和 Tell() 维护了内部的 position 游标用于顺序 read() 操作。
+class DorisRandomAccessFile final : public arrow::io::RandomAccessFile {
+public:
+    DorisRandomAccessFile(io::FileReaderSPtr file_reader, io::IOContext* io_ctx)
+            : _file_reader(std::move(file_reader)), _io_ctx(io_ctx) {
+        set_mode(arrow::io::FileMode::READ);
+    }
+
+    arrow::Status Close() override {
+        _closed = true;
+        return arrow::Status::OK();
+    }
+
+    bool closed() const override { return _closed; }
+
+    arrow::Result<int64_t> Tell() const override { return _pos; }
+
+    arrow::Status Seek(int64_t position) override {
+        if (position < 0) {
+            return arrow::Status::Invalid("negative seek position");
+        }
+        _pos = position;
+        return arrow::Status::OK();
+    }
+
+    arrow::Result<int64_t> GetSize() override {
+        if (!_file_reader) {
+            return arrow::Status::IOError("Doris file reader is not open");
+        }
+        return static_cast<int64_t>(_file_reader->size());
+    }
+
+    arrow::Result<int64_t> Read(int64_t nbytes, void* out) override {
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(_pos, nbytes, out));
+        _pos += bytes_read;
+        return bytes_read;
+    }
+
+    arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override {
+        ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes));
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, Read(nbytes, buffer->mutable_data()));
+        ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+        buffer->ZeroPadding();
+        return buffer;
+    }
+
+    arrow::Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override {
+        if (!_file_reader) {
+            return arrow::Status::IOError("Doris file reader is not open");
+        }
+        if (position < 0 || nbytes < 0) {
+            return arrow::Status::Invalid("negative read position or length");
+        }
+        size_t bytes_read = 0;
+        Status st = _file_reader->read_at(
+                static_cast<size_t>(position),
+                Slice(static_cast<uint8_t*>(out), static_cast<size_t>(nbytes)), &bytes_read,
+                _io_ctx);
+        if (!st.ok()) {
+            return arrow::Status::IOError(st.to_string_no_stack());
+        }
+        return static_cast<int64_t>(bytes_read);
+    }
+
+    arrow::Result<std::shared_ptr<arrow::Buffer>> ReadAt(int64_t position,
+                                                         int64_t nbytes) override {
+        ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes));
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(position, nbytes, buffer->mutable_data()));
+        ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+        buffer->ZeroPadding();
+        return buffer;
+    }
+
+private:
+    io::FileReaderSPtr _file_reader;
+    io::IOContext* _io_ctx = nullptr;
+    int64_t _pos = 0;
+    bool _closed = false;
+};
+
+} // namespace
+
+Status arrow_status_to_doris_status(const arrow::Status& status) {
+    if (status.ok()) {
+        return Status::OK();
+    }
+    if (status.IsIOError()) {
+        return Status::IOError(status.ToString());
+    }
+    if (status.IsInvalid()) {
+        return Status::InvalidArgument(status.ToString());
+    }
+    return Status::InternalError(status.ToString());
+}
+
+Status ParquetFileContext::open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx) {
+    arrow_file = std::make_shared<DorisRandomAccessFile>(std::move(input_file_reader), io_ctx);
+    try {
+        // TODO: Cache parquet metadata in file system layer to avoid repeated metadata read for same file.
+        this->file_reader = ::parquet::ParquetFileReader::Open(
+                arrow_file, ::parquet::default_reader_properties());
+        metadata = this->file_reader->metadata();
+        schema = metadata != nullptr ? metadata->schema() : nullptr;
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to open parquet file: {}", e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to open parquet file: {}", e.what());
+    }
+
+    if (metadata == nullptr || schema == nullptr) {
+        return Status::Corruption("Failed to read parquet metadata");
+    }
+    return Status::OK();
+}
+
+Status ParquetFileContext::close() {
+    if (file_reader != nullptr) {
+        try {
+            file_reader->Close();
+        } catch (const std::exception&) {
+            // close 需要保持幂等；这里不覆盖此前 scan 路径上的真实错误。
+        }
+    }
+    if (arrow_file != nullptr) {
+        static_cast<void>(arrow_status_to_doris_status(arrow_file->Close()));
+    }
+    file_reader.reset();
+    arrow_file.reset();
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_file_context.h b/be/src/format_v2/parquet/parquet_file_context.h
new file mode 100644
index 00000000000000..cd8f9a0b962bbf
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_file_context.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/io/interfaces.h>
+#include <parquet/api/reader.h>
+
+#include <memory>
+
+#include "common/status.h"
+#include "io/fs/file_reader.h"
+
+namespace doris::format::parquet {
+
+// Parquet 文件上下文 — 管理 Arrow 层文件对象和元数据的生命周期。
+//
+// 该类是 Doris 与 Arrow Parquet C++ library 的边界：
+// - open():  将 Doris 的 io::FileReader 包装为 Arrow::RandomAccessFile，
+//            然后用 Arrow 的 ParquetFileReader::Open() 解析 footer。
+// - close(): 释放 Arrow 持有的文件句柄和 reader 资源。
+//
+// metadata 和 schema 在 open() 后可用，供 build_parquet_column_schema()、
+// plan_parquet_row_groups() 等使用。ParquetColumnReaderFactory 通过
+// file_reader->RowGroup(idx) 按需打开 RowGroupReader。
+struct ParquetFileContext {
+    std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;   // Doris FileReader 的 Arrow 包装
+    std::unique_ptr<::parquet::ParquetFileReader> file_reader; // Arrow Parquet 文件解析器
+    std::shared_ptr<::parquet::FileMetaData> metadata;         // Footer metadata (RowGroup 信息)
+    const ::parquet::SchemaDescriptor* schema = nullptr;       // 物理 leaf column schema
+
+    Status open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx);
+    Status close();
+};
+
+Status arrow_status_to_doris_status(const arrow::Status& status);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_profile.cpp b/be/src/format_v2/parquet/parquet_profile.cpp
new file mode 100644
index 00000000000000..79f979ea0cf1b8
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_profile.cpp
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_profile.h"
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+namespace doris::format::parquet {
+
+void ParquetProfile::init(RuntimeProfile* profile) {
+    if (profile == nullptr) {
+        return;
+    }
+
+    static const char* parquet_profile = "ParquetReader";
+    ADD_TIMER_WITH_LEVEL(profile, parquet_profile, 1);
+
+    filtered_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsFiltered", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_row_groups_by_min_max = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByMinMax", TUnit::UNIT, parquet_profile, 1);
+    filtered_row_groups_by_dictionary = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByDictionary", TUnit::UNIT, parquet_profile, 1);
+    filtered_row_groups_by_bloom_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByBloomFilter", TUnit::UNIT, parquet_profile, 1);
+    to_read_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsReadNum", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    total_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsTotalNum", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    selected_row_ranges = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRowRanges", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_group_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByGroup", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_page_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByPage", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    pages_skipped_by_data_page_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PagesSkippedByDataPageFilter", TUnit::UNIT, parquet_profile, 1);
+    data_page_filter_skip_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DataPageFilterSkipBytes",
+                                                               TUnit::BYTES, parquet_profile, 1);
+    selected_rows =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRows", TUnit::UNIT, parquet_profile, 1);
+    rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowsFilteredByConjunct",
+                                                             TUnit::UNIT, parquet_profile, 1);
+    total_batches =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "TotalBatches", TUnit::UNIT, parquet_profile, 1);
+    empty_selection_batches = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "EmptySelectionBatches",
+                                                           TUnit::UNIT, parquet_profile, 1);
+    range_gap_skipped_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RangeGapSkippedRows",
+                                                          TUnit::UNIT, parquet_profile, 1);
+    reader_read_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderReadRows", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    reader_skip_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSkipRows", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    reader_select_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSelectRows", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    arrow_read_records_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "ArrowReadRecordsTime", parquet_profile, 1);
+    materialization_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "MaterializationTime", parquet_profile, 1);
+    lazy_read_filtered_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByLazyRead",
+                                                           TUnit::UNIT, parquet_profile, 1);
+    filtered_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredBytes", TUnit::BYTES,
+                                                  parquet_profile, 1);
+    raw_rows_read =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RawRowsRead", TUnit::UNIT, parquet_profile, 1);
+    column_read_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ColumnReadTime", parquet_profile, 1);
+    parse_meta_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseMetaTime", parquet_profile, 1);
+    parse_footer_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseFooterTime", parquet_profile, 1);
+    file_reader_create_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "FileReaderCreateTime", parquet_profile, 1);
+    open_file_num =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FileNum", TUnit::UNIT, parquet_profile, 1);
+    page_index_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "PageIndexReadCalls", TUnit::UNIT, 1);
+    page_index_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexFilterTime", parquet_profile, 1);
+    read_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexReadTime", parquet_profile, 1);
+    parse_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexParseTime", parquet_profile, 1);
+    row_group_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "RowGroupFilterTime", parquet_profile, 1);
+    file_footer_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterReadCalls", TUnit::UNIT, 1);
+    file_footer_hit_cache = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterHitCache", TUnit::UNIT, 1);
+    decompress_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecompressTime", parquet_profile, 1);
+    decompress_cnt = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DecompressCount", TUnit::UNIT,
+                                                  parquet_profile, 1);
+    page_read_counter =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageReadCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheWriteCount",
+                                                            TUnit::UNIT, parquet_profile, 1);
+    page_cache_compressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheCompressedWriteCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_decompressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheDecompressedWriteCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheHitCount", TUnit::UNIT,
+                                                          parquet_profile, 1);
+    page_cache_missing_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheMissingCount",
+                                                              TUnit::UNIT, parquet_profile, 1);
+    page_cache_compressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheCompressedHitCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_decompressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheDecompressedHitCount", TUnit::UNIT, parquet_profile, 1);
+    decode_header_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderDecodeTime", parquet_profile, 1);
+    read_page_header_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderReadTime", parquet_profile, 1);
+    decode_value_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeValueTime", parquet_profile, 1);
+    decode_dict_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeDictTime", parquet_profile, 1);
+    decode_level_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeLevelTime", parquet_profile, 1);
+    decode_null_map_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeNullMapTime", parquet_profile, 1);
+    skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SkipPageHeaderNum", TUnit::UNIT,
+                                                        parquet_profile, 1);
+    parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ParsePageHeaderNum", TUnit::UNIT,
+                                                         parquet_profile, 1);
+    predicate_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PredicateFilterTime", parquet_profile, 1);
+    dict_filter_rewrite_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "DictFilterRewriteTime", parquet_profile, 1);
+    convert_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ConvertTime", parquet_profile, 1);
+    bloom_filter_read_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "BloomFilterReadTime", parquet_profile, 1);
+}
+
+void ParquetProfile::update_pruning_stats(const ParquetPruningStats& pruning_stats) const {
+    COUNTER_UPDATE(filtered_row_groups,
+                   pruning_stats.total_row_groups - pruning_stats.selected_row_groups);
+    COUNTER_UPDATE(filtered_row_groups_by_min_max, pruning_stats.filtered_row_groups_by_statistics);
+    COUNTER_UPDATE(filtered_row_groups_by_dictionary,
+                   pruning_stats.filtered_row_groups_by_dictionary);
+    COUNTER_UPDATE(filtered_row_groups_by_bloom_filter,
+                   pruning_stats.filtered_row_groups_by_bloom_filter);
+    COUNTER_UPDATE(to_read_row_groups, pruning_stats.selected_row_groups);
+    COUNTER_UPDATE(total_row_groups, pruning_stats.total_row_groups);
+    COUNTER_UPDATE(selected_row_ranges, pruning_stats.selected_row_ranges);
+    COUNTER_UPDATE(filtered_group_rows, pruning_stats.filtered_group_rows);
+    COUNTER_UPDATE(filtered_page_rows, pruning_stats.filtered_page_rows);
+    COUNTER_UPDATE(page_index_read_calls, pruning_stats.page_index_read_calls);
+    COUNTER_UPDATE(bloom_filter_read_time, pruning_stats.bloom_filter_read_time);
+    COUNTER_UPDATE(row_group_filter_time, pruning_stats.row_group_filter_time);
+    COUNTER_UPDATE(page_index_filter_time, pruning_stats.page_index_filter_time);
+    COUNTER_UPDATE(read_page_index_time, pruning_stats.read_page_index_time);
+}
+
+ParquetPageSkipProfile ParquetProfile::page_skip_profile() const {
+    return {
+            .skipped_pages = pages_skipped_by_data_page_filter,
+            .skipped_bytes = data_page_filter_skip_bytes,
+    };
+}
+
+ParquetColumnReaderProfile ParquetProfile::column_reader_profile() const {
+    return {
+            .reader_read_rows = reader_read_rows,
+            .reader_skip_rows = reader_skip_rows,
+            .reader_select_rows = reader_select_rows,
+            .arrow_read_records_time = arrow_read_records_time,
+            .materialization_time = materialization_time,
+    };
+}
+
+ParquetScanProfile ParquetProfile::scan_profile() const {
+    return {
+            .raw_rows_read = raw_rows_read,
+            .selected_rows = selected_rows,
+            .rows_filtered_by_conjunct = rows_filtered_by_conjunct,
+            .lazy_read_filtered_rows = lazy_read_filtered_rows,
+            .total_batches = total_batches,
+            .empty_selection_batches = empty_selection_batches,
+            .range_gap_skipped_rows = range_gap_skipped_rows,
+            .column_read_time = column_read_time,
+            .predicate_filter_time = predicate_filter_time,
+            .column_reader_profile = column_reader_profile(),
+    };
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_profile.h b/be/src/format_v2/parquet/parquet_profile.h
new file mode 100644
index 00000000000000..26e6d01d943fd0
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_profile.h
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "runtime/runtime_profile.h"
+
+namespace doris::format::parquet {
+
+struct ParquetPruningStats;
+
+// ============================================================================
+// Page Skip Profile — data page 级跳过统计
+// ============================================================================
+struct ParquetPageSkipProfile {
+    RuntimeProfile::Counter* skipped_pages = nullptr; // 被 page index 跳过的 data page 数
+    RuntimeProfile::Counter* skipped_bytes = nullptr; // 被跳过的压缩字节数
+};
+
+// ============================================================================
+// Column Reader Profile — 列读取统计
+// ============================================================================
+struct ParquetColumnReaderProfile {
+    RuntimeProfile::Counter* reader_read_rows = nullptr;        // read() 读取的行数
+    RuntimeProfile::Counter* reader_skip_rows = nullptr;        // skip() 跳过的行数
+    RuntimeProfile::Counter* reader_select_rows = nullptr;      // select() 选中的行数
+    RuntimeProfile::Counter* arrow_read_records_time = nullptr; // Arrow RecordReader 耗时 (ns)
+    RuntimeProfile::Counter* materialization_time = nullptr;    // 值物化耗时 (ns)
+};
+
+// ============================================================================
+// Scan Profile — 扫描调度统计（每个 batch 的粒度）
+// ============================================================================
+struct ParquetScanProfile {
+    RuntimeProfile::Counter* raw_rows_read = nullptr; // 从 RecordReader 读取的原始行数
+    RuntimeProfile::Counter* selected_rows = nullptr; // conjuncts 过滤后选中的行数
+    RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; // 被 conjuncts 过滤掉的行数
+    RuntimeProfile::Counter* lazy_read_filtered_rows =
+            nullptr; // 因 late materialization 减少读取的行数
+    RuntimeProfile::Counter* total_batches = nullptr;           // 总批次数
+    RuntimeProfile::Counter* empty_selection_batches = nullptr; // 全过滤的空批次数
+    RuntimeProfile::Counter* range_gap_skipped_rows = nullptr;  // range gap 跳过的行数
+    RuntimeProfile::Counter* column_read_time = nullptr;        // 列读取耗时 (ns)
+    RuntimeProfile::Counter* predicate_filter_time = nullptr;   // predicate 过滤耗时 (ns)
+    ParquetColumnReaderProfile column_reader_profile;           // 嵌套的列读取统计
+};
+
+// ============================================================================
+// Parquet Profile — 统一的 RuntimeProfile Counter 集合
+// ============================================================================
+//
+// 管理 new Parquet reader 暴露的所有 RuntimeProfile Counter。
+// 通过 page_skip_profile() / column_reader_profile() / scan_profile() 方法
+// 将整体的 Counter 集合拆分为不同模块需要的窄视图。
+// ============================================================================
+struct ParquetProfile {
+    void init(RuntimeProfile* profile);
+    void update_pruning_stats(const ParquetPruningStats& pruning_stats) const;
+
+    // 构建各模块的窄视图（只是指针透传，不创建新 Counter）
+    ParquetPageSkipProfile page_skip_profile() const;
+    ParquetColumnReaderProfile column_reader_profile() const;
+    ParquetScanProfile scan_profile() const;
+
+    // ======== RowGroup 裁剪 ========
+    RuntimeProfile::Counter* filtered_row_groups = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_min_max = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_dictionary = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_bloom_filter = nullptr;
+    RuntimeProfile::Counter* to_read_row_groups = nullptr;
+    RuntimeProfile::Counter* total_row_groups = nullptr;
+    RuntimeProfile::Counter* selected_row_ranges = nullptr;
+    RuntimeProfile::Counter* filtered_group_rows = nullptr;
+    RuntimeProfile::Counter* filtered_page_rows = nullptr;
+
+    // ======== Page Skip ========
+    RuntimeProfile::Counter* pages_skipped_by_data_page_filter = nullptr;
+    RuntimeProfile::Counter* data_page_filter_skip_bytes = nullptr;
+
+    // ======== Batch 读取 ========
+    RuntimeProfile::Counter* selected_rows = nullptr;
+    RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr;
+    RuntimeProfile::Counter* total_batches = nullptr;
+    RuntimeProfile::Counter* empty_selection_batches = nullptr;
+    RuntimeProfile::Counter* range_gap_skipped_rows = nullptr;
+
+    // ======== Column Reader ========
+    RuntimeProfile::Counter* reader_read_rows = nullptr;
+    RuntimeProfile::Counter* reader_skip_rows = nullptr;
+    RuntimeProfile::Counter* reader_select_rows = nullptr;
+    RuntimeProfile::Counter* arrow_read_records_time = nullptr;
+    RuntimeProfile::Counter* materialization_time = nullptr;
+
+    // ======== 延迟读取 ========
+    RuntimeProfile::Counter* lazy_read_filtered_rows = nullptr;
+    RuntimeProfile::Counter* filtered_bytes = nullptr;
+    RuntimeProfile::Counter* raw_rows_read = nullptr;
+    RuntimeProfile::Counter* column_read_time = nullptr;
+
+    // ======== 文件操作 ========
+    RuntimeProfile::Counter* parse_meta_time = nullptr;
+    RuntimeProfile::Counter* parse_footer_time = nullptr;
+    RuntimeProfile::Counter* file_reader_create_time = nullptr;
+    RuntimeProfile::Counter* open_file_num = nullptr;
+    RuntimeProfile::Counter* file_footer_read_calls = nullptr;
+    RuntimeProfile::Counter* file_footer_hit_cache = nullptr;
+
+    // ======== 裁剪耗时 ========
+    RuntimeProfile::Counter* row_group_filter_time = nullptr;
+    RuntimeProfile::Counter* page_index_read_calls = nullptr;
+    RuntimeProfile::Counter* page_index_filter_time = nullptr;
+    RuntimeProfile::Counter* read_page_index_time = nullptr;
+    RuntimeProfile::Counter* parse_page_index_time = nullptr;
+
+    // ======== 解压 & Page Cache ========
+    RuntimeProfile::Counter* decompress_time = nullptr;
+    RuntimeProfile::Counter* decompress_cnt = nullptr;
+    RuntimeProfile::Counter* page_read_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_compressed_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_decompressed_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_hit_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_missing_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_compressed_hit_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_decompressed_hit_counter = nullptr;
+
+    // ======== 解码 ========
+    RuntimeProfile::Counter* decode_header_time = nullptr;
+    RuntimeProfile::Counter* read_page_header_time = nullptr;
+    RuntimeProfile::Counter* decode_value_time = nullptr;
+    RuntimeProfile::Counter* decode_dict_time = nullptr;
+    RuntimeProfile::Counter* decode_level_time = nullptr;
+    RuntimeProfile::Counter* decode_null_map_time = nullptr;
+    RuntimeProfile::Counter* skip_page_header_num = nullptr;
+    RuntimeProfile::Counter* parse_page_header_num = nullptr;
+
+    // ======== 其他 ========
+    RuntimeProfile::Counter* predicate_filter_time = nullptr;
+    RuntimeProfile::Counter* dict_filter_rewrite_time = nullptr;
+    RuntimeProfile::Counter* convert_time = nullptr;
+    RuntimeProfile::Counter* bloom_filter_read_time = nullptr;
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_reader.cpp b/be/src/format_v2/parquet/parquet_reader.cpp
new file mode 100644
index 00000000000000..a0ac9fbfdd799f
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_reader.cpp
@@ -0,0 +1,444 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_reader.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <optional>
+#include <ranges>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_file_context.h"
+#include "format_v2/parquet/parquet_scan.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::parquet {
+
+struct ParquetReaderScanState {
+    ParquetFileContext file_context;
+    std::vector<std::unique_ptr<ParquetColumnSchema>> file_schema;
+    RowGroupScanPlan scan_plan;
+    ParquetScanScheduler scheduler;
+    const cctz::time_zone* timezone = nullptr;
+    bool enable_bloom_filter = false;
+};
+
+DataTypePtr nullable_like_original(const DataTypePtr& type, DataTypePtr nested_type) {
+    return type != nullptr && type->is_nullable() ? make_nullable(nested_type) : nested_type;
+}
+
+int timestamp_tz_scale(const ParquetTypeDescriptor& type_descriptor) {
+    switch (type_descriptor.time_unit) {
+    case ParquetTimeUnit::MILLIS:
+        return 3;
+    case ParquetTimeUnit::MICROS:
+    case ParquetTimeUnit::UNKNOWN:
+    default:
+        return 6;
+    }
+}
+
+bool should_map_to_timestamp_tz(const ParquetColumnSchema& column_schema) {
+    const auto& type_descriptor = column_schema.type_descriptor;
+    return type_descriptor.physical_type == ::parquet::Type::INT96 ||
+           (type_descriptor.is_timestamp && type_descriptor.timestamp_is_adjusted_to_utc);
+}
+
+DataTypePtr apply_timestamp_tz_mapping(ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    if (column_schema->kind == ParquetColumnSchemaKind::PRIMITIVE) {
+        if (should_map_to_timestamp_tz(*column_schema)) {
+            const bool nullable =
+                    column_schema->type != nullptr && column_schema->type->is_nullable();
+            const auto scale = timestamp_tz_scale(column_schema->type_descriptor);
+            column_schema->type = DataTypeFactory::instance().create_data_type(TYPE_TIMESTAMPTZ,
+                                                                               nullable, 0, scale);
+            column_schema->type_descriptor.doris_type = column_schema->type;
+        }
+        return column_schema->type;
+    }
+
+    std::vector<DataTypePtr> child_types;
+    child_types.reserve(column_schema->children.size());
+    for (auto& child : column_schema->children) {
+        child_types.push_back(apply_timestamp_tz_mapping(child.get()));
+    }
+
+    if (column_schema->kind == ParquetColumnSchemaKind::LIST) {
+        DORIS_CHECK(child_types.size() == 1);
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeArray>(child_types[0]));
+    } else if (column_schema->kind == ParquetColumnSchemaKind::MAP) {
+        DORIS_CHECK(child_types.size() == 2);
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeMap>(make_nullable(child_types[0]),
+                                                                   make_nullable(child_types[1])));
+    } else if (column_schema->kind == ParquetColumnSchemaKind::STRUCT) {
+        Strings child_names;
+        child_names.reserve(column_schema->children.size());
+        for (const auto& child : column_schema->children) {
+            child_names.push_back(child->name);
+        }
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeStruct>(child_types, child_names));
+    }
+    return column_schema->type;
+}
+
+static Status find_projected_minmax_leaf(const ParquetColumnSchema& column_schema,
+                                         const format::LocalColumnIndex& projection,
+                                         const ParquetColumnSchema** leaf_schema) {
+    DORIS_CHECK(leaf_schema != nullptr);
+    if (projection.project_all_children || projection.children.empty()) {
+        if (column_schema.leaf_column_id < 0) {
+            return Status::NotSupported(
+                    "Parquet aggregate pushdown only supports primitive column {}",
+                    column_schema.name);
+        }
+        if (column_schema.max_repetition_level > 0) {
+            return Status::NotSupported(
+                    "Parquet aggregate pushdown does not support repeated column {}",
+                    column_schema.name);
+        }
+        *leaf_schema = &column_schema;
+        return Status::OK();
+    }
+    if (projection.children.size() != 1) {
+        return Status::NotSupported(
+                "Parquet aggregate pushdown only supports a single nested leaf under column {}",
+                column_schema.name);
+    }
+    const auto& child_projection = projection.children[0];
+    const auto child_schema_it =
+            std::ranges::find_if(column_schema.children, [&](const auto& child_schema) {
+                return child_schema->local_id == child_projection.local_id();
+            });
+    if (child_schema_it != column_schema.children.end()) {
+        return find_projected_minmax_leaf(**child_schema_it, child_projection, leaf_schema);
+    }
+    return Status::InvalidArgument("Invalid parquet aggregate projection local id {} for column {}",
+                                   child_projection.local_id(), column_schema.name);
+}
+
+void ParquetReader::_fill_column_definition(const ParquetColumnSchema& column_schema,
+                                            format::ColumnDefinition* field) const {
+    if (column_schema.parquet_field_id >= 0) {
+        field->identifier = Field::create_field<TYPE_INT>(column_schema.parquet_field_id);
+    } else {
+        field->identifier = Field::create_field<TYPE_STRING>(column_schema.name);
+    }
+    field->local_id = column_schema.local_id;
+    field->name = column_schema.name;
+    field->type = column_schema.type != nullptr && !column_schema.type->is_nullable()
+                          ? make_nullable(column_schema.type)
+                          : column_schema.type;
+    field->children.clear();
+    field->children.reserve(column_schema.children.size());
+    for (const auto& child : column_schema.children) {
+        format::ColumnDefinition child_field;
+        _fill_column_definition(*child, &child_field);
+        field->children.push_back(std::move(child_field));
+    }
+}
+
+ParquetReader::ParquetReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                             std::unique_ptr<io::FileDescription>& file_description,
+                             std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                             std::optional<format::GlobalRowIdContext> global_rowid_context,
+                             bool enable_mapping_timestamp_tz)
+        : FileReader(system_properties, file_description, io_ctx, profile),
+          _global_rowid_context(global_rowid_context),
+          _enable_mapping_timestamp_tz(enable_mapping_timestamp_tz) {}
+
+ParquetReader::~ParquetReader() = default;
+
+Status ParquetReader::init(RuntimeState* state) {
+    RETURN_IF_ERROR(format::FileReader::init(state));
+    if (_profile != nullptr) {
+        COUNTER_UPDATE(_parquet_profile.file_reader_create_time,
+                       _reader_statistics.file_reader_create_time);
+        COUNTER_UPDATE(_parquet_profile.open_file_num, _reader_statistics.open_file_num);
+    }
+    _state = std::make_unique<ParquetReaderScanState>();
+    _state->enable_bloom_filter =
+            state != nullptr && state->query_options().enable_parquet_filter_by_bloom_filter;
+    if (state != nullptr) {
+        _state->timezone = &state->timezone_obj();
+        _state->scheduler.set_timezone(&state->timezone_obj());
+        _state->scheduler.set_enable_strict_mode(state->enable_strict_mode());
+    }
+    // Open parquet file and parse metadata to get file schema.
+    RETURN_IF_ERROR(_state->file_context.open(_tracing_file_reader, _io_ctx.get()));
+    // Build file schema from parquet metadata.
+    // A file reader may expose raw file identifiers, such as Parquet field_id, through ColumnDefinition::identifier
+    RETURN_IF_ERROR(
+            build_parquet_column_schema(*_state->file_context.schema, &_state->file_schema));
+    if (_enable_mapping_timestamp_tz) {
+        for (auto& column_schema : _state->file_schema) {
+            apply_timestamp_tz_mapping(column_schema.get());
+        }
+    }
+    return Status::OK();
+}
+
+Status ParquetReader::get_schema(std::vector<format::ColumnDefinition>* file_schema) const {
+    if (file_schema == nullptr) {
+        return Status::InvalidArgument("file_schema is null");
+    }
+    file_schema->clear();
+    if (_state == nullptr || _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+
+    file_schema->reserve(_state->file_schema.size());
+    for (size_t column_idx = 0; column_idx < _state->file_schema.size(); ++column_idx) {
+        format::ColumnDefinition field;
+        _fill_column_definition(*_state->file_schema[column_idx], &field);
+        DORIS_CHECK(field.local_id == static_cast<int32_t>(column_idx));
+        file_schema->push_back(std::move(field));
+    }
+    if (_global_rowid_context.has_value()) {
+        file_schema->push_back(format::global_rowid_column_definition());
+    }
+    return Status::OK();
+}
+
+std::unique_ptr<format::TableColumnMapper> ParquetReader::create_column_mapper(
+        format::TableColumnMapperOptions options) const {
+    return std::make_unique<format::ParquetColumnMapper>(std::move(options));
+}
+
+Status ParquetReader::open(std::shared_ptr<format::FileScanRequest> request) {
+    if (_state == nullptr || _state->file_context.metadata == nullptr ||
+        _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    auto request_snapshot = request;
+    DORIS_CHECK(request_snapshot != nullptr);
+    RETURN_IF_ERROR(format::FileReader::open(std::move(request)));
+
+    const int num_fields = static_cast<int>(_state->file_schema.size());
+    for (const auto& column_filter : request_snapshot->column_predicate_filters) {
+        const auto file_column_id = column_filter.effective_file_column_id();
+        if (!file_column_id.is_valid() || file_column_id.value() >= num_fields) {
+            return Status::InvalidArgument("Invalid parquet filter top-level local id {}",
+                                           file_column_id.value());
+        }
+    }
+
+    // `local_positions.empty()` means all columns are needed by table reader
+    // TODO(gabriel): It will happen only for TVF `select *` query.
+    if (request_snapshot->local_positions.empty()) {
+        for (const auto& col : request_snapshot->predicate_columns) {
+            request_snapshot->local_positions.emplace(col.column_id(),
+                                                      format::LocalIndex(col.column_id().value()));
+        }
+        for (const auto& col : request_snapshot->non_predicate_columns) {
+            request_snapshot->local_positions.emplace(col.column_id(),
+                                                      format::LocalIndex(col.column_id().value()));
+        }
+    }
+
+    for (const auto& col : request_snapshot->predicate_columns) {
+        DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0);
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID ||
+            local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            continue;
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < num_fields);
+    }
+    for (const auto& col : request_snapshot->non_predicate_columns) {
+        DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0);
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID ||
+            local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            continue;
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < num_fields);
+    }
+
+    RowGroupScanPlan row_group_plan;
+    ParquetScanRange scan_range;
+    scan_range.start_offset = _file_description->range_start_offset;
+    scan_range.size = _file_description->range_size;
+    scan_range.file_size = _file_description->file_size;
+    // Get selected ranges in row groups according to metadata (Row-Group level index and Page Index including Zonemap, Dictionary, Bloom Filter).
+    RETURN_IF_ERROR(plan_parquet_row_groups(
+            *_state->file_context.metadata, _state->file_context.file_reader.get(),
+            _state->file_schema, *request_snapshot, scan_range, _state->enable_bloom_filter,
+            &row_group_plan, _state->timezone));
+    if (_profile != nullptr) {
+        _parquet_profile.update_pruning_stats(row_group_plan.pruning_stats);
+    }
+    _state->scan_plan = row_group_plan;
+    _state->scheduler.set_page_skip_profile(_parquet_profile.page_skip_profile());
+    _state->scheduler.set_global_rowid_context(_global_rowid_context);
+    _state->scheduler.set_scan_profile(_parquet_profile.scan_profile());
+    _state->scheduler.set_plan(std::move(row_group_plan));
+    _eof = _state->scheduler.empty();
+    return Status::OK();
+}
+
+Status ParquetReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    if (_state == nullptr || _state->file_context.file_reader == nullptr ||
+        _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    *rows = 0;
+    if (_eof) {
+        *eof = true;
+        return Status::OK();
+    }
+    auto request_snapshot = _request;
+    if (request_snapshot == nullptr) {
+        return Status::Cancelled("ParquetReader is closed");
+    }
+
+    const auto predicate_filtered_rows_before = _state->scheduler.predicate_filtered_rows();
+    RETURN_IF_ERROR(_state->scheduler.read_next_batch(_state->file_context, _state->file_schema,
+                                                      *request_snapshot, file_block, rows, eof));
+    if (_io_ctx != nullptr) {
+        _io_ctx->predicate_filtered_rows +=
+                _state->scheduler.predicate_filtered_rows() - predicate_filtered_rows_before;
+    }
+    _eof = *eof;
+    return Status::OK();
+}
+
+void ParquetReader::set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {
+    if (_state == nullptr) {
+        return;
+    }
+    _state->scheduler.set_condition_cache_context(std::move(ctx));
+    if (_io_ctx != nullptr) {
+        // Condition-cache HIT filters row ranges before batch reading, so skipped rows never belong
+        // to a later get_block() batch. Report the plan-level skipped rows at the same point where
+        // the scan plan is rewritten.
+        _io_ctx->condition_cache_filtered_rows += _state->scheduler.condition_cache_filtered_rows();
+    }
+}
+
+int64_t ParquetReader::get_total_rows() const {
+    if (_state == nullptr) {
+        return 0;
+    }
+    int64_t rows = 0;
+    for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+        rows += row_group_plan.row_group_rows;
+    }
+    return rows;
+}
+
+Status ParquetReader::get_aggregate_result(const format::FileAggregateRequest& request,
+                                           format::FileAggregateResult* result) {
+    DORIS_CHECK(result != nullptr);
+    if (_state == nullptr || _state->file_context.metadata == nullptr ||
+        _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    result->count = 0;
+    result->columns.clear();
+    if (request.agg_type != TPushAggOp::type::COUNT &&
+        request.agg_type != TPushAggOp::type::MINMAX) {
+        return Status::NotSupported("Unsupported parquet aggregate pushdown type {}",
+                                    request.agg_type);
+    }
+
+    // Aggregate row count in all selected row groups. For MIN/MAX aggregate, this is used to determine whether there is no row group selected.
+    for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+        auto row_group_metadata =
+                _state->file_context.metadata->RowGroup(row_group_plan.row_group_id);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        result->count += row_group_metadata->num_rows();
+    }
+    if (request.agg_type == TPushAggOp::type::COUNT) {
+        return Status::OK();
+    }
+
+    result->columns.resize(request.columns.size());
+    for (size_t request_column_idx = 0; request_column_idx < request.columns.size();
+         ++request_column_idx) {
+        const auto file_column_id = request.columns[request_column_idx].projection.local_id();
+        if (file_column_id < 0 ||
+            file_column_id >= static_cast<int32_t>(_state->file_schema.size())) {
+            return Status::InvalidArgument("Invalid parquet aggregate column id {}",
+                                           file_column_id);
+        }
+        const auto& column_schema = _state->file_schema[file_column_id];
+        DORIS_CHECK(column_schema != nullptr);
+        const ParquetColumnSchema* leaf_schema = nullptr;
+        RETURN_IF_ERROR(find_projected_minmax_leaf(
+                *column_schema, request.columns[request_column_idx].projection, &leaf_schema));
+        DORIS_CHECK(leaf_schema != nullptr);
+
+        auto& aggregate_column = result->columns[request_column_idx];
+        aggregate_column.projection = request.columns[request_column_idx].projection;
+        for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+            auto row_group_metadata =
+                    _state->file_context.metadata->RowGroup(row_group_plan.row_group_id);
+            DORIS_CHECK(row_group_metadata != nullptr);
+            auto column_chunk = row_group_metadata->ColumnChunk(leaf_schema->leaf_column_id);
+            DORIS_CHECK(column_chunk != nullptr);
+            const auto statistics = ParquetStatisticsUtils::TransformColumnStatistics(
+                    *leaf_schema, column_chunk->statistics(), _state->timezone);
+            if (!statistics.has_min_max) {
+                return Status::NotSupported("Missing parquet min/max statistics for column {}",
+                                            leaf_schema->name);
+            }
+            if (!aggregate_column.has_min || statistics.min_value < aggregate_column.min_value) {
+                aggregate_column.min_value = statistics.min_value;
+                aggregate_column.has_min = true;
+            }
+            if (!aggregate_column.has_max || aggregate_column.max_value < statistics.max_value) {
+                aggregate_column.max_value = statistics.max_value;
+                aggregate_column.has_max = true;
+            }
+        }
+        if (!aggregate_column.has_min || !aggregate_column.has_max) {
+            return Status::NotSupported("No parquet row group selected for min/max pushdown");
+        }
+    }
+    return Status::OK();
+}
+
+Status ParquetReader::close() {
+    if (_state != nullptr) {
+        RETURN_IF_ERROR(_state->file_context.close());
+    }
+    return FileReader::close();
+}
+
+void ParquetReader::_init_profile() {
+    _parquet_profile.init(_profile);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_reader.h b/be/src/format_v2/parquet/parquet_reader.h
new file mode 100644
index 00000000000000..8112108e32d207
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_reader.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_profile.h"
+
+namespace doris {
+namespace io {
+struct IOContext;
+} // namespace io
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetReaderScanState;
+
+// ============================================================================
+// Parquet 文件物理读取层 — FileReader 接口的 Parquet 实现
+// ============================================================================
+//
+// 职责边界：
+//   ✓ 理解 Parquet file-local schema，处理文件打开、元数据解析、批次读取
+//   ✗ 不理解 Iceberg/global schema，不处理 table-level cast/default/generated/partition 列
+//
+// 被 TableReader（HiveReader、IcebergTableReader）通过 FileReader 接口调用。
+// TableReader 负责 schema mapping、predicate localization，生成 FileScanRequest 后
+// 传给 ParquetReader::open()。
+//
+// 生命周期（TableReader 视角）：
+//   init() → get_schema() → open(request) → get_block() [loop] → close()
+//
+// init()     — 打开文件，解析 footer，构建 ParquetColumnSchema 树
+// get_schema() — 暴露 file-local ColumnDefinition[]，供 TableReader 做 schema matching
+// open()     — 接收 FileScanRequest，执行 RowGroup 裁剪（plan_parquet_row_groups），
+//              初始化 ParquetScanScheduler
+// get_block() — 委托 ParquetScanScheduler 逐批读取（late materialization）
+// close()    — 释放 Arrow 文件资源
+// ============================================================================
+class ParquetReader : public format::FileReader {
+public:
+    ParquetReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                  std::unique_ptr<io::FileDescription>& file_description,
+                  std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                  std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt,
+                  bool enable_mapping_timestamp_tz = false);
+    ~ParquetReader() override;
+
+    Status init(RuntimeState* state) override;
+
+    Status get_schema(std::vector<format::ColumnDefinition>* file_schema) const override;
+
+    std::unique_ptr<format::TableColumnMapper> create_column_mapper(
+            format::TableColumnMapperOptions options) const override;
+
+    Status open(std::shared_ptr<format::FileScanRequest> request) override;
+
+    // 读取下一批 Parquet file-local block。
+    // 返回的列保持 file-local 语义，不补 default/generated/partition 列。
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+
+    // 聚合下推：从已裁剪选中的 RowGroup statistics 中提取 COUNT / MIN / MAX。
+    Status get_aggregate_result(const format::FileAggregateRequest& request,
+                                format::FileAggregateResult* result) override;
+
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override;
+
+    int64_t get_total_rows() const override;
+
+    Status close() override;
+
+protected:
+    void _init_profile() override;
+
+private:
+    // 递归将 ParquetColumnSchema 树转换为 ColumnDefinition。
+    // identifier 生成规则：有 parquet_field_id → Field(INT, field_id)，否则 → Field(STRING, name)
+    void _fill_column_definition(const ParquetColumnSchema& column_schema,
+                                 format::ColumnDefinition* field) const;
+
+    std::unique_ptr<ParquetReaderScanState>
+            _state;                  // 全部扫描状态（file_context + schema + scheduler）
+    ParquetProfile _parquet_profile; // RuntimeProfile 计数器集合
+    std::optional<format::GlobalRowIdContext> _global_rowid_context; // 全局 RowId 上下文
+    bool _enable_mapping_timestamp_tz = false; // 是否将 UTC timestamp 映射为 TIMESTAMPTZ
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_scan.cpp b/be/src/format_v2/parquet/parquet_scan.cpp
new file mode 100644
index 00000000000000..5148436fbc49a5
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_scan.cpp
@@ -0,0 +1,671 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_scan.h"
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_file_context.h"
+#include "format_v2/parquet/parquet_statistics.h"
+
+namespace doris::format::parquet {
+
+namespace {
+
+int64_t column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    return column_metadata.has_dictionary_page()
+                   ? cast_set<int64_t>(column_metadata.dictionary_page_offset())
+                   : cast_set<int64_t>(column_metadata.data_page_offset());
+}
+
+// 判断 RG 是否在 scan_range 的 offset 范围之外。
+//
+// 策略：取 RG 第一个和最后一个 column chunk 的起始 offset 的中点，
+// 如果中点不在 [range_start, range_end) 内则该 RG 不属于当前 split。
+// 特殊处理：当 scan_range 覆盖整个文件（start=0, size>=file_size）时直接返回 false。
+bool is_row_group_outside_range(const ::parquet::FileMetaData& metadata,
+                                const ParquetScanRange& scan_range, int row_group_idx) {
+    // size < 0 表示不限制范围（读整个文件）
+    if (scan_range.size < 0) {
+        return false;
+    }
+    const int64_t range_start_offset = scan_range.start_offset;
+    const int64_t range_end_offset = range_start_offset + scan_range.size;
+    DORIS_CHECK(range_start_offset >= 0);
+    DORIS_CHECK(range_end_offset >= range_start_offset);
+    // 覆盖整个文件 → 不过滤
+    if (range_start_offset == 0 &&
+        (scan_range.file_size < 0 || range_end_offset >= scan_range.file_size)) {
+        return false;
+    }
+
+    auto row_group_metadata = metadata.RowGroup(row_group_idx);
+    DORIS_CHECK(row_group_metadata != nullptr);
+    DORIS_CHECK(row_group_metadata->num_columns() > 0);
+    const auto first_column = row_group_metadata->ColumnChunk(0);
+    const auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1);
+    DORIS_CHECK(first_column != nullptr);
+    DORIS_CHECK(last_column != nullptr);
+    // RG 的 offset 范围 = [第一个 column chunk 起始, 最后一个 column chunk 结束)
+    const int64_t row_group_start_offset = column_start_offset(*first_column);
+    const int64_t row_group_end_offset =
+            column_start_offset(*last_column) + last_column->total_compressed_size();
+    // 用 RGB 的中点判断归属 — 中点在哪个 split 的范围就属于哪个 split
+    const int64_t row_group_mid_offset =
+            row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2;
+    return row_group_mid_offset < range_start_offset || row_group_mid_offset >= range_end_offset;
+}
+
+} // namespace
+
+// 最外层裁剪入口：三级流水线（代价从低到高）→ 输出 RowGroupScanPlan。
+//
+// 1. 计算 first_file_row + 过滤 scan_range 外的 RG — O(1) 算术（is_row_group_outside_range）
+// 2. select_row_groups_by_statistics() — RG 级裁剪 (min/max + dictionary + bloom filter)，
+//    仅对 scan_range 内的 RG 执行，避免对范围外的 RG 做昂贵的 bloom filter/dictionary 读取
+// 3. select_row_group_ranges_by_page_index() — Page 级细粒度裁剪
+Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata,
+                               ::parquet::ParquetFileReader* file_reader,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request,
+                               const ParquetScanRange& scan_range, bool enable_bloom_filter,
+                               RowGroupScanPlan* plan, const cctz::time_zone* timezone) {
+    DORIS_CHECK(plan != nullptr);
+    plan->row_groups.clear();
+    plan->pruning_stats = ParquetPruningStats {};
+
+    // ① 计算 first_file_row + 过滤 scan_range（代价最低，先做）
+    std::vector<int64_t> row_group_first_rows(metadata.num_row_groups());
+    std::vector<int> scan_range_selected_row_groups;
+    scan_range_selected_row_groups.reserve(metadata.num_row_groups());
+    int64_t next_row_group_first_row = 0;
+    for (int row_group_idx = 0; row_group_idx < metadata.num_row_groups(); ++row_group_idx) {
+        row_group_first_rows[row_group_idx] = next_row_group_first_row;
+        auto row_group_metadata = metadata.RowGroup(row_group_idx);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        const int64_t row_group_rows = row_group_metadata->num_rows();
+        if (row_group_rows < 0) {
+            return Status::Corruption("Invalid negative row count in parquet row group {}",
+                                      row_group_idx);
+        }
+        next_row_group_first_row += row_group_rows;
+        if (!is_row_group_outside_range(metadata, scan_range, row_group_idx)) {
+            scan_range_selected_row_groups.push_back(row_group_idx);
+        }
+    }
+
+    // ② RG 级裁剪：仅对 scan_range 内的 RG 执行
+    std::vector<int> statistics_selected_row_groups;
+    RETURN_IF_ERROR(select_row_groups_by_statistics(
+            metadata, file_reader, file_schema, request, &scan_range_selected_row_groups,
+            &statistics_selected_row_groups, enable_bloom_filter, &plan->pruning_stats, timezone));
+
+    plan->row_groups.reserve(statistics_selected_row_groups.size());
+    for (const auto row_group_idx : statistics_selected_row_groups) {
+        auto row_group_metadata = metadata.RowGroup(row_group_idx);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        const int64_t row_group_rows = row_group_metadata->num_rows();
+        if (row_group_rows == 0) {
+            continue;
+        }
+
+        RowGroupReadPlan row_group_plan;
+        row_group_plan.row_group_id = row_group_idx;
+        row_group_plan.first_file_row = row_group_first_rows[row_group_idx];
+        row_group_plan.row_group_rows = row_group_rows;
+        RETURN_IF_ERROR(select_row_group_ranges_by_page_index(
+                file_reader, file_schema, request, row_group_idx, row_group_rows,
+                &row_group_plan.selected_ranges, &row_group_plan.page_skip_plans,
+                &plan->pruning_stats, timezone));
+        if (row_group_plan.selected_ranges.empty()) {
+            continue;
+        }
+        plan->pruning_stats.selected_row_ranges += row_group_plan.selected_ranges.size();
+        plan->row_groups.push_back(std::move(row_group_plan));
+    }
+    plan->pruning_stats.selected_row_groups = plan->row_groups.size();
+    return Status::OK();
+}
+
+namespace {
+
+uint16_t apply_filter_to_selection(const IColumn::Filter& filter, SelectionVector* selection,
+                                   uint16_t selected_rows) {
+    uint16_t new_selected_rows = 0;
+    for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) {
+        const auto row_idx = selection->get_index(selection_idx);
+        if (filter[row_idx] != 0) {
+            selection->set_index(new_selected_rows++, static_cast<SelectionVector::Index>(row_idx));
+        }
+    }
+    return new_selected_rows;
+}
+
+Status execute_filter_conjuncts(const format::FileScanRequest& request, int64_t batch_rows,
+                                Block* file_block, SelectionVector* selection,
+                                uint16_t* selected_rows) {
+    for (const auto& conjunct : request.conjuncts) {
+        if (*selected_rows == 0) {
+            break;
+        }
+        DORIS_CHECK(conjunct != nullptr);
+        IColumn::Filter filter(static_cast<size_t>(batch_rows), 1);
+        bool can_filter_all = false;
+        RETURN_IF_ERROR(conjunct->execute_filter(file_block, filter.data(),
+                                                 static_cast<size_t>(batch_rows), false,
+                                                 &can_filter_all));
+        *selected_rows =
+                can_filter_all ? 0 : apply_filter_to_selection(filter, selection, *selected_rows);
+    }
+    return Status::OK();
+}
+
+Status execute_delete_conjuncts(const format::FileScanRequest& request, int64_t batch_rows,
+                                Block* file_block, SelectionVector* selection,
+                                uint16_t* selected_rows) {
+    for (const auto& delete_conjunct : request.delete_conjuncts) {
+        if (*selected_rows == 0) {
+            break;
+        }
+        DORIS_CHECK(delete_conjunct != nullptr);
+        int result_column_id = -1;
+        RETURN_IF_ERROR(delete_conjunct->root()->execute(delete_conjunct.get(), file_block,
+                                                         &result_column_id));
+        DORIS_CHECK(result_column_id >= 0 &&
+                    result_column_id < static_cast<int>(file_block->columns()));
+        const auto& delete_filter = assert_cast<const ColumnUInt8&>(
+                                            *file_block->get_by_position(result_column_id).column)
+                                            .get_data();
+        DORIS_CHECK(delete_filter.size() == static_cast<size_t>(batch_rows));
+        IColumn::Filter keep_filter(static_cast<size_t>(batch_rows), 1);
+        bool has_kept_row = false;
+        for (size_t row = 0; row < static_cast<size_t>(batch_rows); ++row) {
+            keep_filter[row] = !delete_filter[row];
+            has_kept_row |= keep_filter[row] != 0;
+        }
+        file_block->erase(result_column_id);
+        *selected_rows =
+                !has_kept_row ? 0
+                              : apply_filter_to_selection(keep_filter, selection, *selected_rows);
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows,
+                                    int64_t batch_rows) {
+    IColumn::Filter filter(static_cast<size_t>(batch_rows), 0);
+    for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) {
+        filter[selection.get_index(selection_idx)] = 1;
+    }
+    return filter;
+}
+
+Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows,
+                             Block* file_block, SelectionVector* selection, uint16_t* selected_rows,
+                             int64_t* conjunct_filtered_rows) {
+    if (request.conjuncts.empty() && request.delete_conjuncts.empty()) {
+        return Status::OK();
+    }
+    const auto selected_rows_before_conjunct = *selected_rows;
+    RETURN_IF_ERROR(
+            execute_filter_conjuncts(request, batch_rows, file_block, selection, selected_rows));
+    if (conjunct_filtered_rows != nullptr) {
+        *conjunct_filtered_rows += static_cast<int64_t>(selected_rows_before_conjunct) -
+                                   static_cast<int64_t>(*selected_rows);
+    }
+    if (*selected_rows == 0) {
+        return Status::OK();
+    }
+    return execute_delete_conjuncts(request, batch_rows, file_block, selection, selected_rows);
+}
+
+namespace {
+// TODO: batch size in SessionVariable
+constexpr int64_t DEFAULT_PARQUET_READ_BATCH_SIZE = 4096;
+
+int64_t count_range_rows(const std::vector<RowRange>& ranges) {
+    int64_t rows = 0;
+    for (const auto& range : ranges) {
+        rows += range.length;
+    }
+    return rows;
+}
+
+void append_intersection(const RowRange& left, const RowRange& right,
+                         std::vector<RowRange>* result) {
+    const int64_t start = std::max(left.start, right.start);
+    const int64_t end = std::min(left.start + left.length, right.start + right.length);
+    if (start < end) {
+        result->push_back(RowRange {.start = start, .length = end - start});
+    }
+}
+
+std::vector<RowRange> filter_ranges_by_condition_cache(const std::vector<RowRange>& ranges,
+                                                       const std::vector<bool>& cache,
+                                                       int64_t row_group_first_row,
+                                                       int64_t base_granule) {
+    std::vector<RowRange> result;
+    if (cache.empty()) {
+        return ranges;
+    }
+
+    // Cache coordinates are file-global granules; RowRange coordinates are row-group-relative.
+    // Walk every selected range in order and split it by granule. Granules covered by the bitmap
+    // are kept only when the bit is true. Granules outside the bitmap are kept conservatively, so
+    // an undersized or old-format cache entry cannot skip valid rows.
+    for (const auto& range : ranges) {
+        const int64_t global_start = row_group_first_row + range.start;
+        const int64_t global_end = global_start + range.length;
+        for (int64_t granule = global_start / ConditionCacheContext::GRANULE_SIZE;
+             granule <= (global_end - 1) / ConditionCacheContext::GRANULE_SIZE; ++granule) {
+            const int64_t cache_idx = granule - base_granule;
+            const bool keep = cache_idx < 0 || static_cast<size_t>(cache_idx) >= cache.size() ||
+                              cache[static_cast<size_t>(cache_idx)];
+            if (!keep) {
+                continue;
+            }
+            const int64_t granule_start = granule * ConditionCacheContext::GRANULE_SIZE;
+            const int64_t granule_end = granule_start + ConditionCacheContext::GRANULE_SIZE;
+            const RowRange file_granule_range {.start = granule_start - row_group_first_row,
+                                               .length = granule_end - granule_start};
+            append_intersection(range, file_granule_range, &result);
+        }
+    }
+    return result;
+}
+
+} // namespace
+
+void ParquetScanScheduler::set_plan(RowGroupScanPlan plan) {
+    _row_group_plans = std::move(plan.row_groups);
+    _condition_cache_filtered_rows = 0;
+    _predicate_filtered_rows = 0;
+    reset();
+}
+
+void ParquetScanScheduler::set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {
+    _condition_cache_ctx = std::move(ctx);
+    if (!_condition_cache_ctx || !_condition_cache_ctx->filter_result || _row_group_plans.empty()) {
+        return;
+    }
+
+    _condition_cache_ctx->base_granule =
+            _row_group_plans.front().first_file_row / ConditionCacheContext::GRANULE_SIZE;
+    if (!_condition_cache_ctx->is_hit) {
+        return;
+    }
+
+    std::vector<RowGroupReadPlan> filtered_plans;
+    filtered_plans.reserve(_row_group_plans.size());
+    for (auto& plan : _row_group_plans) {
+        const int64_t old_rows = count_range_rows(plan.selected_ranges);
+        plan.selected_ranges = filter_ranges_by_condition_cache(
+                plan.selected_ranges, *_condition_cache_ctx->filter_result, plan.first_file_row,
+                _condition_cache_ctx->base_granule);
+        const int64_t new_rows = count_range_rows(plan.selected_ranges);
+        _condition_cache_filtered_rows += old_rows - new_rows;
+        if (!plan.selected_ranges.empty()) {
+            filtered_plans.push_back(std::move(plan));
+        }
+    }
+    _row_group_plans = std::move(filtered_plans);
+    reset();
+}
+
+void ParquetScanScheduler::reset() {
+    _next_row_group_plan_idx = 0;
+    reset_current_row_group();
+}
+
+void ParquetScanScheduler::reset_current_row_group() {
+    _current_row_group.reset();
+    _current_predicate_columns.clear();
+    _current_non_predicate_columns.clear();
+    _current_row_group_rows = 0;
+    _current_row_group_rows_read = 0;
+    _current_row_group_first_row = 0;
+    _current_selected_ranges.clear();
+    _current_range_idx = 0;
+    _current_range_rows_read = 0;
+}
+
+Status ParquetScanScheduler::open_next_row_group(
+        ParquetFileContext& file_context,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, bool* has_row_group) {
+    *has_row_group = false;
+    if (_next_row_group_plan_idx >= _row_group_plans.size()) {
+        return Status::OK();
+    }
+    const RowGroupReadPlan& row_group_plan = _row_group_plans[_next_row_group_plan_idx++];
+    const int row_group_idx = row_group_plan.row_group_id;
+    try {
+        _current_row_group = file_context.file_reader->RowGroup(row_group_idx);
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to open parquet row group {}: {}", row_group_idx,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to open parquet row group {}: {}", row_group_idx,
+                                     e.what());
+    }
+
+    auto row_group_metadata = file_context.metadata->RowGroup(row_group_idx);
+    DORIS_CHECK(row_group_metadata != nullptr);
+    _current_row_group_rows = row_group_metadata->num_rows();
+    DORIS_CHECK(_current_row_group_rows == row_group_plan.row_group_rows);
+    DORIS_CHECK(_current_row_group_rows > 0);
+    DORIS_CHECK(!row_group_plan.selected_ranges.empty());
+    _current_row_group_first_row = row_group_plan.first_file_row;
+    _current_row_group_rows_read = 0;
+    _current_selected_ranges = row_group_plan.selected_ranges;
+    _current_range_idx = 0;
+    _current_range_rows_read = 0;
+    _current_predicate_columns.clear();
+    _current_non_predicate_columns.clear();
+
+    ParquetColumnReaderFactory column_reader_factory(
+            _current_row_group, file_context.schema->num_columns(), &row_group_plan.page_skip_plans,
+            _page_skip_profile, _timezone, _enable_strict_mode,
+            _scan_profile.column_reader_profile);
+    for (const auto& col : request.predicate_columns) {
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID) {
+            _current_predicate_columns[local_id] =
+                    column_reader_factory.create_row_position_column_reader(
+                            _current_row_group_first_row);
+            continue;
+        }
+        if (local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            DORIS_CHECK(_global_rowid_context.has_value());
+            _current_predicate_columns[local_id] =
+                    column_reader_factory.create_global_rowid_column_reader(
+                            *_global_rowid_context, _current_row_group_first_row);
+            continue;
+        }
+
+        DORIS_CHECK(local_id >= 0 && local_id < static_cast<int32_t>(file_schema.size()));
+        const auto& column_schema = file_schema[local_id];
+        DORIS_CHECK(column_schema != nullptr);
+        std::unique_ptr<ParquetColumnReader> column_reader;
+        RETURN_IF_ERROR(column_reader_factory.create(*column_schema, &col, &column_reader));
+        _current_predicate_columns[local_id] = std::move(column_reader);
+    }
+    for (const auto& col : request.non_predicate_columns) {
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID) {
+            _current_non_predicate_columns[local_id] =
+                    column_reader_factory.create_row_position_column_reader(
+                            _current_row_group_first_row);
+            continue;
+        }
+        if (local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            DORIS_CHECK(_global_rowid_context.has_value());
+            _current_non_predicate_columns[local_id] =
+                    column_reader_factory.create_global_rowid_column_reader(
+                            *_global_rowid_context, _current_row_group_first_row);
+            continue;
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < static_cast<int32_t>(file_schema.size()));
+        const auto& column_schema = file_schema[local_id];
+        DORIS_CHECK(column_schema != nullptr);
+        std::unique_ptr<ParquetColumnReader> column_reader;
+        RETURN_IF_ERROR(column_reader_factory.create(*column_schema, &col, &column_reader));
+        _current_non_predicate_columns[local_id] = std::move(column_reader);
+    }
+    *has_row_group = true;
+    return Status::OK();
+}
+
+Status ParquetScanScheduler::skip_current_row_group_rows(int64_t rows) {
+    DORIS_CHECK(rows >= 0);
+    if (rows == 0) {
+        return Status::OK();
+    }
+    if (_scan_profile.range_gap_skipped_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.range_gap_skipped_rows, rows);
+    }
+    for (const auto& column_reader : _current_predicate_columns | std::views::values) {
+        RETURN_IF_ERROR(column_reader->skip(rows));
+    }
+    for (const auto& column_reader : _current_non_predicate_columns | std::views::values) {
+        RETURN_IF_ERROR(column_reader->skip(rows));
+    }
+    _current_row_group_rows_read += rows;
+    return Status::OK();
+}
+
+Status ParquetScanScheduler::read_filter_columns(int64_t batch_rows,
+                                                 const format::FileScanRequest& request,
+                                                 Block* file_block, SelectionVector* selection,
+                                                 uint16_t* selected_rows,
+                                                 int64_t* conjunct_filtered_rows) {
+    if (!request.conjuncts.empty() || !request.delete_conjuncts.empty()) {
+        selection->resize(static_cast<size_t>(batch_rows));
+    }
+    for (const auto& [fid, column_reader] : _current_predicate_columns) {
+        auto position_it = request.local_positions.find(format::LocalColumnId(fid));
+        DORIS_CHECK(position_it != request.local_positions.end());
+        const auto block_position = position_it->second.value();
+        DCHECK(remove_nullable(column_reader->type())
+                       ->equals(*remove_nullable(file_block->get_by_position(block_position).type)))
+                << column_reader->type()->get_name() << " "
+                << file_block->get_by_position(block_position).type->get_name() << " "
+                << column_reader->name() << " " << file_block->get_by_position(block_position).name;
+        auto column = file_block->get_by_position(block_position).column->assert_mutable();
+        int64_t column_rows = 0;
+        {
+            SCOPED_TIMER(_scan_profile.column_read_time);
+            RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows));
+        }
+        if (column_rows != batch_rows) {
+            return Status::Corruption("Parquet filter column {} returned {} rows, expected {} rows",
+                                      column_reader->name(), column_rows, batch_rows);
+        }
+        file_block->replace_by_position(block_position, std::move(column));
+    }
+    if (_scan_profile.predicate_filter_time == nullptr) {
+        return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows,
+                                     conjunct_filtered_rows);
+    }
+    SCOPED_TIMER(_scan_profile.predicate_filter_time);
+    return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows,
+                                 conjunct_filtered_rows);
+}
+
+Status ParquetScanScheduler::read_current_row_group_batch(int64_t batch_rows,
+                                                          const format::FileScanRequest& request,
+                                                          int64_t batch_first_file_row,
+                                                          Block* file_block, size_t* rows) {
+    if (_scan_profile.total_batches != nullptr) {
+        COUNTER_UPDATE(_scan_profile.total_batches, 1);
+    }
+    if (_scan_profile.raw_rows_read != nullptr) {
+        COUNTER_UPDATE(_scan_profile.raw_rows_read, batch_rows);
+    }
+    if (_current_predicate_columns.empty() && _current_non_predicate_columns.empty()) {
+        *rows = static_cast<size_t>(batch_rows);
+        if (_scan_profile.selected_rows != nullptr) {
+            COUNTER_UPDATE(_scan_profile.selected_rows, batch_rows);
+        }
+        return Status::OK();
+    }
+    SelectionVector selection;
+    DORIS_CHECK(batch_rows <= std::numeric_limits<uint16_t>::max());
+    uint16_t selected_rows = static_cast<uint16_t>(batch_rows);
+    int64_t conjunct_filtered_rows = 0;
+    RETURN_IF_ERROR(read_filter_columns(batch_rows, request, file_block, &selection, &selected_rows,
+                                        &conjunct_filtered_rows));
+    _predicate_filtered_rows += conjunct_filtered_rows;
+    mark_condition_cache_granules(selection, selected_rows, batch_first_file_row);
+
+    const bool need_filter_output = selected_rows != batch_rows;
+    if (_scan_profile.selected_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.selected_rows, selected_rows);
+    }
+    if (_scan_profile.rows_filtered_by_conjunct != nullptr) {
+        COUNTER_UPDATE(_scan_profile.rows_filtered_by_conjunct, conjunct_filtered_rows);
+    }
+    if (!_current_non_predicate_columns.empty() &&
+        _scan_profile.lazy_read_filtered_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.lazy_read_filtered_rows, batch_rows - selected_rows);
+    }
+    if (selected_rows == 0 && _scan_profile.empty_selection_batches != nullptr) {
+        COUNTER_UPDATE(_scan_profile.empty_selection_batches, 1);
+    }
+    if (need_filter_output) {
+        IColumn::Filter output_filter = selection_to_filter(selection, selected_rows, batch_rows);
+        for (const auto& col : request.predicate_columns) {
+            auto position_it = request.local_positions.find(col.column_id());
+            DORIS_CHECK(position_it != request.local_positions.end());
+            const auto block_position = position_it->second.value();
+            RETURN_IF_CATCH_EXCEPTION(file_block->replace_by_position(
+                    block_position, file_block->get_by_position(block_position)
+                                            .column->filter(output_filter, selected_rows)));
+        }
+    }
+
+    {
+        SCOPED_TIMER(_scan_profile.column_read_time);
+        for (const auto& [fid, column_reader] : _current_non_predicate_columns) {
+            auto position_it = request.local_positions.find(format::LocalColumnId(fid));
+            DORIS_CHECK(position_it != request.local_positions.end());
+            const auto block_position = position_it->second.value();
+            auto column = file_block->get_by_position(block_position).column->assert_mutable();
+            DCHECK_EQ(file_block->get_by_position(block_position).type->get_primitive_type(),
+                      column_reader->type()->get_primitive_type())
+                    << type_to_string(file_block->get_by_position(block_position)
+                                              .type->get_primitive_type())
+                    << " " << type_to_string(column_reader->type()->get_primitive_type()) << " "
+                    << column_reader->name() << " " << fid << " " << block_position;
+            if (need_filter_output) {
+                [[maybe_unused]] auto old_size = column->size();
+                RETURN_IF_ERROR(
+                        column_reader->select(selection, selected_rows, batch_rows, column));
+                if (column->size() != old_size + selected_rows) {
+                    return Status::Corruption(
+                            "Parquet selected output column {} returned {} rows, expected {} rows",
+                            column_reader->name(), column->size(), old_size + selected_rows);
+                }
+            } else {
+                int64_t column_rows = 0;
+                RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows));
+                if (column_rows != batch_rows) {
+                    return Status::Corruption(
+                            "Parquet output column {} returned {} rows, expected {} rows",
+                            column_reader->name(), column_rows, batch_rows);
+                }
+            }
+            file_block->replace_by_position(block_position, std::move(column));
+        }
+    }
+    *rows = static_cast<size_t>(selected_rows);
+    return Status::OK();
+}
+
+void ParquetScanScheduler::mark_condition_cache_granules(const SelectionVector& selection,
+                                                         uint16_t selected_rows,
+                                                         int64_t batch_first_file_row) {
+    if (!_condition_cache_ctx || _condition_cache_ctx->is_hit ||
+        !_condition_cache_ctx->filter_result) {
+        return;
+    }
+    auto& cache = *_condition_cache_ctx->filter_result;
+    for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) {
+        const int64_t file_row = batch_first_file_row + selection.get_index(selection_idx);
+        const int64_t granule = file_row / ConditionCacheContext::GRANULE_SIZE;
+        const int64_t cache_idx = granule - _condition_cache_ctx->base_granule;
+        if (cache_idx >= 0 && static_cast<size_t>(cache_idx) < cache.size()) {
+            cache[static_cast<size_t>(cache_idx)] = true;
+        }
+    }
+}
+
+Status ParquetScanScheduler::read_next_batch(
+        ParquetFileContext& file_context,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, Block* file_block, size_t* rows, bool* eof) {
+    *rows = 0;
+    while (true) {
+        if (_current_row_group == nullptr) {
+            bool has_row_group = false;
+            RETURN_IF_ERROR(
+                    open_next_row_group(file_context, file_schema, request, &has_row_group));
+            if (!has_row_group) {
+                *eof = true;
+                return Status::OK();
+            }
+        }
+
+        if (_current_range_idx >= _current_selected_ranges.size()) {
+            // Current row group finished, try next row group.
+            reset_current_row_group();
+            continue;
+        }
+
+        const RowRange& current_range = _current_selected_ranges[_current_range_idx];
+        DORIS_CHECK(current_range.start >= 0);
+        DORIS_CHECK(current_range.length > 0);
+        DORIS_CHECK(current_range.start + current_range.length <= _current_row_group_rows);
+
+        if (_current_row_group_rows_read < current_range.start) {
+            // Skip filtered rows according to row group level pruning.
+            RETURN_IF_ERROR(skip_current_row_group_rows(current_range.start -
+                                                        _current_row_group_rows_read));
+        }
+        DORIS_CHECK(_current_row_group_rows_read == current_range.start + _current_range_rows_read);
+        const int64_t remaining_rows = current_range.length - _current_range_rows_read;
+        if (remaining_rows <= 0) {
+            // Current range finished, try next range in the same row group.
+            ++_current_range_idx;
+            _current_range_rows_read = 0;
+            continue;
+        }
+
+        const int64_t batch_rows =
+                std::min<int64_t>(DEFAULT_PARQUET_READ_BATCH_SIZE, remaining_rows);
+        const int64_t physical_rows_read = batch_rows;
+        const int64_t batch_first_file_row =
+                _current_row_group_first_row + _current_row_group_rows_read;
+        RETURN_IF_ERROR(read_current_row_group_batch(batch_rows, request, batch_first_file_row,
+                                                     file_block, rows));
+        _current_row_group_rows_read += physical_rows_read;
+        _current_range_rows_read += physical_rows_read;
+        if (_current_range_rows_read >= current_range.length) {
+            ++_current_range_idx;
+            _current_range_rows_read = 0;
+        }
+        if (*rows == 0) {
+            continue;
+        }
+        *eof = false;
+        return Status::OK();
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_scan.h b/be/src/format_v2/parquet/parquet_scan.h
new file mode 100644
index 00000000000000..528722902ec6d3
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_scan.h
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "runtime/runtime_profile.h"
+#include "storage/segment/condition_cache.h"
+
+namespace parquet {
+class FileMetaData;
+class ParquetFileReader;
+class RowGroupReader;
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class Block;
+
+namespace format {
+struct FileScanRequest;
+} // namespace format
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetFileContext;
+struct ParquetColumnSchema;
+
+// ============================================================================
+// 扫描范围 & 裁剪计划
+// ============================================================================
+
+// 文件扫描范围，来自 FileDescription 的 range_start_offset/range_size/file_size。
+// 用于判断 RowGroup 是否落在当前 split 负责的 offset 区间内。
+struct ParquetScanRange {
+    int64_t start_offset = 0;
+    int64_t size = -1;      // -1 表示读整个文件
+    int64_t file_size = -1; // -1 表示未知
+};
+
+// 单个 RowGroup 的读取计划，由 plan_parquet_row_groups() 生成。
+struct RowGroupReadPlan {
+    int row_group_id = -1;                 // RG 编号
+    int64_t first_file_row = 0;            // 该 RG 在文件中的起始行号（从 0 累加）
+    int64_t row_group_rows = 0;            // 该 RG 的总行数
+    std::vector<RowRange> selected_ranges; // page index 裁剪后需读取的行范围
+    std::map<int, ParquetPageSkipPlan> page_skip_plans; // leaf_column_id → 可完全跳过的 data page
+};
+
+// 整个文件的扫描计划 — plan_parquet_row_groups() 的输出。
+struct RowGroupScanPlan {
+    std::vector<RowGroupReadPlan> row_groups; // 裁剪后需要扫描的 RowGroup 列表
+    ParquetPruningStats pruning_stats;        // 裁剪统计
+};
+
+// ============================================================================
+// RowGroup 裁剪 & 调度
+// ============================================================================
+
+// 最外层裁剪入口：从文件的所有 RowGroup 中选出需要扫描的 RG 及其内部行范围。
+//
+// 裁剪流水线（代价从低到高）：
+//   1. 按 scan_range 过滤（O(1) offset 算术）→ 只在当前 split offset 范围内的 RG 进入下一步
+//   2. select_row_groups_by_statistics() → statistics/dictionary/bloom filter 裁剪
+//   3. select_row_group_ranges_by_page_index() → 逐 page 细粒度裁剪，生成 selected_ranges
+//
+// 输出 RowGroupScanPlan，供 ParquetScanScheduler 逐批调度读取。
+Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata,
+                               ::parquet::ParquetFileReader* file_reader,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request,
+                               const ParquetScanRange& scan_range, bool enable_bloom_filter,
+                               RowGroupScanPlan* plan, const cctz::time_zone* timezone = nullptr);
+
+// 将 SelectionVector 转换为 IColumn::Filter（Doris 的 bitmap 格式）。
+// 用于在 late materialization 中对已读取的 predicate 列做行过滤。
+IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows,
+                                    int64_t batch_rows);
+
+// 执行批次级的 conjuncts + delete_conjuncts 过滤。
+// 对 predicate 列的全量 batch 执行表达式，生成 SelectionVector 标记命中行。
+Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows,
+                             Block* file_block, SelectionVector* selection, uint16_t* selected_rows,
+                             int64_t* conjunct_filtered_rows = nullptr);
+
+// ============================================================================
+// Parquet 扫描调度器
+// ============================================================================
+//
+// 持有 RowGroupScanPlan，在 get_block() 被调用时逐批推进扫描。
+//
+// 核心循环（read_next_batch）：
+//   while true:
+//     ① open_next_row_group()     — 打开下一个 RG，创建 predicate/non-predicate ColumnReaders
+//     ② skip range gap            — 跳过被 page index 裁剪掉的行范围
+//     ③ read_current_row_group_batch(batch_rows)
+//          ├─ read_filter_columns()  ：读 predicate 列 → 执行 conjuncts → SelectionVector
+//          ├─ filter predicate columns：按 selection 过滤已读的 predicate 列
+//          └─ read non-predicate      ：select() 只读命中的 non-predicate 列
+//     ④ 返回 batch，或继续循环（当前 batch 全被 filter 掉）
+//
+// 设计要点：
+// - Late materialization：predicate 列先读（全量 4096 行），生成 selection；
+//   non-predicate 列按 selection 的 select() 只读命中行，大幅减少 I/O。
+// - RowGroup 级别的 ColumnReader 工厂在 open_next_row_group 中创建，
+//   每个 RG 创建一套新的 reader，reader 生命周期绑定到当前 RG。
+// ============================================================================
+class ParquetScanScheduler {
+public:
+    void set_plan(RowGroupScanPlan plan);
+    void set_page_skip_profile(ParquetPageSkipProfile page_skip_profile) {
+        _page_skip_profile = page_skip_profile;
+    }
+    void set_scan_profile(ParquetScanProfile scan_profile) { _scan_profile = scan_profile; }
+    void set_global_rowid_context(std::optional<format::GlobalRowIdContext> context) {
+        _global_rowid_context = context;
+    }
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx);
+    void set_timezone(const cctz::time_zone* timezone) { _timezone = timezone; }
+    void set_enable_strict_mode(bool enable_strict_mode) {
+        _enable_strict_mode = enable_strict_mode;
+    }
+    void reset();
+    bool empty() const { return _row_group_plans.empty(); }
+    int64_t condition_cache_filtered_rows() const { return _condition_cache_filtered_rows; }
+    int64_t predicate_filtered_rows() const { return _predicate_filtered_rows; }
+
+    Status read_next_batch(ParquetFileContext& file_context,
+                           const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                           const format::FileScanRequest& request, Block* file_block, size_t* rows,
+                           bool* eof);
+
+private:
+    void reset_current_row_group();
+
+    // 打开下一个 RG，创建 ParquetColumnReaderFactory 并创建 predicate/non-predicate readers。
+    Status open_next_row_group(ParquetFileContext& file_context,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request, bool* has_row_group);
+
+    // 跳过当前 RG 内 rows 行（range gap），对所有 reader 调用 skip()。
+    Status skip_current_row_group_rows(int64_t rows);
+
+    // 读取 predicate 列并执行 conjuncts + delete_conjuncts，生成 SelectionVector。
+    Status read_filter_columns(int64_t batch_rows, const format::FileScanRequest& request,
+                               Block* file_block, SelectionVector* selection,
+                               uint16_t* selected_rows, int64_t* conjunct_filtered_rows);
+
+    // 单 batch 的完整读取：predicate 列 → filter → non-predicate 列（select 模式）。
+    Status read_current_row_group_batch(int64_t batch_rows, const format::FileScanRequest& request,
+                                        int64_t batch_first_file_row, Block* file_block,
+                                        size_t* rows);
+
+    void mark_condition_cache_granules(const SelectionVector& selection, uint16_t selected_rows,
+                                       int64_t batch_first_file_row);
+
+    // ======== 计划状态 ========
+    std::vector<RowGroupReadPlan> _row_group_plans; // 待扫描的 RG 队列
+    size_t _next_row_group_plan_idx = 0;            // 下一个待处理的 RG 下标
+
+    // ======== 当前 RG 状态 ========
+    std::shared_ptr<::parquet::RowGroupReader> _current_row_group; // Arrow RowGroup 读取器
+    std::map<ColumnId, std::unique_ptr<ParquetColumnReader>>
+            _current_predicate_columns; // predicate ColumnReaders
+    std::map<ColumnId, std::unique_ptr<ParquetColumnReader>>
+            _current_non_predicate_columns;   // non-predicate ColumnReaders
+    int64_t _current_row_group_rows = 0;      // 当前 RG 总行数
+    int64_t _current_row_group_rows_read = 0; // 当前 RG 已读行数（游标）
+    int64_t _current_row_group_first_row = 0; // 当前 RG 在文件中的起始行号
+    std::vector<RowRange>
+            _current_selected_ranges;     // 当前 RG 的 selected_ranges（page index 裁剪后）
+    size_t _current_range_idx = 0;        // 当前处理到第几个 selected_range
+    int64_t _current_range_rows_read = 0; // 当前 range 已读行数
+
+    // ======== 配置 ========
+    ParquetPageSkipProfile _page_skip_profile;
+    ParquetScanProfile _scan_profile;
+    std::optional<format::GlobalRowIdContext> _global_rowid_context;
+    const cctz::time_zone* _timezone = nullptr;
+    bool _enable_strict_mode = false;
+    std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
+    int64_t _condition_cache_filtered_rows = 0;
+    int64_t _predicate_filtered_rows = 0;
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_statistics.cpp b/be/src/format_v2/parquet/parquet_statistics.cpp
new file mode 100644
index 00000000000000..676f6cb4e379d0
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_statistics.cpp
@@ -0,0 +1,1327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+#include <parquet/api/reader.h>
+#include <parquet/bloom_filter.h>
+#include <parquet/bloom_filter_reader.h>
+#include <parquet/column_page.h>
+#include <parquet/encoding.h>
+#include <parquet/page_index.h>
+#include <parquet/statistics.h>
+#include <parquet/types.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstring>
+#include <exception>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/config.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/field.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "runtime/runtime_profile.h"
+#include "storage/index/zone_map/zone_map_index.h"
+#include "storage/predicate/accept_null_predicate.h"
+#include "storage/predicate/column_predicate.h"
+
+namespace doris::format::parquet {
+
+namespace {
+
+// RG 级裁剪原因枚举。row_group_prune_reason() 依次检查每种裁剪方式，
+// 返回第一个命中裁剪的原因。NONE 表示该 RG 无法被任何方式裁剪。
+enum class ParquetRowGroupPruneReason {
+    NONE,         // 无法裁剪，需要读取
+    STATISTICS,   // min/max statistics 排除
+    DICTIONARY,   // dictionary 排除
+    BLOOM_FILTER, // bloom filter 排除
+};
+
+PrimitiveType physical_filter_type(const ParquetColumnSchema& column_schema) {
+    if (column_schema.type == nullptr) {
+        return INVALID_TYPE;
+    }
+    switch (remove_nullable(column_schema.type)->get_primitive_type()) {
+    case TYPE_BOOLEAN:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_STRING:
+        return remove_nullable(column_schema.type)->get_primitive_type();
+    default:
+        return INVALID_TYPE;
+    }
+}
+
+DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) {
+    switch (time_unit) {
+    case ParquetTimeUnit::MILLIS:
+        return DecodedTimeUnit::MILLIS;
+    case ParquetTimeUnit::MICROS:
+        return DecodedTimeUnit::MICROS;
+    case ParquetTimeUnit::NANOS:
+        return DecodedTimeUnit::NANOS;
+    default:
+        return DecodedTimeUnit::UNKNOWN;
+    }
+}
+
+Status read_decoded_field(const ParquetColumnSchema& column_schema, DecodedColumnView view,
+                          Field* field, const cctz::time_zone* timezone) {
+    DORIS_CHECK(column_schema.type != nullptr);
+    DORIS_CHECK(field != nullptr);
+    constexpr uint8_t not_null = 0;
+    view.row_count = 1;
+    view.null_map = &not_null;
+    view.time_unit = decoded_time_unit(column_schema.type_descriptor.time_unit);
+    view.logical_integer_bit_width = column_schema.type_descriptor.integer_bit_width;
+    view.logical_integer_is_signed = !column_schema.type_descriptor.is_unsigned_integer;
+    view.decimal_precision = column_schema.type_descriptor.decimal_precision;
+    view.decimal_scale = column_schema.type_descriptor.decimal_scale;
+    view.fixed_length = column_schema.type_descriptor.fixed_length;
+    view.timestamp_is_adjusted_to_utc = column_schema.type_descriptor.timestamp_is_adjusted_to_utc;
+    view.timezone = timezone;
+    return column_schema.type->get_serde()->read_field_from_decoded_value(*column_schema.type,
+                                                                          field, view);
+}
+
+template <typename NativeType>
+bool set_decoded_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                       const NativeType& value, Field* field, const cctz::time_zone* timezone) {
+    DecodedColumnView view;
+    view.value_kind = value_kind;
+    view.values = reinterpret_cast<const uint8_t*>(&value);
+    return read_decoded_field(column_schema, view, field, timezone).ok();
+}
+
+template <typename ParquetDType>
+bool set_decoded_min_max(const std::shared_ptr<::parquet::Statistics>& statistics,
+                         const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                         ParquetColumnStatistics* column_statistics,
+                         const cctz::time_zone* timezone) {
+    auto typed_statistics =
+            std::static_pointer_cast<::parquet::TypedStatistics<ParquetDType>>(statistics);
+    if (!set_decoded_field(column_schema, value_kind, typed_statistics->min(),
+                           &column_statistics->min_value, timezone) ||
+        !set_decoded_field(column_schema, value_kind, typed_statistics->max(),
+                           &column_statistics->max_value, timezone)) {
+        return false;
+    }
+    return true;
+}
+
+bool set_decoded_binary_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                              const StringRef& value, Field* field,
+                              const cctz::time_zone* timezone) {
+    std::vector<StringRef> binary_values {value};
+    DecodedColumnView view;
+    view.value_kind = value_kind;
+    view.binary_values = &binary_values;
+    return read_decoded_field(column_schema, view, field, timezone).ok();
+}
+
+bool set_string_min_max(const std::shared_ptr<::parquet::Statistics>& statistics,
+                        const ParquetColumnSchema& column_schema,
+                        ParquetColumnStatistics* column_statistics,
+                        const cctz::time_zone* timezone) {
+    switch (statistics->physical_type()) {
+    case ::parquet::Type::BYTE_ARRAY: {
+        auto typed_statistics =
+                std::static_pointer_cast<::parquet::TypedStatistics<::parquet::ByteArrayType>>(
+                        statistics);
+        const auto min = ::parquet::ByteArrayToString(typed_statistics->min());
+        const auto max = ::parquet::ByteArrayToString(typed_statistics->max());
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &column_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &column_statistics->max_value, timezone)) {
+            return false;
+        }
+        return true;
+    }
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+        if (column_schema.descriptor == nullptr || column_schema.descriptor->type_length() <= 0) {
+            return false;
+        }
+        auto typed_statistics =
+                std::static_pointer_cast<::parquet::TypedStatistics<::parquet::FLBAType>>(
+                        statistics);
+        const int type_length = column_schema.descriptor->type_length();
+        const std::string min(reinterpret_cast<const char*>(typed_statistics->min().ptr),
+                              type_length);
+        const std::string max(reinterpret_cast<const char*>(typed_statistics->max().ptr),
+                              type_length);
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &column_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &column_statistics->max_value, timezone)) {
+            return false;
+        }
+        return true;
+    }
+    default:
+        return false;
+    }
+}
+
+bool is_null_only_predicate(const ColumnPredicate& predicate) {
+    return predicate.type() == PredicateType::IS_NULL ||
+           predicate.type() == PredicateType::IS_NOT_NULL;
+}
+
+bool is_supported_dictionary_predicate(const ColumnPredicate& predicate) {
+    switch (predicate.type()) {
+    case PredicateType::EQ:
+    case PredicateType::IN_LIST:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool is_bloom_filter_prunable_predicate(const ColumnPredicate& predicate) {
+    if (dynamic_cast<const AcceptNullPredicate*>(&predicate) != nullptr ||
+        is_null_only_predicate(predicate)) {
+        return false;
+    }
+    return predicate.can_do_bloom_filter(false);
+}
+
+template <typename T>
+T load_predicate_value(const char* data) {
+    T value;
+    memcpy(&value, data, sizeof(T));
+    return value;
+}
+
+// 将 Arrow 的 ::parquet::BloomFilter 适配为 Doris 的 segment_v2::BloomFilter 接口。
+// 仅实现 test_bytes()（谓词评估所需），add_bytes/add_hash 等写操作不支持。
+//
+// 适配的关键：Arrow BloomFilter 按物理类型存储 hash，而 Doris ColumnPredicate
+// 通过 evaluate_and( BloomFilter*) 来测试谓词值是否可能存在。
+// 本适配器根据 Doris 类型将谓词值转为对应物理类型的 hash 去查询 Arrow BloomFilter。
+class ArrowParquetBloomFilterAdapter final : public segment_v2::BloomFilter {
+public:
+    ArrowParquetBloomFilterAdapter(const ParquetColumnSchema& column_schema,
+                                   const ::parquet::BloomFilter& bloom_filter)
+            : _column_schema(column_schema), _bloom_filter(bloom_filter) {}
+
+    void add_bytes(const char* buf, size_t size) override { DORIS_CHECK(false); }
+
+    bool test_bytes(const char* buf, size_t size) const override {
+        if (buf == nullptr) {
+            return true;
+        }
+        switch (physical_filter_type(_column_schema)) {
+        case TYPE_BOOLEAN:
+            return test_boolean(buf, size);
+        case TYPE_INT:
+            return test_int32(buf, size);
+        case TYPE_BIGINT:
+            return test_int64(buf, size);
+        case TYPE_FLOAT:
+            return test_float(buf, size);
+        case TYPE_DOUBLE:
+            return test_double(buf, size);
+        case TYPE_STRING:
+            return test_string(buf, size);
+        default:
+            return true;
+        }
+    }
+
+    void set_has_null(bool has_null) override { DORIS_CHECK(!has_null); }
+    bool has_null() const override { return false; }
+    void add_hash(uint64_t hash) override { DORIS_CHECK(false); }
+    bool test_hash(uint64_t hash) const override { return _bloom_filter.FindHash(hash); }
+
+private:
+    bool test_boolean(const char* buf, size_t size) const {
+        if (size == sizeof(bool)) {
+            const int32_t value = load_predicate_value<bool>(buf) ? 1 : 0;
+            return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+        }
+        if (size == sizeof(int32_t)) {
+            const int32_t value = load_predicate_value<int32_t>(buf);
+            return _bloom_filter.FindHash(_bloom_filter.Hash(value != 0 ? 1 : 0));
+        }
+        return true;
+    }
+
+    bool test_int32(const char* buf, size_t size) const {
+        if (size == sizeof(int8_t)) {
+            return find_int32(static_cast<int32_t>(load_predicate_value<int8_t>(buf)));
+        }
+        if (size == sizeof(int16_t)) {
+            return find_int32(static_cast<int32_t>(load_predicate_value<int16_t>(buf)));
+        }
+        if (size == sizeof(int32_t)) {
+            return find_int32(load_predicate_value<int32_t>(buf));
+        }
+        return true;
+    }
+
+    bool test_int64(const char* buf, size_t size) const {
+        if (size != sizeof(int64_t)) {
+            return true;
+        }
+        const int64_t value = load_predicate_value<int64_t>(buf);
+        return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+    }
+
+    bool test_float(const char* buf, size_t size) const {
+        if (size != sizeof(float)) {
+            return true;
+        }
+        const float value = load_predicate_value<float>(buf);
+        return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+    }
+
+    bool test_double(const char* buf, size_t size) const {
+        if (size != sizeof(double)) {
+            return true;
+        }
+        const double value = load_predicate_value<double>(buf);
+        return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+    }
+
+    bool test_string(const char* buf, size_t size) const {
+        ::parquet::ByteArray value(static_cast<uint32_t>(size),
+                                   reinterpret_cast<const uint8_t*>(buf));
+        return _bloom_filter.FindHash(_bloom_filter.Hash(&value));
+    }
+
+    bool find_int32(int32_t value) const {
+        return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+    }
+
+    const ParquetColumnSchema& _column_schema;
+    const ::parquet::BloomFilter& _bloom_filter;
+};
+
+const ParquetColumnSchema* resolve_predicate_leaf_schema(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter);
+
+bool bloom_filter_supported(const ParquetColumnSchema& column_schema) {
+    switch (physical_filter_type(column_schema)) {
+    case TYPE_BOOLEAN:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_STRING:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool bloom_filter_excludes(const ParquetColumnSchema& column_schema,
+                           const format::FileColumnPredicateFilter& column_filter,
+                           const ::parquet::BloomFilter& bloom_filter) {
+    if (!bloom_filter_supported(column_schema)) {
+        return false;
+    }
+    ArrowParquetBloomFilterAdapter adapter(column_schema, bloom_filter);
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (column_predicate == nullptr || !is_bloom_filter_prunable_predicate(*column_predicate)) {
+            return false;
+        }
+        if (!column_predicate->evaluate_and(&adapter)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+struct RowGroupBloomFilterCache {
+    ::parquet::BloomFilterReader* bloom_filter_reader = nullptr;
+    std::map<int, std::unique_ptr<::parquet::BloomFilter>> column_bloom_filters;
+    std::set<int> loaded_columns;
+
+    ::parquet::BloomFilter* get(int row_group_idx, int leaf_column_id,
+                                ParquetPruningStats* pruning_stats) {
+        if (bloom_filter_reader == nullptr || leaf_column_id < 0) {
+            return nullptr;
+        }
+        if (loaded_columns.find(leaf_column_id) == loaded_columns.end()) {
+            loaded_columns.insert(leaf_column_id);
+            try {
+                std::shared_ptr<::parquet::RowGroupBloomFilterReader> row_group_reader;
+                if (pruning_stats != nullptr) {
+                    SCOPED_RAW_TIMER(&pruning_stats->bloom_filter_read_time);
+                    row_group_reader = bloom_filter_reader->RowGroup(row_group_idx);
+                    if (row_group_reader != nullptr) {
+                        column_bloom_filters[leaf_column_id] =
+                                row_group_reader->GetColumnBloomFilter(leaf_column_id);
+                    }
+                } else {
+                    row_group_reader = bloom_filter_reader->RowGroup(row_group_idx);
+                    if (row_group_reader != nullptr) {
+                        column_bloom_filters[leaf_column_id] =
+                                row_group_reader->GetColumnBloomFilter(leaf_column_id);
+                    }
+                }
+            } catch (const ::parquet::ParquetException&) {
+                return nullptr;
+            } catch (const std::exception&) {
+                return nullptr;
+            }
+        }
+        auto it = column_bloom_filters.find(leaf_column_id);
+        return it == column_bloom_filters.end() ? nullptr : it->second.get();
+    }
+};
+
+ParquetRowGroupPruneReason bloom_filter_prune_reason(
+        int row_group_idx, const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats) {
+    if (bloom_filter_cache == nullptr || column_filter.predicates.empty()) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    const auto* column_schema = resolve_predicate_leaf_schema(schema, column_filter);
+    if (column_schema == nullptr || !bloom_filter_supported(*column_schema)) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (column_predicate == nullptr || !is_bloom_filter_prunable_predicate(*column_predicate)) {
+            return ParquetRowGroupPruneReason::NONE;
+        }
+    }
+    auto* bloom_filter =
+            bloom_filter_cache->get(row_group_idx, column_schema->leaf_column_id, pruning_stats);
+    if (bloom_filter == nullptr) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    return bloom_filter_excludes(*column_schema, column_filter, *bloom_filter)
+                   ? ParquetRowGroupPruneReason::BLOOM_FILTER
+                   : ParquetRowGroupPruneReason::NONE;
+}
+
+bool is_dictionary_data_encoding(::parquet::Encoding::type encoding) {
+    return encoding == ::parquet::Encoding::PLAIN_DICTIONARY ||
+           encoding == ::parquet::Encoding::RLE_DICTIONARY;
+}
+
+bool is_level_encoding(::parquet::Encoding::type encoding) {
+    return encoding == ::parquet::Encoding::RLE || encoding == ::parquet::Encoding::BIT_PACKED;
+}
+
+bool is_data_page_type(::parquet::PageType::type page_type) {
+    return page_type == ::parquet::PageType::DATA_PAGE ||
+           page_type == ::parquet::PageType::DATA_PAGE_V2;
+}
+
+bool is_dictionary_encoded_chunk(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    if (!column_metadata.has_dictionary_page()) {
+        return false;
+    }
+
+    const auto& encoding_stats = column_metadata.encoding_stats();
+    if (!encoding_stats.empty()) {
+        bool has_dictionary_data_page = false;
+        for (const auto& encoding_stat : encoding_stats) {
+            if (!is_data_page_type(encoding_stat.page_type) || encoding_stat.count <= 0) {
+                continue;
+            }
+            if (!is_dictionary_data_encoding(encoding_stat.encoding)) {
+                return false;
+            }
+            has_dictionary_data_page = true;
+        }
+        return has_dictionary_data_page;
+    }
+
+    bool has_dictionary_encoding = false;
+    for (const auto encoding : column_metadata.encodings()) {
+        if (is_dictionary_data_encoding(encoding)) {
+            has_dictionary_encoding = true;
+            continue;
+        }
+        if (!is_level_encoding(encoding)) {
+            return false;
+        }
+    }
+    return has_dictionary_encoding;
+}
+
+bool supports_dictionary_pruning(const ParquetColumnSchema& column_schema,
+                                 const ::parquet::ColumnChunkMetaData& column_metadata,
+                                 const format::FileColumnPredicateFilter& column_filter) {
+    if (column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE ||
+        column_schema.descriptor == nullptr || column_schema.type == nullptr) {
+        return false;
+    }
+    if (!column_schema.type_descriptor.is_string_like) {
+        return false;
+    }
+    if (column_metadata.type() != ::parquet::Type::BYTE_ARRAY &&
+        column_metadata.type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        return false;
+    }
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (column_predicate == nullptr || !is_supported_dictionary_predicate(*column_predicate)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+struct OwnedDictionaryWords {
+    std::vector<std::string> values;
+    std::vector<StringRef> refs;
+
+    void clear() {
+        values.clear();
+        refs.clear();
+    }
+
+    void build_refs() {
+        refs.reserve(values.size());
+        for (const auto& value : values) {
+            refs.emplace_back(value.data(), value.size());
+        }
+    }
+};
+
+bool read_dictionary_words(::parquet::ParquetFileReader* file_reader, int row_group_idx,
+                           int leaf_column_id, const ParquetColumnSchema& column_schema,
+                           OwnedDictionaryWords* dict_words) {
+    DORIS_CHECK(dict_words != nullptr);
+    dict_words->clear();
+    if (file_reader == nullptr || leaf_column_id < 0) {
+        return false;
+    }
+
+    auto row_group_reader = file_reader->RowGroup(row_group_idx);
+    if (row_group_reader == nullptr) {
+        return false;
+    }
+    auto page_reader = row_group_reader->GetColumnPageReader(leaf_column_id);
+    if (page_reader == nullptr) {
+        return false;
+    }
+
+    std::shared_ptr<::parquet::Page> page;
+    try {
+        page = page_reader->NextPage();
+    } catch (const ::parquet::ParquetException&) {
+        return false;
+    } catch (const std::exception&) {
+        return false;
+    }
+    if (page == nullptr || page->type() != ::parquet::PageType::DICTIONARY_PAGE) {
+        return false;
+    }
+    const auto* dictionary_page = static_cast<const ::parquet::DictionaryPage*>(page.get());
+    if (dictionary_page->encoding() != ::parquet::Encoding::PLAIN &&
+        dictionary_page->encoding() != ::parquet::Encoding::PLAIN_DICTIONARY) {
+        return false;
+    }
+    const int32_t dictionary_length = dictionary_page->num_values();
+    if (dictionary_length <= 0) {
+        return false;
+    }
+    const auto* dictionary_data = dictionary_page->data();
+    const int dictionary_size = dictionary_page->size();
+
+    dict_words->values.reserve(static_cast<size_t>(dictionary_length));
+    if (column_schema.descriptor->physical_type() == ::parquet::Type::BYTE_ARRAY) {
+        auto decoder = ::parquet::MakeTypedDecoder<::parquet::ByteArrayType>(
+                ::parquet::Encoding::PLAIN, column_schema.descriptor);
+        decoder->SetData(dictionary_length, dictionary_data, dictionary_size);
+        std::vector<::parquet::ByteArray> byte_array_values(static_cast<size_t>(dictionary_length));
+        if (decoder->Decode(byte_array_values.data(), dictionary_length) != dictionary_length) {
+            return false;
+        }
+        for (int32_t dict_idx = 0; dict_idx < dictionary_length; ++dict_idx) {
+            dict_words->values.emplace_back(
+                    reinterpret_cast<const char*>(byte_array_values[dict_idx].ptr),
+                    byte_array_values[dict_idx].len);
+        }
+        dict_words->build_refs();
+        return true;
+    }
+    if (column_schema.descriptor->physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        const int type_length = column_schema.descriptor->type_length();
+        if (type_length <= 0) {
+            return false;
+        }
+        auto decoder = ::parquet::MakeTypedDecoder<::parquet::FLBAType>(::parquet::Encoding::PLAIN,
+                                                                        column_schema.descriptor);
+        decoder->SetData(dictionary_length, dictionary_data, dictionary_size);
+        std::vector<::parquet::FixedLenByteArray> flba_values(
+                static_cast<size_t>(dictionary_length));
+        if (decoder->Decode(flba_values.data(), dictionary_length) != dictionary_length) {
+            return false;
+        }
+        for (int32_t dict_idx = 0; dict_idx < dictionary_length; ++dict_idx) {
+            dict_words->values.emplace_back(
+                    reinterpret_cast<const char*>(flba_values[dict_idx].ptr), type_length);
+        }
+        dict_words->build_refs();
+        return true;
+    }
+    return false;
+}
+
+segment_v2::ZoneMap to_column_predicate_statistics(const ParquetColumnStatistics& statistics) {
+    segment_v2::ZoneMap predicate_statistics;
+    predicate_statistics.min_value = statistics.min_value;
+    predicate_statistics.max_value = statistics.max_value;
+    predicate_statistics.has_null = statistics.has_null;
+    predicate_statistics.has_not_null = statistics.has_not_null;
+    return predicate_statistics;
+}
+
+const ParquetColumnSchema* find_child_schema_by_local_id(const ParquetColumnSchema& column_schema,
+                                                         int32_t local_id) {
+    const auto child_it = std::ranges::find_if(
+            column_schema.children, [&](const std::unique_ptr<ParquetColumnSchema>& child) {
+                return child != nullptr && child->local_id == local_id;
+            });
+    return child_it == column_schema.children.end() ? nullptr : child_it->get();
+}
+
+const ParquetColumnSchema* resolve_predicate_leaf_schema(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter) {
+    const auto file_column_id = column_filter.effective_file_column_id();
+    if (!file_column_id.is_valid() || file_column_id.value() >= static_cast<int>(schema.size())) {
+        return nullptr;
+    }
+    const ParquetColumnSchema* column_schema = schema[file_column_id.value()].get();
+    if (column_schema == nullptr) {
+        return nullptr;
+    }
+    for (const auto child_local_id : column_filter.effective_file_child_id_path()) {
+        column_schema = find_child_schema_by_local_id(*column_schema, child_local_id);
+        if (column_schema == nullptr) {
+            return nullptr;
+        }
+    }
+    if (column_schema->kind != ParquetColumnSchemaKind::PRIMITIVE ||
+        column_schema->leaf_column_id < 0 || column_schema->max_repetition_level > 0) {
+        return nullptr;
+    }
+    return column_schema;
+}
+
+bool check_statistics(const format::FileColumnPredicateFilter& column_filter,
+                      const ParquetColumnStatistics& statistics) {
+    if (!statistics.has_any_statistics()) {
+        return false;
+    }
+
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (is_null_only_predicate(*column_predicate)) {
+            if (!statistics.has_null_count) {
+                continue;
+            }
+        } else if (!statistics.has_any_statistics()) {
+            continue;
+        }
+        if (!column_predicate->evaluate_and(to_column_predicate_statistics(statistics))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+} // namespace
+
+ParquetColumnStatistics ParquetStatisticsUtils::TransformColumnStatistics(
+        const ParquetColumnSchema& column_schema,
+        const std::shared_ptr<::parquet::Statistics>& statistics, const cctz::time_zone* timezone) {
+    ParquetColumnStatistics result;
+    if (statistics == nullptr) {
+        return result;
+    }
+
+    result.has_null = statistics->HasNullCount() && statistics->null_count() > 0;
+    result.has_not_null = statistics->num_values() > 0 || statistics->HasMinMax();
+    result.has_null_count = statistics->HasNullCount();
+    if (!result.has_not_null || !statistics->HasMinMax()) {
+        return result;
+    }
+
+    DORIS_CHECK(column_schema.type != nullptr);
+    switch (statistics->physical_type()) {
+    case ::parquet::Type::BOOLEAN:
+        result.has_min_max = set_decoded_min_max<::parquet::BooleanType>(
+                statistics, column_schema, DecodedValueKind::BOOL, &result, timezone);
+        return result;
+    case ::parquet::Type::INT32:
+        result.has_min_max = set_decoded_min_max<::parquet::Int32Type>(
+                statistics, column_schema, decoded_value_kind(column_schema.type_descriptor),
+                &result, timezone);
+        return result;
+    case ::parquet::Type::INT64:
+        result.has_min_max = set_decoded_min_max<::parquet::Int64Type>(
+                statistics, column_schema, decoded_value_kind(column_schema.type_descriptor),
+                &result, timezone);
+        return result;
+    case ::parquet::Type::FLOAT:
+        result.has_min_max = set_decoded_min_max<::parquet::FloatType>(
+                statistics, column_schema, DecodedValueKind::FLOAT, &result, timezone);
+        return result;
+    case ::parquet::Type::DOUBLE:
+        result.has_min_max = set_decoded_min_max<::parquet::DoubleType>(
+                statistics, column_schema, DecodedValueKind::DOUBLE, &result, timezone);
+        return result;
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        result.has_min_max = set_string_min_max(statistics, column_schema, &result, timezone);
+        return result;
+    default:
+        return result;
+    }
+}
+
+namespace {
+
+// 统一的 RG 级裁剪入口 — 依次检查 statistics → dictionary → bloom filter。
+//
+// 裁剪流水线：
+//   1. resolve_predicate_leaf_schema() — 从 file schema 树中定位谓词目标叶子
+//   2. TransformColumnStatistics() + check_statistics() — min/max 范围是否冲突
+//   3. supports_dictionary_pruning() + read_dictionary_words() — EQ/IN_LIST 谓词字典裁剪
+//   4. bloom_filter_prune_reason() — 查询 bloom filter
+// 每个步骤命中即返回对应的 prune reason，否则继续下一步。
+ParquetRowGroupPruneReason row_group_prune_reason(
+        const ::parquet::RowGroupMetaData& row_group, ::parquet::ParquetFileReader* file_reader,
+        int row_group_idx, const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats,
+        const cctz::time_zone* timezone) {
+    if (column_filter.predicates.empty()) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    const auto* column_schema = resolve_predicate_leaf_schema(schema, column_filter);
+    if (column_schema == nullptr) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    DCHECK_LT(column_schema->leaf_column_id, row_group.num_columns());
+    auto column_chunk = row_group.ColumnChunk(column_schema->leaf_column_id);
+    if (column_chunk == nullptr) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    if (check_statistics(column_filter,
+                         ParquetStatisticsUtils::TransformColumnStatistics(
+                                 *column_schema, column_chunk->statistics(), timezone))) {
+        return ParquetRowGroupPruneReason::STATISTICS;
+    }
+    if (!supports_dictionary_pruning(*column_schema, *column_chunk, column_filter) ||
+        !is_dictionary_encoded_chunk(*column_chunk)) {
+        return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache,
+                                         pruning_stats);
+    }
+    OwnedDictionaryWords dict_words;
+    if (!read_dictionary_words(file_reader, row_group_idx, column_schema->leaf_column_id,
+                               *column_schema, &dict_words)) {
+        return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache,
+                                         pruning_stats);
+    }
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (!column_predicate->evaluate_and(dict_words.refs.data(), dict_words.refs.size())) {
+            return ParquetRowGroupPruneReason::DICTIONARY;
+        }
+    }
+    return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache,
+                                     pruning_stats);
+}
+
+void init_bloom_filter_cache(::parquet::ParquetFileReader* file_reader, bool enable_bloom_filter,
+                             RowGroupBloomFilterCache* bloom_filter_cache) {
+    DORIS_CHECK(bloom_filter_cache != nullptr);
+    if (!enable_bloom_filter || file_reader == nullptr) {
+        return;
+    }
+    try {
+        bloom_filter_cache->bloom_filter_reader = &file_reader->GetBloomFilterReader();
+    } catch (const ::parquet::ParquetException&) {
+        bloom_filter_cache->bloom_filter_reader = nullptr;
+    } catch (const std::exception&) {
+        bloom_filter_cache->bloom_filter_reader = nullptr;
+    }
+}
+
+// 统计信息裁剪的主循环。遍历 candidate_row_groups（或所有 RG），
+// 对每个 RG 调用 row_group_prune_reason() 判断是否可跳过。
+//
+// candidate_row_groups 参数：
+//   nullptr → 遍历 [0, num_row_groups)
+//   非 null → 只遍历指定的候选 RG（调用方已通过 scan_range 等其他条件预过滤）
+Status select_row_groups(const ::parquet::FileMetaData& metadata,
+                         ::parquet::ParquetFileReader* file_reader,
+                         const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                         const format::FileScanRequest& request,
+                         const std::vector<int>* candidate_row_groups,
+                         std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+                         ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) {
+    int64_t row_group_filter_time_sink = 0;
+    SCOPED_RAW_TIMER(pruning_stats == nullptr ? &row_group_filter_time_sink
+                                              : &pruning_stats->row_group_filter_time);
+    if (selected_row_groups == nullptr) {
+        return Status::InvalidArgument("selected_row_groups is null");
+    }
+    selected_row_groups->clear();
+
+    const int num_row_groups = metadata.num_row_groups();
+    if (pruning_stats != nullptr) {
+        pruning_stats->total_row_groups = num_row_groups;
+    }
+    const auto candidate_size = candidate_row_groups == nullptr
+                                        ? static_cast<size_t>(num_row_groups)
+                                        : candidate_row_groups->size();
+    selected_row_groups->reserve(candidate_size);
+    for (size_t candidate_idx = 0; candidate_idx < candidate_size; ++candidate_idx) {
+        const int row_group_idx = candidate_row_groups == nullptr
+                                          ? static_cast<int>(candidate_idx)
+                                          : (*candidate_row_groups)[candidate_idx];
+        DORIS_CHECK(row_group_idx >= 0);
+        DORIS_CHECK(row_group_idx < num_row_groups);
+        auto row_group = metadata.RowGroup(row_group_idx);
+        if (row_group == nullptr) {
+            selected_row_groups->push_back(row_group_idx);
+            continue;
+        }
+        bool drop = false;
+        RowGroupBloomFilterCache bloom_filter_cache;
+        init_bloom_filter_cache(file_reader, enable_bloom_filter, &bloom_filter_cache);
+        for (const auto& column_filter : request.column_predicate_filters) {
+            const auto prune_reason = row_group_prune_reason(
+                    *row_group, file_reader, row_group_idx, file_schema, column_filter,
+                    &bloom_filter_cache, pruning_stats, timezone);
+            if (prune_reason == ParquetRowGroupPruneReason::NONE) {
+                continue;
+            }
+            drop = true;
+            if (pruning_stats != nullptr) {
+                pruning_stats->filtered_group_rows += row_group->num_rows();
+                if (prune_reason == ParquetRowGroupPruneReason::STATISTICS) {
+                    ++pruning_stats->filtered_row_groups_by_statistics;
+                } else if (prune_reason == ParquetRowGroupPruneReason::DICTIONARY) {
+                    ++pruning_stats->filtered_row_groups_by_dictionary;
+                } else if (prune_reason == ParquetRowGroupPruneReason::BLOOM_FILTER) {
+                    ++pruning_stats->filtered_row_groups_by_bloom_filter;
+                }
+                break;
+            }
+            break;
+        }
+        if (drop) {
+            continue;
+        }
+        selected_row_groups->push_back(row_group_idx);
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+bool ParquetStatisticsUtils::BloomFilterExcludes(
+        const ParquetColumnSchema& column_schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        const ::parquet::BloomFilter& bloom_filter) {
+    return bloom_filter_excludes(column_schema, column_filter, bloom_filter);
+}
+
+Status select_row_groups_by_statistics(
+        const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, const std::vector<int>* candidate_row_groups,
+        std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) {
+    return select_row_groups(metadata, file_reader, file_schema, request, candidate_row_groups,
+                             selected_row_groups, enable_bloom_filter, pruning_stats, timezone);
+}
+
+namespace {
+
+template <typename ParquetDType>
+bool set_page_decoded_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                              const ParquetColumnSchema& column_schema, size_t page_idx,
+                              DecodedValueKind value_kind, ParquetColumnStatistics* page_statistics,
+                              const cctz::time_zone* timezone) {
+    const auto typed_index =
+            std::static_pointer_cast<::parquet::TypedColumnIndex<ParquetDType>>(column_index);
+    if (page_idx >= typed_index->min_values().size() ||
+        page_idx >= typed_index->max_values().size()) {
+        return false;
+    }
+    if (!set_decoded_field(column_schema, value_kind, typed_index->min_values()[page_idx],
+                           &page_statistics->min_value, timezone) ||
+        !set_decoded_field(column_schema, value_kind, typed_index->max_values()[page_idx],
+                           &page_statistics->max_value, timezone)) {
+        return false;
+    }
+    page_statistics->has_min_max = true;
+    return true;
+}
+
+bool set_page_string_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                             const ParquetColumnSchema& column_schema, size_t page_idx,
+                             ParquetColumnStatistics* page_statistics,
+                             const cctz::time_zone* timezone) {
+    switch (column_schema.descriptor->physical_type()) {
+    case ::parquet::Type::BYTE_ARRAY: {
+        const auto typed_index =
+                std::static_pointer_cast<::parquet::ByteArrayColumnIndex>(column_index);
+        if (page_idx >= typed_index->min_values().size() ||
+            page_idx >= typed_index->max_values().size()) {
+            return false;
+        }
+        const auto min = ::parquet::ByteArrayToString(typed_index->min_values()[page_idx]);
+        const auto max = ::parquet::ByteArrayToString(typed_index->max_values()[page_idx]);
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &page_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &page_statistics->max_value, timezone)) {
+            return false;
+        }
+        page_statistics->has_min_max = true;
+        return true;
+    }
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+        const int type_length = column_schema.descriptor->type_length();
+        if (type_length <= 0) {
+            return false;
+        }
+        const auto typed_index = std::static_pointer_cast<::parquet::FLBAColumnIndex>(column_index);
+        if (page_idx >= typed_index->min_values().size() ||
+            page_idx >= typed_index->max_values().size()) {
+            return false;
+        }
+        const std::string min(
+                reinterpret_cast<const char*>(typed_index->min_values()[page_idx].ptr),
+                type_length);
+        const std::string max(
+                reinterpret_cast<const char*>(typed_index->max_values()[page_idx].ptr),
+                type_length);
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &page_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &page_statistics->max_value, timezone)) {
+            return false;
+        }
+        page_statistics->has_min_max = true;
+        return true;
+    }
+    default:
+        return false;
+    }
+}
+
+bool set_page_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                      const ParquetColumnSchema& column_schema, size_t page_idx,
+                      ParquetColumnStatistics* page_statistics, const cctz::time_zone* timezone) {
+    DORIS_CHECK(column_schema.type != nullptr);
+    switch (column_schema.descriptor->physical_type()) {
+    case ::parquet::Type::BOOLEAN:
+        return set_page_decoded_min_max<::parquet::BooleanType>(column_index, column_schema,
+                                                                page_idx, DecodedValueKind::BOOL,
+                                                                page_statistics, timezone);
+    case ::parquet::Type::INT32:
+        return set_page_decoded_min_max<::parquet::Int32Type>(
+                column_index, column_schema, page_idx,
+                decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone);
+    case ::parquet::Type::INT64:
+        return set_page_decoded_min_max<::parquet::Int64Type>(
+                column_index, column_schema, page_idx,
+                decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone);
+    case ::parquet::Type::FLOAT:
+        return set_page_decoded_min_max<::parquet::FloatType>(column_index, column_schema, page_idx,
+                                                              DecodedValueKind::FLOAT,
+                                                              page_statistics, timezone);
+    case ::parquet::Type::DOUBLE:
+        return set_page_decoded_min_max<::parquet::DoubleType>(column_index, column_schema,
+                                                               page_idx, DecodedValueKind::DOUBLE,
+                                                               page_statistics, timezone);
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return set_page_string_min_max(column_index, column_schema, page_idx, page_statistics,
+                                       timezone);
+    default:
+        return false;
+    }
+}
+
+bool build_page_statistics(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                           const ParquetColumnSchema& column_schema, size_t page_idx,
+                           ParquetColumnStatistics* page_statistics,
+                           const cctz::time_zone* timezone) {
+    DORIS_CHECK(page_statistics != nullptr);
+    *page_statistics = ParquetColumnStatistics {};
+
+    const auto& null_pages = column_index->null_pages();
+    if (!column_index->has_null_counts() || page_idx >= null_pages.size() ||
+        page_idx >= column_index->null_counts().size()) {
+        return false;
+    }
+
+    page_statistics->has_null_count = true;
+    page_statistics->has_null = column_index->null_counts()[page_idx] > 0;
+    page_statistics->has_not_null = !null_pages[page_idx];
+    if (!page_statistics->has_not_null) {
+        return true;
+    }
+    return set_page_min_max(column_index, column_schema, page_idx, page_statistics, timezone);
+}
+
+std::vector<RowRange> intersect_ranges(const std::vector<RowRange>& left,
+                                       const std::vector<RowRange>& right) {
+    std::vector<RowRange> result;
+    size_t left_idx = 0;
+    size_t right_idx = 0;
+    while (left_idx < left.size() && right_idx < right.size()) {
+        const int64_t left_start = left[left_idx].start;
+        const int64_t left_end = left_start + left[left_idx].length;
+        const int64_t right_start = right[right_idx].start;
+        const int64_t right_end = right_start + right[right_idx].length;
+        const int64_t start = std::max(left_start, right_start);
+        const int64_t end = std::min(left_end, right_end);
+        if (start < end) {
+            result.push_back(RowRange {start, end - start});
+        }
+        if (left_end < right_end) {
+            ++left_idx;
+        } else {
+            ++right_idx;
+        }
+    }
+    return result;
+}
+
+int64_t count_range_rows(const std::vector<RowRange>& ranges) {
+    int64_t rows = 0;
+    for (const auto& range : ranges) {
+        rows += range.length;
+    }
+    return rows;
+}
+
+RowRange page_row_range(const ::parquet::OffsetIndex& offset_index, size_t page_idx,
+                        int64_t row_group_rows) {
+    const auto& page_locations = offset_index.page_locations();
+    const int64_t start = page_locations[page_idx].first_row_index;
+    const int64_t end = page_idx + 1 == page_locations.size()
+                                ? row_group_rows
+                                : page_locations[page_idx + 1].first_row_index;
+    DORIS_CHECK(start >= 0);
+    DORIS_CHECK(end >= start);
+    DORIS_CHECK(end <= row_group_rows);
+    return RowRange {start, end - start};
+}
+
+void append_row_range(const RowRange& range, std::vector<RowRange>* ranges) {
+    if (range.length == 0) {
+        return;
+    }
+    if (!ranges->empty()) {
+        auto& previous = ranges->back();
+        if (previous.start + previous.length == range.start) {
+            previous.length += range.length;
+            return;
+        }
+    }
+    ranges->push_back(range);
+}
+
+bool select_ranges_for_filter(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+                              const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                              const format::FileColumnPredicateFilter& column_filter,
+                              int64_t row_group_rows, std::vector<RowRange>* ranges,
+                              const cctz::time_zone* timezone) {
+    if (column_filter.predicates.empty()) {
+        return false;
+    }
+    const auto* column_schema = resolve_predicate_leaf_schema(file_schema, column_filter);
+    if (column_schema == nullptr || column_schema->descriptor == nullptr) {
+        return false;
+    }
+
+    std::shared_ptr<::parquet::ColumnIndex> column_index;
+    std::shared_ptr<::parquet::OffsetIndex> offset_index;
+    try {
+        column_index = row_group->GetColumnIndex(column_schema->leaf_column_id);
+        offset_index = row_group->GetOffsetIndex(column_schema->leaf_column_id);
+    } catch (const ::parquet::ParquetException&) {
+        return false;
+    } catch (const std::exception&) {
+        return false;
+    }
+    if (column_index == nullptr || offset_index == nullptr ||
+        column_index->null_pages().size() != offset_index->page_locations().size()) {
+        return false;
+    }
+
+    ranges->clear();
+    const auto page_count = offset_index->page_locations().size();
+    for (size_t page_idx = 0; page_idx < page_count; ++page_idx) {
+        ParquetColumnStatistics page_statistics;
+        if (!build_page_statistics(column_index, *column_schema, page_idx, &page_statistics,
+                                   timezone)) {
+            ranges->clear();
+            return false;
+        }
+        const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows);
+        if (check_statistics(column_filter, page_statistics)) {
+            continue;
+        }
+        append_row_range(row_range, ranges);
+    }
+    return true;
+}
+
+bool ranges_intersect(const std::vector<RowRange>& ranges, const RowRange& range) {
+    const int64_t range_end = range.start + range.length;
+    for (const auto& selected_range : ranges) {
+        const int64_t selected_end = selected_range.start + selected_range.length;
+        if (selected_end <= range.start) {
+            continue;
+        }
+        if (selected_range.start >= range_end) {
+            return false;
+        }
+        return true;
+    }
+    return false;
+}
+
+void collect_leaf_schemas(const ParquetColumnSchema& column_schema,
+                          const format::LocalColumnIndex* projection,
+                          std::vector<const ParquetColumnSchema*>* leaf_schemas) {
+    if (column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE) {
+        leaf_schemas->push_back(&column_schema);
+        return;
+    }
+    for (const auto& child_schema : column_schema.children) {
+        if (!format::is_child_projected(projection, child_schema->local_id)) {
+            continue;
+        }
+        const auto* child_projection =
+                format::find_child_projection(projection, child_schema->local_id);
+        collect_leaf_schemas(*child_schema, child_projection, leaf_schemas);
+    }
+}
+
+void collect_request_leaf_schemas(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request,
+        std::vector<const ParquetColumnSchema*>* leaf_schemas) {
+    std::set<int> seen_leaf_ids;
+    auto collect_projection = [&](const format::LocalColumnIndex& projection) {
+        const int32_t local_id = projection.local_id();
+        if (local_id < 0 || local_id >= static_cast<int32_t>(file_schema.size())) {
+            return;
+        }
+        std::vector<const ParquetColumnSchema*> projection_leaf_schemas;
+        collect_leaf_schemas(*file_schema[local_id], &projection, &projection_leaf_schemas);
+        for (const auto* leaf_schema : projection_leaf_schemas) {
+            DORIS_CHECK(leaf_schema != nullptr);
+            if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) {
+                leaf_schemas->push_back(leaf_schema);
+            }
+        }
+    };
+    for (const auto& projection : request.predicate_columns) {
+        collect_projection(projection);
+    }
+    for (const auto& projection : request.non_predicate_columns) {
+        collect_projection(projection);
+    }
+    for (const auto& column_filter : request.column_predicate_filters) {
+        const auto* leaf_schema = resolve_predicate_leaf_schema(file_schema, column_filter);
+        if (leaf_schema == nullptr) {
+            continue;
+        }
+        if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) {
+            leaf_schemas->push_back(leaf_schema);
+        }
+    }
+}
+
+bool build_page_skip_plan_for_leaf(
+        const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+        const ParquetColumnSchema& column_schema, const std::vector<RowRange>& selected_ranges,
+        int64_t row_group_rows, ParquetPageSkipPlan* page_skip_plan) {
+    DORIS_CHECK(page_skip_plan != nullptr);
+    *page_skip_plan = ParquetPageSkipPlan {};
+    // OffsetIndex first_row_index is row-based only for non-repeated leaves. LIST/MAP/repeated
+    // leaves need repetition-level-aware range mapping and are intentionally left out for now.
+    if (column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE ||
+        column_schema.descriptor == nullptr || column_schema.leaf_column_id < 0 ||
+        column_schema.descriptor->max_repetition_level() != 0) {
+        return false;
+    }
+
+    std::shared_ptr<::parquet::OffsetIndex> offset_index;
+    try {
+        offset_index = row_group->GetOffsetIndex(column_schema.leaf_column_id);
+    } catch (const ::parquet::ParquetException&) {
+        return false;
+    } catch (const std::exception&) {
+        return false;
+    }
+    if (offset_index == nullptr) {
+        return false;
+    }
+
+    const auto page_count = offset_index->page_locations().size();
+    page_skip_plan->leaf_column_id = column_schema.leaf_column_id;
+    page_skip_plan->skipped_pages.resize(page_count);
+    page_skip_plan->skipped_page_compressed_sizes.resize(page_count);
+    const auto& page_locations = offset_index->page_locations();
+    for (size_t page_idx = 0; page_idx < page_count; ++page_idx) {
+        const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows);
+        if (row_range.length == 0 || ranges_intersect(selected_ranges, row_range)) {
+            continue;
+        }
+        page_skip_plan->skipped_pages[page_idx] = 1;
+        page_skip_plan->skipped_page_compressed_sizes[page_idx] =
+                page_locations[page_idx].compressed_page_size;
+        append_row_range(row_range, &page_skip_plan->skipped_ranges);
+    }
+    if (page_skip_plan->empty()) {
+        *page_skip_plan = ParquetPageSkipPlan {};
+        return false;
+    }
+    return true;
+}
+
+void build_page_skip_plans(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+                           const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                           const format::FileScanRequest& request,
+                           const std::vector<RowRange>& selected_ranges, int64_t row_group_rows,
+                           std::map<int, ParquetPageSkipPlan>* page_skip_plans) {
+    DORIS_CHECK(page_skip_plans != nullptr);
+    page_skip_plans->clear();
+    std::vector<const ParquetColumnSchema*> leaf_schemas;
+    collect_request_leaf_schemas(file_schema, request, &leaf_schemas);
+    for (const auto* leaf_schema : leaf_schemas) {
+        DORIS_CHECK(leaf_schema != nullptr);
+        ParquetPageSkipPlan page_skip_plan;
+        if (build_page_skip_plan_for_leaf(row_group, *leaf_schema, selected_ranges, row_group_rows,
+                                          &page_skip_plan)) {
+            page_skip_plans->emplace(page_skip_plan.leaf_column_id, std::move(page_skip_plan));
+        }
+    }
+}
+
+} // namespace
+
+Status select_row_group_ranges_by_page_index(
+        ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows,
+        std::vector<RowRange>* selected_ranges, std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) {
+    int64_t page_index_filter_time_sink = 0;
+    SCOPED_RAW_TIMER(pruning_stats == nullptr ? &page_index_filter_time_sink
+                                              : &pruning_stats->page_index_filter_time);
+    DORIS_CHECK(selected_ranges != nullptr);
+    selected_ranges->clear();
+    if (page_skip_plans != nullptr) {
+        page_skip_plans->clear();
+    }
+    if (row_group_rows <= 0) {
+        return Status::OK();
+    }
+    selected_ranges->push_back(RowRange {0, row_group_rows});
+    if (!config::enable_parquet_page_index || request.column_predicate_filters.empty() ||
+        file_reader == nullptr) {
+        return Status::OK();
+    }
+
+    std::shared_ptr<::parquet::PageIndexReader> page_index_reader;
+    std::shared_ptr<::parquet::RowGroupPageIndexReader> row_group_index_reader;
+    try {
+        if (pruning_stats != nullptr) {
+            ++pruning_stats->page_index_read_calls;
+        }
+        {
+            int64_t read_page_index_time_sink = 0;
+            SCOPED_RAW_TIMER(pruning_stats == nullptr ? &read_page_index_time_sink
+                                                      : &pruning_stats->read_page_index_time);
+            page_index_reader = file_reader->GetPageIndexReader();
+            if (page_index_reader == nullptr) {
+                return Status::OK();
+            }
+            row_group_index_reader = page_index_reader->RowGroup(row_group_idx);
+        }
+    } catch (const ::parquet::ParquetException&) {
+        return Status::OK();
+    } catch (const std::exception&) {
+        return Status::OK();
+    }
+    if (row_group_index_reader == nullptr) {
+        return Status::OK();
+    }
+
+    for (const auto& column_filter : request.column_predicate_filters) {
+        std::vector<RowRange> filter_ranges;
+        if (!select_ranges_for_filter(row_group_index_reader, file_schema, column_filter,
+                                      row_group_rows, &filter_ranges, timezone)) {
+            continue;
+        }
+        *selected_ranges = intersect_ranges(*selected_ranges, filter_ranges);
+        if (selected_ranges->empty()) {
+            if (page_skip_plans != nullptr) {
+                page_skip_plans->clear();
+            }
+            if (pruning_stats != nullptr) {
+                pruning_stats->filtered_page_rows += row_group_rows;
+                ++pruning_stats->filtered_row_groups_by_page_index;
+            }
+            return Status::OK();
+        }
+    }
+    if (page_skip_plans != nullptr) {
+        build_page_skip_plans(row_group_index_reader, file_schema, request, *selected_ranges,
+                              row_group_rows, page_skip_plans);
+    }
+    if (pruning_stats != nullptr) {
+        const int64_t selected_rows = count_range_rows(*selected_ranges);
+        DORIS_CHECK(selected_rows <= row_group_rows);
+        pruning_stats->filtered_page_rows += row_group_rows - selected_rows;
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_statistics.h b/be/src/format_v2/parquet/parquet_statistics.h
new file mode 100644
index 00000000000000..741ca77779c35a
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_statistics.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "common/status.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+
+namespace parquet {
+class BloomFilter;
+class FileMetaData;
+class ParquetFileReader;
+class Statistics;
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class ColumnPredicate;
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetColumnSchema;
+
+// ============================================================================
+// 裁剪统计信息
+// ============================================================================
+
+// RowGroup/Page 裁剪过程中的统计计数。
+// 由 plan_parquet_row_groups() 填充，最终汇总到 ParquetProfile 的 RuntimeProfile。
+struct ParquetPruningStats {
+    int64_t total_row_groups = 0;                    // 文件中 RG 总数
+    int64_t selected_row_groups = 0;                 // 裁剪后选中的 RG 数
+    int64_t filtered_row_groups_by_statistics = 0;   // 被 min/max statistics 裁剪的 RG 数
+    int64_t filtered_row_groups_by_dictionary = 0;   // 被 dictionary 裁剪的 RG 数
+    int64_t filtered_row_groups_by_bloom_filter = 0; // 被 bloom filter 裁剪的 RG 数
+    int64_t filtered_row_groups_by_page_index = 0;   // 被 page index 完全裁剪的 RG 数
+    int64_t filtered_group_rows = 0;                 // 被裁剪的 RG 的总行数
+    int64_t filtered_page_rows = 0;                  // 被 page index 裁剪的行数
+    int64_t selected_row_ranges = 0;                 // 选中的行范围数
+    int64_t page_index_read_calls = 0;               // Page Index 读取次数
+    int64_t bloom_filter_read_time = 0;              // Bloom filter 读取耗时 (ns)
+    int64_t row_group_filter_time = 0;               // RG 级裁剪耗时 (ns)
+    int64_t page_index_filter_time = 0;              // Page index 裁剪耗时 (ns)
+    int64_t read_page_index_time = 0;                // Page index 读取耗时 (ns)
+};
+
+// Parquet ColumnChunk statistics 转换后的 Doris 统计视图。
+// 将 Arrow 的 min/max 物理值按 type_descriptor 转换为 Doris Field，
+// 供 ColumnPredicate::evaluate_and() 判断是否可以裁剪。
+struct ParquetColumnStatistics {
+    Field min_value;             // 列最小值（已转换为 Doris 类型）
+    Field max_value;             // 列最大值
+    bool has_null = false;       // 是否包含 NULL
+    bool has_not_null = false;   // 是否包含非 NULL 值
+    bool has_null_count = false; // null_count 字段是否有效
+    bool has_min_max = false;    // min/max 字段是否有效（转换成功）
+
+    bool has_any_statistics() const { return has_null_count || has_min_max; }
+};
+
+// ============================================================================
+// Parquet 文件级裁剪工具
+// ============================================================================
+//
+// 裁剪逻辑分层：
+//   ① select_row_groups_by_statistics()  — RG 级：min/max + dictionary + bloom filter
+//   ② select_row_group_ranges_by_page_index() — Page 级：ColumnIndex 细粒度 range 裁剪 + skip plan
+//
+// 这些函数只消费已 localize 到 file schema 的 FileScanRequest，
+// 不理解 table/global schema。所有裁剪发生在 plan_parquet_row_groups() 阶段。
+//
+// 内部实现（.cpp 中）：
+//   row_group_prune_reason() 是统一的 RG 级裁剪入口，依次检查：
+//     statistics(TransformColumnStatistics + check_statistics)
+//     → dictionary(read_dictionary_words + predicate::evaluate_and)
+//     → bloom filter(bloom_filter_prune_reason)
+//   返回第一个命中裁剪的原因。
+// ============================================================================
+struct ParquetStatisticsUtils {
+    // 将 Arrow Parquet Statistics 转换为 Doris 的 ParquetColumnStatistics。
+    // 如果 min/max 是 Parquet 物理值（如 INT32 存的 decimal、INT96 的 timestamp），
+    // 会按 column_schema.type_descriptor 转换为 Doris 逻辑值。
+    static ParquetColumnStatistics TransformColumnStatistics(
+            const ParquetColumnSchema& column_schema,
+            const std::shared_ptr<::parquet::Statistics>& statistics,
+            const cctz::time_zone* timezone = nullptr);
+
+    // 检查 bloom filter 是否排除该列的所有 predicate 值。
+    // 通过 ArrowParquetBloomFilterAdapter 将 Arrow BloomFilter 适配为 Doris BloomFilter 接口。
+    static bool BloomFilterExcludes(const ParquetColumnSchema& column_schema,
+                                    const format::FileColumnPredicateFilter& column_filter,
+                                    const ::parquet::BloomFilter& bloom_filter);
+};
+
+// ① RG 级裁剪：对 candidate_row_groups 中的每个 RG，调用 row_group_prune_reason()
+// 依次检查 statistics → dictionary → bloom filter。
+// candidate_row_groups 为 nullptr 时遍历所有 RG。
+Status select_row_groups_by_statistics(
+        const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, const std::vector<int>* candidate_row_groups,
+        std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr);
+
+// ② Page 级裁剪：对指定 RG，通过 ColumnIndex (page statistics) 和 OffsetIndex
+// 对每个 column_predicate_filter 逐 page 裁剪，生成 selected_ranges（该 RG 内需读取的行范围）
+// 和 page_skip_plans（完全跳过哪些 data page，供 Arrow RecordReader 在物理读取层跳过）。
+//
+// 所有 column_filter 的 range 取交集（intersect_ranges），
+// 交集为空则该 RG 被完全裁剪。
+Status select_row_group_ranges_by_page_index(
+        ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows,
+        std::vector<RowRange>* selected_ranges, std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_type.cpp b/be/src/format_v2/parquet/parquet_type.cpp
new file mode 100644
index 00000000000000..d35181d0397178
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_type.cpp
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_type.h"
+
+#include <parquet/api/schema.h>
+
+#include <memory>
+#include <string>
+
+#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/primitive_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+DataTypePtr create_type(PrimitiveType type, bool nullable, int precision = 0, int scale = 0) {
+    return DataTypeFactory::instance().create_data_type(type, nullable, precision, scale);
+}
+
+PrimitiveType decimal_primitive_type(int precision) {
+    return precision > 38 ? TYPE_DECIMAL256 : TYPE_DECIMAL128I;
+}
+
+void mark_decimal(const ::parquet::ColumnDescriptor* column, int precision, int scale,
+                  ParquetTypeDescriptor* result) {
+    result->is_decimal = true;
+    result->decimal_precision = precision;
+    result->decimal_scale = scale;
+    switch (column->physical_type()) {
+    case ::parquet::Type::INT32:
+        result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_INT32;
+        break;
+    case ::parquet::Type::INT64:
+        result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_INT64;
+        break;
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY;
+        break;
+    default:
+        result->extra_type_info = ParquetExtraTypeInfo::NONE;
+        break;
+    }
+}
+
+void mark_integer(int bit_width, bool is_signed, ParquetTypeDescriptor* result) {
+    result->integer_bit_width = bit_width;
+    result->is_unsigned_integer = !is_signed;
+}
+
+DataTypePtr converted_type_to_doris_type(const ::parquet::ColumnDescriptor* column,
+                                         ParquetTypeDescriptor* result) {
+    const bool nullable = column->max_definition_level() > 0;
+    switch (column->converted_type()) {
+    case ::parquet::ConvertedType::UTF8:
+    case ::parquet::ConvertedType::ENUM:
+    case ::parquet::ConvertedType::JSON:
+    case ::parquet::ConvertedType::BSON:
+        return create_type(TYPE_STRING, nullable);
+    case ::parquet::ConvertedType::DECIMAL:
+        mark_decimal(column, column->type_precision(), column->type_scale(), result);
+        return create_type(decimal_primitive_type(column->type_precision()), nullable,
+                           column->type_precision(), column->type_scale());
+    case ::parquet::ConvertedType::DATE:
+        return create_type(TYPE_DATEV2, nullable);
+    case ::parquet::ConvertedType::TIME_MILLIS:
+        result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported";
+        return nullptr;
+    case ::parquet::ConvertedType::TIME_MICROS:
+        result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported";
+        return nullptr;
+    case ::parquet::ConvertedType::TIMESTAMP_MILLIS:
+        result->is_timestamp = true;
+        result->timestamp_is_adjusted_to_utc = true;
+        result->time_unit = ParquetTimeUnit::MILLIS;
+        result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS;
+        return create_type(TYPE_DATETIMEV2, nullable, 0, 3);
+    case ::parquet::ConvertedType::TIMESTAMP_MICROS:
+        result->is_timestamp = true;
+        result->timestamp_is_adjusted_to_utc = true;
+        result->time_unit = ParquetTimeUnit::MICROS;
+        result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS;
+        return create_type(TYPE_DATETIMEV2, nullable, 0, 6);
+    // Parquet stores signed and unsigned integer logical annotations on signed physical carriers:
+    // INT_8/UINT_8/INT_16/UINT_16/INT_32/UINT_32 use physical INT32, and
+    // INT_64/UINT_64 use physical INT64. Doris maps unsigned integers to the next wider
+    // signed type so all values in the unsigned range can be represented.
+    case ::parquet::ConvertedType::INT_8:
+        mark_integer(8, true, result);
+        return create_type(TYPE_TINYINT, nullable);
+    case ::parquet::ConvertedType::UINT_8:
+        mark_integer(8, false, result);
+        return create_type(TYPE_SMALLINT, nullable);
+    case ::parquet::ConvertedType::INT_16:
+        mark_integer(16, true, result);
+        return create_type(TYPE_SMALLINT, nullable);
+    case ::parquet::ConvertedType::UINT_16:
+        mark_integer(16, false, result);
+        return create_type(TYPE_INT, nullable);
+    case ::parquet::ConvertedType::INT_32:
+        mark_integer(32, true, result);
+        return create_type(TYPE_INT, nullable);
+    case ::parquet::ConvertedType::UINT_32:
+        mark_integer(32, false, result);
+        return create_type(TYPE_BIGINT, nullable);
+    case ::parquet::ConvertedType::INT_64:
+        mark_integer(64, true, result);
+        return create_type(TYPE_BIGINT, nullable);
+    case ::parquet::ConvertedType::UINT_64:
+        mark_integer(64, false, result);
+        return create_type(TYPE_LARGEINT, nullable);
+    case ::parquet::ConvertedType::NONE:
+    default:
+        return nullptr;
+    }
+}
+
+DataTypePtr logical_type_to_doris_type(const ::parquet::ColumnDescriptor* column,
+                                       ParquetTypeDescriptor* result) {
+    const auto& logical_type = column->logical_type();
+    if (logical_type == nullptr || !logical_type->is_valid() || logical_type->is_none()) {
+        return nullptr;
+    }
+    const bool nullable = column->max_definition_level() > 0;
+    if (logical_type->is_string() || logical_type->is_enum() || logical_type->is_JSON() ||
+        logical_type->is_BSON() || logical_type->is_UUID()) {
+        return create_type(TYPE_STRING, nullable);
+    }
+    if (logical_type->is_decimal()) {
+        const auto& decimal_type = static_cast<const ::parquet::DecimalLogicalType&>(*logical_type);
+        mark_decimal(column, decimal_type.precision(), decimal_type.scale(), result);
+        return create_type(decimal_primitive_type(decimal_type.precision()), nullable,
+                           decimal_type.precision(), decimal_type.scale());
+    }
+    if (logical_type->is_date()) {
+        return create_type(TYPE_DATEV2, nullable);
+    }
+    if (logical_type->is_time()) {
+        const auto& time_type = static_cast<const ::parquet::TimeLogicalType&>(*logical_type);
+        if (time_type.is_adjusted_to_utc()) {
+            result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported";
+            return nullptr;
+        }
+        int scale = 0;
+        if (time_type.time_unit() == ::parquet::LogicalType::TimeUnit::MILLIS) {
+            scale = 3;
+            result->time_unit = ParquetTimeUnit::MILLIS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS;
+        } else if (time_type.time_unit() == ::parquet::LogicalType::TimeUnit::MICROS) {
+            scale = 6;
+            result->time_unit = ParquetTimeUnit::MICROS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS;
+        } else {
+            return nullptr;
+        }
+        return create_type(TYPE_TIMEV2, nullable, 0, scale);
+    }
+    if (logical_type->is_timestamp()) {
+        const auto& timestamp_type =
+                static_cast<const ::parquet::TimestampLogicalType&>(*logical_type);
+        int scale = 0;
+        if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::MILLIS) {
+            scale = 3;
+            result->time_unit = ParquetTimeUnit::MILLIS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS;
+        } else if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::MICROS) {
+            scale = 6;
+            result->time_unit = ParquetTimeUnit::MICROS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS;
+        } else if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::NANOS) {
+            scale = 6;
+            result->time_unit = ParquetTimeUnit::NANOS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_NS;
+        } else {
+            return nullptr;
+        }
+        result->is_timestamp = true;
+        result->timestamp_is_adjusted_to_utc = timestamp_type.is_adjusted_to_utc();
+        return create_type(TYPE_DATETIMEV2, nullable, 0, scale);
+    }
+    if (logical_type->is_int()) {
+        const auto& int_type = static_cast<const ::parquet::IntLogicalType&>(*logical_type);
+        mark_integer(int_type.bit_width(), int_type.is_signed(), result);
+        switch (int_type.bit_width()) {
+        case 8:
+            return create_type(int_type.is_signed() ? TYPE_TINYINT : TYPE_SMALLINT, nullable);
+        case 16:
+            return create_type(int_type.is_signed() ? TYPE_SMALLINT : TYPE_INT, nullable);
+        case 32:
+            return create_type(int_type.is_signed() ? TYPE_INT : TYPE_BIGINT, nullable);
+        case 64:
+            return create_type(int_type.is_signed() ? TYPE_BIGINT : TYPE_LARGEINT, nullable);
+        default:
+            return nullptr;
+        }
+    }
+    if (logical_type->is_float16()) {
+        if (column->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY ||
+            column->type_length() != 2) {
+            return nullptr;
+        }
+        result->extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+        return create_type(TYPE_FLOAT, nullable);
+    }
+    return nullptr;
+}
+
+DataTypePtr physical_type_to_doris_type(const ::parquet::ColumnDescriptor* column) {
+    const bool nullable = column->max_definition_level() > 0;
+    DataTypePtr type;
+    switch (column->physical_type()) {
+    case ::parquet::Type::BOOLEAN:
+        type = std::make_shared<DataTypeBool>();
+        break;
+    case ::parquet::Type::INT32:
+        type = std::make_shared<DataTypeInt32>();
+        break;
+    case ::parquet::Type::INT64:
+        type = std::make_shared<DataTypeInt64>();
+        break;
+    case ::parquet::Type::FLOAT:
+        type = std::make_shared<DataTypeFloat32>();
+        break;
+    case ::parquet::Type::DOUBLE:
+        type = std::make_shared<DataTypeFloat64>();
+        break;
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        type = std::make_shared<DataTypeString>();
+        break;
+    case ::parquet::Type::INT96:
+        type = create_type(TYPE_DATETIMEV2, nullable, 0, 6);
+        break;
+    default:
+        return nullptr;
+    }
+    return nullable ? make_nullable(type) : type;
+}
+
+bool record_reader_physical_type_supported(::parquet::Type::type physical_type) {
+    switch (physical_type) {
+    case ::parquet::Type::BOOLEAN:
+    case ::parquet::Type::INT32:
+    case ::parquet::Type::INT64:
+    case ::parquet::Type::INT96:
+    case ::parquet::Type::FLOAT:
+    case ::parquet::Type::DOUBLE:
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return true;
+    default:
+        return false;
+    }
+}
+
+} // namespace
+
+std::string parquet_column_name(const ::parquet::ColumnDescriptor* column) {
+    if (column == nullptr) {
+        return {};
+    }
+    auto path = column->path();
+    if (path) {
+        return path->ToDotString();
+    }
+    return column->name();
+}
+
+ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column) {
+    ParquetTypeDescriptor result;
+    if (column == nullptr) {
+        return result;
+    }
+
+    result.physical_type = column->physical_type();
+    result.converted_type = column->converted_type();
+    result.fixed_length = column->type_length();
+
+    if (auto logical_type = logical_type_to_doris_type(column, &result); logical_type != nullptr) {
+        result.doris_type = logical_type;
+    } else if (!result.unsupported_reason.empty()) {
+        result.doris_type = nullptr;
+        result.supports_record_reader = false;
+    } else if (auto converted_type = converted_type_to_doris_type(column, &result);
+               converted_type != nullptr) {
+        result.doris_type = converted_type;
+    } else if (!result.unsupported_reason.empty()) {
+        result.doris_type = nullptr;
+        result.supports_record_reader = false;
+    } else {
+        result.doris_type = physical_type_to_doris_type(column);
+        if (result.physical_type == ::parquet::Type::INT96) {
+            result.extra_type_info = ParquetExtraTypeInfo::IMPALA_TIMESTAMP;
+        }
+    }
+
+    result.is_string_like = !result.is_decimal &&
+                            result.extra_type_info != ParquetExtraTypeInfo::FLOAT16 &&
+                            (result.physical_type == ::parquet::Type::BYTE_ARRAY ||
+                             result.physical_type == ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+
+    if (!record_reader_physical_type_supported(result.physical_type)) {
+        result.supports_record_reader = false;
+    }
+    return result;
+}
+
+bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor) {
+    return type_descriptor.supports_record_reader;
+}
+
+DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor) {
+    switch (type_descriptor.physical_type) {
+    case ::parquet::Type::BOOLEAN:
+        return DecodedValueKind::BOOL;
+    case ::parquet::Type::INT32:
+        if (type_descriptor.is_unsigned_integer && type_descriptor.integer_bit_width == 32) {
+            return DecodedValueKind::UINT32;
+        }
+        return DecodedValueKind::INT32;
+    case ::parquet::Type::INT64:
+        if (type_descriptor.is_unsigned_integer && type_descriptor.integer_bit_width == 64) {
+            return DecodedValueKind::UINT64;
+        }
+        return DecodedValueKind::INT64;
+    case ::parquet::Type::INT96:
+        return DecodedValueKind::INT96;
+    case ::parquet::Type::FLOAT:
+        return DecodedValueKind::FLOAT;
+    case ::parquet::Type::DOUBLE:
+        return DecodedValueKind::DOUBLE;
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return DecodedValueKind::FIXED_BINARY;
+    case ::parquet::Type::BYTE_ARRAY:
+    default:
+        return DecodedValueKind::BINARY;
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_type.h b/be/src/format_v2/parquet/parquet_type.h
new file mode 100644
index 00000000000000..1f99bf332e532f
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_type.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <parquet/types.h>
+
+#include <string>
+
+#include "core/data_type/data_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
+
+namespace parquet {
+class ColumnDescriptor;
+} // namespace parquet
+
+namespace doris::format::parquet {
+
+// ============================================================================
+// Parquet 额外类型编码信息
+// ============================================================================
+//
+// Doris 的 DataType 只能表达最终展示类型（如 Decimal(10,2)、DATETIMEV2(6)），
+// 但 reader 读值时还需要知道 Parquet 物理存储方式。
+// ParquetExtraTypeInfo 补全了这个信息。
+
+enum class ParquetExtraTypeInfo {
+    NONE,               // 无特殊编码，按物理类型直接读取
+    DECIMAL_INT32,      // decimal 存为 4-byte big-endian int
+    DECIMAL_INT64,      // decimal 存为 8-byte big-endian int
+    DECIMAL_BYTE_ARRAY, // decimal 存为变长或定长 big-endian byte array
+    UNIT_MS,            // 时间精度为毫秒
+    UNIT_MICROS,        // 时间精度为微秒
+    UNIT_NS,            // 时间精度为纳秒
+    IMPALA_TIMESTAMP,   // INT96 格式的 Impala 兼容 timestamp
+    FLOAT16,            // 半精度浮点（FIXED_LEN_BYTE_ARRAY(2) → Float32）
+};
+
+enum class ParquetTimeUnit {
+    UNKNOWN,
+    MILLIS,
+    MICROS,
+    NANOS,
+};
+
+// ============================================================================
+// Parquet 类型解析结果 — resolve_parquet_type() 的输出
+// ============================================================================
+//
+// 将 Arrow ColumnDescriptor（physical_type + logical_type + converted_type）
+// 解析为 Doris DataType + 读值时需要的全部编码信息。
+//
+// 三级解析优先级：logical_type（优先）→ converted_type（次）→ physical_type（兜底）
+//
+// 关键字段说明：
+//   doris_type        — Doris 侧的最终类型（如 DECIMAL128(10,2)、DATETIMEV2(6)）
+//   extra_type_info   — 物理编码方式（如 DECIMAL_INT32、IMPALA_TIMESTAMP）
+//   physical_type     — Parquet 物理类型（INT32/INT64/BYTE_ARRAY/...）
+//   is_string_like    — 物理类型是 binary 且不是 decimal/FLOAT16 → 归为 string-like
+//   supports_record_reader — 是否可以通过 Arrow RecordReader 读取（当前全部支持）
+struct ParquetTypeDescriptor {
+    DataTypePtr doris_type;
+    ParquetExtraTypeInfo extra_type_info = ParquetExtraTypeInfo::NONE;
+    ParquetTimeUnit time_unit = ParquetTimeUnit::UNKNOWN;
+    ::parquet::Type::type physical_type = ::parquet::Type::UNDEFINED;
+    ::parquet::ConvertedType::type converted_type = ::parquet::ConvertedType::UNDEFINED;
+    int integer_bit_width = -1;                // INT_8/16/32/64 的位宽
+    int decimal_precision = -1;                // DECIMAL(p,s) 的精度
+    int decimal_scale = -1;                    // DECIMAL(p,s) 的小数位
+    int fixed_length = -1;                     // FIXED_LEN_BYTE_ARRAY 的固定长度
+    bool is_unsigned_integer = false;          // 是否 unsigned 整数（UINT_8/16/32/64）
+    bool is_decimal = false;                   // 是否 decimal 类型
+    bool is_timestamp = false;                 // 是否 timestamp 类型
+    bool timestamp_is_adjusted_to_utc = false; // timestamp 是否已 UTC 归一化
+    bool is_string_like = false;               // binary 但不是 decimal/FLOAT16
+    bool supports_record_reader = true;        // 能否通过 Arrow RecordReader 读取
+    std::string unsupported_reason;            // 非空表示该 Parquet 逻辑类型暂不支持
+};
+
+// 返回 Parquet leaf column 的 file-local 展示名（如 "a.b.c"）。
+std::string parquet_column_name(const ::parquet::ColumnDescriptor* column);
+
+// 将 Parquet ColumnDescriptor 解析为 ParquetTypeDescriptor。
+// 不做 table schema evolution：类型提升和 default/generated/partition 列由 TableReader 处理。
+ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column);
+
+// 判断该类型是否可以通过 Arrow Parquet RecordReader 读取。
+// 当前所有已知物理类型均支持，预留扩展性。
+bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor);
+
+// 返回该类型的值在 Arrow RecordReader 中的解码方式。
+// 用于 ParquetLeafReader 确定按 INT32/INT64/FLOAT/BINARY 等哪种格式解读 values buffer。
+DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/column_reader.cpp b/be/src/format_v2/parquet/reader/column_reader.cpp
new file mode 100644
index 00000000000000..fd8921a6a79ad6
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/column_reader.cpp
@@ -0,0 +1,576 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/column_reader.h"
+
+#include <arrow/memory_pool.h>
+#include <parquet/api/reader.h>
+#include <parquet/api/schema.h>
+#include <parquet/level_conversion.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+#include "format_v2/parquet/reader/list_column_reader.h"
+#include "format_v2/parquet/reader/map_column_reader.h"
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+#include "format_v2/parquet/reader/struct_column_reader.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// Arrow PageReader 的 data page filter 回调。
+// 在 page index 裁剪阶段已生成 ParquetPageSkipPlan，其中记录了每个 data page 是否应跳过。
+// 本回调在 Arrow 遍历 data page 时被触发，按 skip plan 标记跳过对应 page，
+// 使 RecordReader 在物理读取层面就不读这些 page。
+class DataPageSkipFilter {
+public:
+    DataPageSkipFilter(const ParquetPageSkipPlan* page_skip_plan,
+                       ParquetPageSkipProfile page_skip_profile)
+            : _page_skip_plan(page_skip_plan), _page_skip_profile(page_skip_profile) {
+        DORIS_CHECK(_page_skip_plan != nullptr);
+    }
+
+    bool operator()(const ::parquet::DataPageStats&) {
+        // Arrow invokes this callback once for each DATA_PAGE/DATA_PAGE_V2 and never for
+        // dictionary pages, so this ordinal matches Parquet OffsetIndex page locations.
+        const size_t page_idx = _next_data_page_idx++;
+        const bool skip = _page_skip_plan->should_skip_page(page_idx);
+        if (!skip) {
+            return false;
+        }
+        update_skip_profile(page_idx);
+        return true;
+    }
+
+private:
+    void update_skip_profile(size_t page_idx) const {
+        if (_page_skip_profile.skipped_pages != nullptr) {
+            COUNTER_UPDATE(_page_skip_profile.skipped_pages, 1);
+        }
+        if (_page_skip_profile.skipped_bytes != nullptr) {
+            COUNTER_UPDATE(_page_skip_profile.skipped_bytes,
+                           _page_skip_plan->skipped_page_compressed_size(page_idx));
+        }
+    }
+
+    const ParquetPageSkipPlan* _page_skip_plan = nullptr;
+    ParquetPageSkipProfile _page_skip_profile;
+    size_t _next_data_page_idx = 0;
+};
+
+// 从 page_skip_plans map 中查找指定 leaf_column_id 的 skip plan。
+const ParquetPageSkipPlan* find_page_skip_plan(
+        const std::map<int, ParquetPageSkipPlan>* page_skip_plans, int leaf_column_id) {
+    if (page_skip_plans == nullptr) {
+        return nullptr;
+    }
+    const auto plan_it = page_skip_plans->find(leaf_column_id);
+    return plan_it == page_skip_plans->end() ? nullptr : &plan_it->second;
+}
+
+// 为 Arrow PageReader 安装 data page 级别的跳过过滤器。
+// 如果该 leaf column 没有 page skip plan，跳过安装。
+void install_data_page_filter(std::unique_ptr<::parquet::PageReader>& page_reader,
+                              const std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+                              int leaf_column_id, ParquetPageSkipProfile page_skip_profile) {
+    DORIS_CHECK(page_reader != nullptr);
+    const ParquetPageSkipPlan* page_skip_plan =
+            find_page_skip_plan(page_skip_plans, leaf_column_id);
+    if (page_skip_plan == nullptr) {
+        return;
+    }
+    page_reader->set_data_page_filter(DataPageSkipFilter(page_skip_plan, page_skip_profile));
+}
+
+// 判断嵌套场景下该列是否可以通过简化版 ScalarColumnReader 读取。
+// 当前只对纯物理类型（无 logical/converted annotation）返回 true。
+bool supports_nested_scalar_record_reader(const ParquetColumnSchema& column_schema) {
+    if (column_schema.type_descriptor.supports_record_reader) {
+        return true;
+    }
+    const auto& type_descriptor = column_schema.type_descriptor;
+    if ((type_descriptor.extra_type_info != ParquetExtraTypeInfo::NONE &&
+         type_descriptor.extra_type_info != ParquetExtraTypeInfo::FLOAT16) ||
+        type_descriptor.is_decimal || type_descriptor.is_timestamp ||
+        type_descriptor.is_string_like) {
+        return false;
+    }
+    if (type_descriptor.converted_type != ::parquet::ConvertedType::NONE &&
+        type_descriptor.converted_type != ::parquet::ConvertedType::UNDEFINED) {
+        return false;
+    }
+    switch (type_descriptor.physical_type) {
+    case ::parquet::Type::BOOLEAN:
+    case ::parquet::Type::INT32:
+    case ::parquet::Type::INT64:
+    case ::parquet::Type::FLOAT:
+    case ::parquet::Type::DOUBLE:
+        return true;
+    default:
+        return false;
+    }
+    return true;
+}
+
+} // namespace
+
+Status ParquetColumnReader::skip(int64_t rows) {
+    return Status::NotSupported("Parquet column skip is not implemented, rows={}", rows);
+}
+
+void ParquetColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    int64_t child_cursor = nested_build_level_cursor();
+    const auto& child_rep_levels = nested_repetition_levels();
+    const int64_t child_levels_written = nested_levels_written();
+    while (child_cursor < child_levels_written) {
+        const int16_t child_rep_level = child_rep_levels[child_cursor];
+        ++child_cursor;
+        if (!is_or_has_repeated_child() || child_rep_level <= parent_repetition_level) {
+            break;
+        }
+    }
+    set_nested_build_level_cursor(child_cursor);
+}
+
+void ParquetColumnReader::update_reader_read_rows(int64_t rows) const {
+    if (_profile.reader_read_rows != nullptr) {
+        COUNTER_UPDATE(_profile.reader_read_rows, rows);
+    }
+}
+
+void ParquetColumnReader::update_reader_skip_rows(int64_t rows) const {
+    if (_profile.reader_skip_rows != nullptr) {
+        COUNTER_UPDATE(_profile.reader_skip_rows, rows);
+    }
+}
+
+// select() 的默认实现：将 SelectionVector 转为连续的 RowRange 列表，
+// 对每个 range 调用 skip(range.start - cursor) + read(range.length)，
+// 最后 skip(batch_rows - cursor) 消耗完整个 batch。
+// 子类可以覆写以获得更高效的实现。
+Status ParquetColumnReader::select(const SelectionVector& sel, uint16_t selected_rows,
+                                   int64_t batch_rows, MutableColumnPtr& column) {
+    if (column.get() == nullptr) {
+        return Status::InvalidArgument("Parquet selected read result is null for column {}",
+                                       name());
+    }
+    RETURN_IF_ERROR(sel.verify(selected_rows, batch_rows));
+
+    const auto ranges = selection_to_ranges(sel, selected_rows);
+    int64_t cursor = 0;
+    for (const auto& range : ranges) {
+        if (range.start < cursor || range.start + range.length > batch_rows) {
+            return Status::InvalidArgument("Invalid parquet selection range [{}, {}) for column {}",
+                                           range.start, range.start + range.length, name());
+        }
+        RETURN_IF_ERROR(skip(range.start - cursor));
+
+        int64_t range_rows_read = 0;
+        RETURN_IF_ERROR(read(range.length, column, &range_rows_read));
+        if (range_rows_read != range.length) {
+            return Status::Corruption(
+                    "Parquet selected read returned {} rows, expected {} rows for column {}",
+                    range_rows_read, range.length, name());
+        }
+        cursor = range.start + range.length;
+    }
+    RETURN_IF_ERROR(skip(batch_rows - cursor));
+    if (_profile.reader_select_rows != nullptr) {
+        COUNTER_UPDATE(_profile.reader_select_rows, selected_rows);
+    }
+    return Status::OK();
+}
+
+ParquetColumnReaderFactory::ParquetColumnReaderFactory(
+        std::shared_ptr<::parquet::RowGroupReader> row_group, int num_leaf_columns,
+        const std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPageSkipProfile page_skip_profile, const cctz::time_zone* timezone,
+        bool enable_strict_mode, ParquetColumnReaderProfile column_reader_profile)
+        : _row_group(std::move(row_group)),
+          _record_readers(static_cast<size_t>(num_leaf_columns)),
+          _page_skip_plans(page_skip_plans),
+          _page_skip_profile(page_skip_profile),
+          _timezone(timezone),
+          _enable_strict_mode(enable_strict_mode),
+          _column_reader_profile(column_reader_profile) {}
+
+std::unique_ptr<ParquetColumnReader> ParquetColumnReaderFactory::create_row_position_column_reader(
+        int64_t row_group_first_row) const {
+    return std::make_unique<RowPositionColumnReader>(row_group_first_row, _column_reader_profile);
+}
+
+std::unique_ptr<ParquetColumnReader> ParquetColumnReaderFactory::create_global_rowid_column_reader(
+        const format::GlobalRowIdContext& context, int64_t row_group_first_row) const {
+    return std::make_unique<GlobalRowIdColumnReader>(context, row_group_first_row,
+                                                     _column_reader_profile);
+}
+
+Status ParquetColumnReaderFactory::make_scalar_column_reader(
+        const ParquetColumnSchema& column_schema,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader, bool use_page_skip_plan,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    const auto* page_skip_plan =
+            use_page_skip_plan ? find_page_skip_plan(_page_skip_plans, column_schema.leaf_column_id)
+                               : nullptr;
+    *reader = std::make_unique<ScalarColumnReader>(column_schema, std::move(record_reader),
+                                                   page_skip_plan, _timezone, _enable_strict_mode,
+                                                   _column_reader_profile);
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create_scalar_column_reader(
+        const ParquetColumnSchema& column_schema, bool is_nested,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (!column_schema.type_descriptor.unsupported_reason.empty()) {
+        return Status::NotSupported("Unsupported parquet column '{}': {}", column_schema.name,
+                                    column_schema.type_descriptor.unsupported_reason);
+    }
+    if (is_nested && column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE) {
+        return Status::InvalidArgument("Parquet nested scalar reader requires primitive column {}",
+                                       column_schema.name);
+    }
+    if (column_schema.leaf_column_id < 0 ||
+        column_schema.leaf_column_id >= static_cast<int>(_record_readers.size())) {
+        return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}",
+                                       column_schema.leaf_column_id, column_schema.name);
+    }
+    if (column_schema.descriptor == nullptr) {
+        return Status::InvalidArgument("Parquet column descriptor is null for column {}",
+                                       column_schema.name);
+    }
+    if (!is_nested && (column_schema.descriptor->max_repetition_level() != 0 ||
+                       column_schema.descriptor->max_definition_level() > 1)) {
+        return Status::NotSupported(
+                "Current parquet scalar reader only supports flat primitive columns; column {} is "
+                "not supported",
+                column_schema.name);
+    }
+    if (is_nested && !supports_nested_scalar_record_reader(column_schema)) {
+        return Status::NotSupported(
+                "Current parquet nested scalar reader does not support column {}",
+                column_schema.name);
+    }
+    if (!is_nested && !column_schema.type_descriptor.supports_record_reader) {
+        return Status::NotSupported("Current parquet scalar reader does not support column {}",
+                                    column_schema.name);
+    }
+    std::shared_ptr<::parquet::internal::RecordReader> record_reader;
+    // Nested readers implement skip() by materializing rows into a scratch column. If Arrow
+    // page filtering is also installed, those scratch reads can consume the next selected row
+    // after a page-index range gap. Keep page filtering on flat scalar readers only.
+    RETURN_IF_ERROR(get_record_reader(column_schema.leaf_column_id, column_schema.descriptor,
+                                      column_schema.name, !is_nested, &record_reader));
+    return make_scalar_column_reader(column_schema, std::move(record_reader), !is_nested, reader);
+}
+
+// 惰性创建并缓存 Arrow RecordReader（按 leaf_column_id 索引）。
+//
+// 多个 Doris reader 可能通过不同嵌套路径共享同一个物理列（例如 MAP 的 key 和 value
+// 是独立的物理列，分别被 key_reader 和 value_reader 持有，但它们不共享 RecordReader）。
+// 真正的共享发生在同一个物理列被 STRUCT 的多个子字段同时需要时。
+//
+// 创建过程：
+//   1. RowGroupReader::GetColumnPageReader(leaf_column_id) → Arrow PageReader
+//   2. install_data_page_filter() — 安装 page index 裁剪的 page 级过滤器
+//   3. LevelInfo::ComputeLevelInfo() + RecordReader::Make() — 创建 RecordReader
+//   4. SetPageReader() — 绑定 PageReader
+Status ParquetColumnReaderFactory::get_record_reader(
+        int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor, const std::string& name,
+        bool install_page_filter,
+        std::shared_ptr<::parquet::internal::RecordReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (_row_group == nullptr) {
+        return Status::InternalError("Parquet row group reader is not initialized for column {}",
+                                     name);
+    }
+    if (leaf_column_id < 0 || leaf_column_id >= static_cast<int>(_record_readers.size())) {
+        return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}",
+                                       leaf_column_id, name);
+    }
+    if (descriptor == nullptr) {
+        return Status::InvalidArgument("Parquet column descriptor is null for column {}", name);
+    }
+    // 惰性创建：只有第一次访问时才初始化 RecordReader
+    if (_record_readers[leaf_column_id] == nullptr) {
+        try {
+            auto page_reader = _row_group->GetColumnPageReader(leaf_column_id);
+            if (install_page_filter) {
+                install_data_page_filter(page_reader, _page_skip_plans, leaf_column_id,
+                                         _page_skip_profile);
+            }
+            const auto level_info = ::parquet::internal::LevelInfo::ComputeLevelInfo(descriptor);
+            _record_readers[leaf_column_id] = ::parquet::internal::RecordReader::Make(
+                    descriptor, level_info, ::arrow::default_memory_pool(),
+                    /*read_dictionary=*/false,
+                    /*read_dense_for_nullable=*/false);
+            _record_readers[leaf_column_id]->SetPageReader(std::move(page_reader));
+        } catch (const ::parquet::ParquetException& e) {
+            return Status::Corruption("Failed to create parquet record reader for column {}: {}",
+                                      name, e.what());
+        } catch (const std::exception& e) {
+            return Status::InternalError("Failed to create parquet record reader for column {}: {}",
+                                         name, e.what());
+        }
+    }
+    if (_record_readers[leaf_column_id] == nullptr) {
+        return Status::Corruption("Failed to create parquet record reader for column {}", name);
+    }
+    *reader = _record_readers[leaf_column_id];
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create_struct_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    std::vector<std::unique_ptr<ParquetColumnReader>> child_readers;
+    child_readers.reserve(column_schema.children.size());
+    std::vector<int> child_output_indices;
+    child_output_indices.reserve(column_schema.children.size());
+    DataTypes projected_child_types;
+    Strings projected_child_names;
+    for (size_t child_idx = 0; child_idx < column_schema.children.size(); ++child_idx) {
+        const auto& child_schema = column_schema.children[child_idx];
+        const auto* child_projection =
+                format::find_child_projection(projection, child_schema->local_id);
+        if (!format::is_child_projected(projection, child_schema->local_id)) {
+            continue;
+        }
+        std::unique_ptr<ParquetColumnReader> child_reader;
+        RETURN_IF_ERROR(create_column_reader(*child_schema, child_projection, true, &child_reader));
+        child_output_indices.push_back(static_cast<int>(projected_child_types.size()));
+        projected_child_types.push_back(make_nullable(child_reader->type()));
+        projected_child_names.push_back(child_reader->name());
+        child_readers.push_back(std::move(child_reader));
+    }
+    if (format::is_partial_projection(projection) &&
+        projected_child_types.size() != projection->children.size()) {
+        return Status::InvalidArgument(
+                "Parquet STRUCT projection for column {} contains invalid child",
+                column_schema.name);
+    }
+    if (projected_child_types.empty() && !column_schema.children.empty()) {
+        return Status::NotSupported("Parquet STRUCT projection for column {} contains no children",
+                                    column_schema.name);
+    }
+    DataTypePtr type = column_schema.type;
+    if (format::is_partial_projection(projection)) {
+        type = std::make_shared<DataTypeStruct>(projected_child_types, projected_child_names);
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            type = make_nullable(type);
+        }
+    }
+    *reader = std::make_unique<StructColumnReader>(
+            column_schema, std::move(type), std::move(child_readers),
+            std::move(child_output_indices), _column_reader_profile);
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create_list_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (column_schema.children.size() != 1) {
+        return Status::NotSupported("Unsupported parquet LIST layout for column {}",
+                                    column_schema.name);
+    }
+    std::unique_ptr<ParquetColumnReader> element_reader;
+    const auto& element_schema = *column_schema.children[0];
+    const auto* element_projection =
+            format::find_child_projection(projection, element_schema.local_id);
+    if (format::is_partial_projection(projection) && element_projection == nullptr) {
+        return Status::NotSupported("Parquet LIST projection for column {} contains no element",
+                                    column_schema.name);
+    }
+    RETURN_IF_ERROR(
+            create_column_reader(element_schema, element_projection, true, &element_reader));
+    DataTypePtr type = column_schema.type;
+    if (format::is_partial_projection(element_projection)) {
+        type = std::make_shared<DataTypeArray>(element_reader->type());
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            type = make_nullable(type);
+        }
+    }
+    *reader = std::make_unique<ListColumnReader>(column_schema, std::move(type),
+                                                 std::move(element_reader), _column_reader_profile);
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create_map_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (column_schema.children.size() != 2) {
+        return Status::NotSupported("Unsupported parquet MAP layout for column {}",
+                                    column_schema.name);
+    }
+    const auto& key_schema = *column_schema.children[0];
+    const auto& value_schema = *column_schema.children[1];
+    const auto* value_projection = format::find_child_projection(projection, value_schema.local_id);
+    if (format::is_partial_projection(projection)) {
+        if (value_projection == nullptr) {
+            return Status::NotSupported("Parquet MAP projection for column {} contains no value",
+                                        column_schema.name);
+        }
+        for (const auto& child_projection : projection->children) {
+            if (child_projection.local_id() == key_schema.local_id) {
+                continue;
+            }
+            if (child_projection.local_id() != value_schema.local_id) {
+                return Status::InvalidArgument(
+                        "Parquet MAP projection for column {} contains invalid child",
+                        column_schema.name);
+            }
+        }
+    }
+    std::unique_ptr<ParquetColumnReader> key_reader;
+    // MAP materialization always needs the full key stream. It owns entry existence, offsets and
+    // key equality semantics, so MAP projection is defined only as value-subtree pruning.
+    RETURN_IF_ERROR(create_column_reader(key_schema, nullptr, true, &key_reader));
+    std::unique_ptr<ParquetColumnReader> value_reader;
+    RETURN_IF_ERROR(create_column_reader(value_schema, value_projection, true, &value_reader));
+    DataTypePtr type = column_schema.type;
+    if (format::is_partial_projection(value_projection)) {
+        type = std::make_shared<DataTypeMap>(make_nullable(key_reader->type()),
+                                             make_nullable(value_reader->type()));
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            type = make_nullable(type);
+        }
+    }
+    *reader =
+            std::make_unique<MapColumnReader>(column_schema, std::move(type), std::move(key_reader),
+                                              std::move(value_reader), _column_reader_profile);
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create(const ParquetColumnSchema& column_schema,
+                                          const format::LocalColumnIndex* projection,
+                                          std::unique_ptr<ParquetColumnReader>* reader) const {
+    return create_column_reader(column_schema, projection, false, reader);
+}
+
+Status ParquetColumnReaderFactory::create_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        bool is_nested, std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    switch (column_schema.kind) {
+    case ParquetColumnSchemaKind::PRIMITIVE:
+        if (is_nested) {
+            if (format::is_partial_projection(projection)) {
+                return Status::InvalidArgument("Parquet scalar projection is invalid for column {}",
+                                               column_schema.name);
+            }
+            return create_scalar_column_reader(column_schema, true, reader);
+        }
+        return create_scalar_column_reader(column_schema, false, reader);
+    case ParquetColumnSchemaKind::STRUCT:
+        return create_struct_column_reader(column_schema, projection, reader);
+    case ParquetColumnSchemaKind::LIST:
+        return create_list_column_reader(column_schema, projection, reader);
+    case ParquetColumnSchemaKind::MAP:
+        return create_map_column_reader(column_schema, projection, reader);
+    }
+    return Status::NotSupported("Unsupported parquet column schema kind for column {}",
+                                column_schema.name);
+}
+
+ParquetColumnReader::ParquetColumnReader(const ParquetColumnSchema& schema, const DataTypePtr type,
+                                         ParquetColumnReaderProfile profile)
+        : _profile(profile),
+          _field_id(schema.local_id),
+          _leaf_column_id(schema.leaf_column_id),
+          _nullable_definition_level(schema.nullable_definition_level),
+          _repeated_repetition_level(schema.repeated_repetition_level),
+          _definition_level(schema.definition_level),
+          _repetition_level(schema.repetition_level),
+          _repeated_ancestor_definition_level(schema.repeated_ancestor_definition_level),
+          _type(std::move(type)),
+          _name(schema.name) {}
+
+Status ParquetColumnReader::load_nested_batch(int64_t) {
+    return Status::NotSupported("Parquet nested batch load is not supported for column {}", _name);
+}
+
+Status ParquetColumnReader::build_nested_column(int64_t, MutableColumnPtr&, int64_t*) {
+    return Status::NotSupported("Parquet nested column build is not supported for column {}",
+                                _name);
+}
+
+Status ParquetColumnReader::skip_nested_column(int64_t rows) {
+    auto scratch_column = _type->create_column();
+    int64_t values_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &values_read));
+    if (values_read != rows) {
+        return Status::Corruption("Failed to skip nested parquet column {}: skipped {} of {} rows",
+                                  _name, values_read, rows);
+    }
+    return Status::OK();
+}
+
+const std::vector<int16_t>& ParquetColumnReader::nested_definition_levels() const {
+    static const std::vector<int16_t> empty;
+    return empty;
+}
+
+const std::vector<int16_t>& ParquetColumnReader::nested_repetition_levels() const {
+    static const std::vector<int16_t> empty;
+    return empty;
+}
+
+int64_t ParquetColumnReader::nested_levels_written() const {
+    return 0;
+}
+
+bool ParquetColumnReader::is_or_has_repeated_child() const {
+    return _repetition_level > 0;
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/column_reader.h b/be/src/format_v2/parquet/reader/column_reader.h
new file mode 100644
index 00000000000000..d17d02e50b3fec
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/column_reader.h
@@ -0,0 +1,266 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type.h"
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_type.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "runtime/runtime_profile.h"
+
+namespace parquet {
+class ColumnDescriptor;
+class RowGroupReader;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class IColumn;
+} // namespace doris
+
+namespace doris::format::parquet {
+struct ParquetColumnSchema;
+
+// Doris 的 Parquet column reader 抽象基类。
+//
+// 该类包装 Arrow Parquet RecordReader，负责将 file-local Parquet leaf column 读取成
+// Doris-owned column。它不理解 Iceberg/global schema，也不处理 table-level
+// cast/default/generated/partition 语义。
+//
+// 对外提供两组接口：
+//
+// ① 平铺读取路径（top-level primitive / 复杂类型的整体读取）：
+//      read()  — 从当前位置全量读取 rows 行
+//      skip()  — row-level 跳过
+//      select() — 按 SelectionVector 部分读取（late materialization 的关键）
+//
+// ② 嵌套读取协议（LIST/MAP/STRUCT 内部的父子协作）：
+//      load_nested_batch()     — 加载一批 def/rep levels + values
+//      build_nested_column()   — 从 levels 重建嵌套结构并填充值
+//      skip_nested_column()    — 跳过一批嵌套数据
+//      这个两步协议将 level 解码与值物化分离，让复杂 reader 可以先确定容器结构再按需填充值。
+class ParquetColumnReader {
+public:
+    virtual ~ParquetColumnReader() = default;
+
+    // ========== 标识字段 ==========
+
+    // Reader 在 file_schema 树中的 id。
+    // 顶层 reader 返回 root column ordinal，嵌套 reader 返回父节点下的 child ordinal。
+    virtual int file_column_id() const { return _field_id; }
+
+    // 该 reader 对应的 Parquet 物理 leaf column id。
+    // 用于访问 ColumnDescriptor、RecordReader、ColumnChunk metadata 和 statistics。
+    // 例如 MAP<INT, STRING>：顶层 MAP 节点的 parquet_leaf_column_id == file_column_id，
+    // 但其子节点 a.key 的 parquet_leaf_column_id == 0（key 列在文件中的物理序号）。
+    virtual int parquet_leaf_column_id() const { return _leaf_column_id; }
+
+    // ========== Level 字段 ==========
+    // 使本节点自身变为 nullable 的 definition level 阈值。
+    // 复杂 reader 用此值区分"我的值是 NULL"和"我有值但内容为空"。
+    int16_t nullable_definition_level() const { return _nullable_definition_level; }
+    // 最近 repeated 祖先的 repetition level。
+    // LIST/MAP reader 用此值从孩子 rep level 流中判断"新元素开始"。
+    int16_t repeated_repetition_level() const { return _repeated_repetition_level; }
+
+    virtual const DataTypePtr& type() const { return _type; }
+    virtual const std::string& name() const { return _name; }
+    const ParquetColumnReaderProfile& profile() const { return _profile; }
+
+    // ========== ① 平铺读取接口 ==========
+
+    // 全量读取：从当前位置读 rows 行，写入 column。
+    virtual Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) = 0;
+
+    // 跳过 rows 行。必须使用 row-level skip（推进游标），不能退化为 value-level skip。
+    virtual Status skip(int64_t rows);
+
+    // 部分读取：跳过 batch 内未选中的行，只输出 SelectionVector 中标记的行。
+    // 用于 late materialization —— predicate 列全量读，non-predicate 列按 selection 读。
+    // 该方法只允许 skip + read 推进游标，不允许退化为整批 read + filter。
+    virtual Status select(const SelectionVector& sel, uint16_t selected_rows, int64_t batch_rows,
+                          MutableColumnPtr& column);
+
+    // ========== ② 嵌套读取协议 ==========
+    // 复杂 reader（LIST/MAP/STRUCT）通过这个两步协议与子 reader 协作。
+
+    // 第一步：加载一批嵌套数据。递归调用子 reader，最终到达 leaf reader 的
+    // ParquetLeafReader::read_nested_batch()，返回 def/rep levels + values。
+    virtual Status load_nested_batch(int64_t rows);
+
+    // 第二步：从已加载的 levels 重建嵌套结构（offsets + null_map）并填充值。
+    // length_upper_bound 是预估值，用于提前 reserve 空间。
+    virtual Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                       int64_t* values_read);
+
+    // 跳过 rows 行嵌套数据。递归调用子 reader 的 skip_nested_column。
+    virtual Status skip_nested_column(int64_t rows);
+
+    // 返回已加载的嵌套 definition/repetition levels（由子 reader 或自身填充）。
+    virtual const std::vector<int16_t>& nested_definition_levels() const;
+    virtual const std::vector<int16_t>& nested_repetition_levels() const;
+    virtual int64_t nested_levels_written() const;
+    // 该 reader 自身或其子树中是否包含 repeated 子节点。
+    virtual bool is_or_has_repeated_child() const;
+    virtual void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level);
+
+    // ========== 嵌套构建游标 ==========
+    // 复杂 reader 在 build_nested_column 时分多轮调用子 reader，游标跟踪当前处理到的
+    // def/rep level 数组位置，避免重复处理同一批 level。
+    int64_t nested_build_level_cursor() const { return _nested_build_level_cursor; }
+    void set_nested_build_level_cursor(int64_t cursor) {
+        DORIS_CHECK(cursor >= 0);
+        _nested_build_level_cursor = cursor;
+    }
+    void reset_nested_build_level_cursor() { _nested_build_level_cursor = 0; }
+
+protected:
+    ParquetColumnReader(const ParquetColumnSchema& schema, const DataTypePtr type,
+                        ParquetColumnReaderProfile profile = {});
+    ParquetColumnReader() = default;
+    void update_reader_read_rows(int64_t rows) const;
+    void update_reader_skip_rows(int64_t rows) const;
+
+    ParquetColumnReaderProfile _profile;
+    const int _field_id = -1;                     // 在父节点中的 child ordinal
+    const int _leaf_column_id = -1;               // Parquet 物理 leaf column id (-1 = 非叶子)
+    const int16_t _nullable_definition_level = 0; // 本节点 nullable 的 def level 阈值
+    const int16_t _repeated_repetition_level = 0; // 最近 repeated 祖先的 rep level
+    const int16_t _definition_level = 0;          // 累计到本节点的 def level
+    const int16_t _repetition_level = 0;          // 累计到本节点的 rep level
+    const int16_t _repeated_ancestor_definition_level = 0; // 最近 repeated 祖先的 def level
+    const DataTypePtr _type;                               // Doris 目标类型
+    const std::string _name;                               // 列名（用于报错信息）
+    int64_t _nested_build_level_cursor = 0; // 嵌套构建游标（当前处理到的 level 位置）
+};
+
+// 为一个 Parquet RowGroup 创建 Doris Column Reader 的工厂。
+//
+// 工厂持有 RowGroup 级别的共享状态：
+// - Arrow RecordReader 实例（按 leaf_column_id 缓存，同一物理列可能被多个 reader 共享）
+// - Page skip plans 和 page skip profile（page index 裁剪结果）
+// - 标量物化选项：timezone、strict mode 等
+//
+// 外部调用方只请求顶层列或虚拟扫描列，嵌套子列的递归构造保持私有，
+// ParquetScanScheduler 和 ParquetReader 不感知物理 schema 细节。
+//
+// Projection 支持：当只需要复杂类型的部分子列时（如 MAP 只读 value），
+// factory 通过 LocalColumnIndex 参数传递 projection 路径，
+// 只为被 projected 的子列创建 reader，跳过不需要的部分。
+class ParquetColumnReaderFactory {
+public:
+    ParquetColumnReaderFactory(std::shared_ptr<::parquet::RowGroupReader> row_group,
+                               int num_leaf_columns,
+                               const std::map<int, ParquetPageSkipPlan>* page_skip_plans = nullptr,
+                               ParquetPageSkipProfile page_skip_profile = {},
+                               const cctz::time_zone* timezone = nullptr,
+                               bool enable_strict_mode = false,
+                               ParquetColumnReaderProfile column_reader_profile = {});
+
+    // 为顶层列 schema 创建 reader。projection 可选，为 nullptr 时读取全部子列，
+    // 非 nullptr 时只读取被 projected 的子树部分。
+    Status create(const ParquetColumnSchema& column_schema,
+                  const format::LocalColumnIndex* projection,
+                  std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 便捷重载：projection = nullptr（读取全部子列）。
+    Status create(const ParquetColumnSchema& column_schema,
+                  std::unique_ptr<ParquetColumnReader>* reader) const {
+        return create(column_schema, nullptr, reader);
+    }
+
+    // 创建虚拟列 reader：生成行位置序号。
+    std::unique_ptr<ParquetColumnReader> create_row_position_column_reader(
+            int64_t row_group_first_row) const;
+    // 创建虚拟列 reader：生成全局唯一 RowId。
+    std::unique_ptr<ParquetColumnReader> create_global_rowid_column_reader(
+            const format::GlobalRowIdContext& context, int64_t row_group_first_row) const;
+
+private:
+    // 创建基本类型叶子的 reader。
+    // is_nested=true 表示该叶子在复杂类型内部，允许携带 def/rep levels。
+    // is_nested=false 表示顶层平铺列，需要做额外的 flat layout 校验。
+    Status create_scalar_column_reader(const ParquetColumnSchema& column_schema, bool is_nested,
+                                       std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 创建 STRUCT reader。递归为 projected 子列创建 reader。
+    // 部分 projection 时重建 DataTypeStruct，使物化结果只包含被 projected 的子字段。
+    Status create_struct_column_reader(const ParquetColumnSchema& column_schema,
+                                       const format::LocalColumnIndex* projection,
+                                       std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 创建 LIST reader，持有单个 element reader。
+    // element 被部分 projection 时，重建 DataTypeArray 使用 projected 的 element type。
+    Status create_list_column_reader(const ParquetColumnSchema& column_schema,
+                                     const format::LocalColumnIndex* projection,
+                                     std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 创建 MAP reader，持有 key reader + value reader。
+    // Schema 构建时已折叠 key_value/entry wrapper，children 直接是 [key, value]。
+    // 部分 MAP projection 仅对 value 子树做裁剪。key 流始终完整读取，
+    // 因为它拥有 entry 的存在性、offsets 和 key equality 语义。
+    Status create_map_column_reader(const ParquetColumnSchema& column_schema,
+                                    const format::LocalColumnIndex* projection,
+                                    std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 私有递归分发器。根据 ParquetColumnSchema::kind 路由到对应的 create_* 方法。
+    // is_nested 为 true 表示该节点属于复杂 reader 的子节点，控制 primitive leaf 的校验逻辑；
+    // 复杂 reader 总是从规范化的 ParquetColumnSchema 子树创建。
+    Status create_column_reader(const ParquetColumnSchema& column_schema,
+                                const format::LocalColumnIndex* projection, bool is_nested,
+                                std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // 惰性创建并缓存 Arrow RecordReader（按 leaf_column_id 索引）。
+    // 多个 Doris reader 可能通过不同嵌套路径共享同一个物理列的数据流，
+    // 因此 RecordReader 的生命周期绑定到 RowGroup 工厂。
+    Status get_record_reader(int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor,
+                             const std::string& name, bool install_page_filter,
+                             std::shared_ptr<::parquet::internal::RecordReader>* reader) const;
+
+    // 在 schema 校验和 RecordReader 查找完成后，最终构造 ScalarColumnReader。
+    Status make_scalar_column_reader(
+            const ParquetColumnSchema& column_schema,
+            std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+            bool use_page_skip_plan, std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    std::shared_ptr<::parquet::RowGroupReader> _row_group; // Arrow RowGroup 读取器
+    mutable std::vector<std::shared_ptr<::parquet::internal::RecordReader>>
+            _record_readers; // RecordReader 缓存(按 leaf_column_id)
+    const std::map<int, ParquetPageSkipPlan>* _page_skip_plans = nullptr; // page index 裁剪结果
+    ParquetPageSkipProfile _page_skip_profile;                            // page skip profile
+    const cctz::time_zone* _timezone = nullptr;                           // 时区
+    bool _enable_strict_mode = false;                                     // 严格模式
+    ParquetColumnReaderProfile _column_reader_profile;                    // column reader profile
+};
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp
new file mode 100644
index 00000000000000..ae391615f4a8e9
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+
+#include <memory>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/column/column_string.h"
+#include "core/data_type/data_type_string.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "storage/utils.h"
+
+namespace doris::format::parquet {
+
+GlobalRowIdColumnReader::GlobalRowIdColumnReader(format::GlobalRowIdContext context,
+                                                 int64_t row_group_first_row,
+                                                 ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(ParquetColumnSchema {.name = BeConsts::GLOBAL_ROWID_COL},
+                              std::make_shared<DataTypeString>(), profile),
+          _context(context),
+          _row_group_first_row(row_group_first_row) {}
+
+int GlobalRowIdColumnReader::file_column_id() const {
+    return format::GLOBAL_ROWID_COLUMN_ID;
+}
+
+int GlobalRowIdColumnReader::parquet_leaf_column_id() const {
+    return -1;
+}
+
+const DataTypePtr& GlobalRowIdColumnReader::type() const {
+    return _type;
+}
+
+const std::string& GlobalRowIdColumnReader::name() const {
+    return _name;
+}
+
+Status GlobalRowIdColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    if (column.get() == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet global rowid read result pointer");
+    }
+    if (rows < 0) {
+        return Status::InvalidArgument("Invalid parquet global rowid read rows {}", rows);
+    }
+    for (int64_t row = 0; row < rows; ++row) {
+        append_row_id(cast_set<uint32_t>(_row_group_first_row + _next_row_position + row), column);
+    }
+    _next_row_position += rows;
+    *rows_read = rows;
+    return Status::OK();
+}
+
+Status GlobalRowIdColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    _next_row_position += rows;
+    return Status::OK();
+}
+
+void GlobalRowIdColumnReader::append_row_id(uint32_t row_id, MutableColumnPtr& column) const {
+    // 编码为 17-byte GlobalRowLoacationV2: version(1) + backend_id(8) + file_id(4) + row_id(4)
+    auto* string_column = assert_cast<ColumnString*>(column.get());
+    GlobalRowLoacationV2 location(_context.version, _context.backend_id, _context.file_id, row_id);
+    string_column->insert_data(reinterpret_cast<const char*>(&location),
+                               sizeof(GlobalRowLoacationV2));
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.h b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h
new file mode 100644
index 00000000000000..0b37fa9b56052c
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// 虚拟列 reader：生成行的全局唯一 RowId。
+//
+// 不对应任何 Parquet 物理列，不持有 RecordReader。
+// RowId 编码格式：<version:1byte><backend_id:8bytes><file_id:4bytes><row_id:4bytes>，
+// 共 17 bytes，以 String 类型输出。
+//
+// row_id = _row_group_first_row + _next_row_position，随 read() 递增。
+// 用于 TopN filter 等需要跨文件唯一定位行的场景。
+class GlobalRowIdColumnReader final : public ParquetColumnReader {
+public:
+    GlobalRowIdColumnReader(format::GlobalRowIdContext context, int64_t row_group_first_row,
+                            ParquetColumnReaderProfile profile = {});
+
+    int file_column_id() const override;
+    int parquet_leaf_column_id() const override;
+    const DataTypePtr& type() const override;
+    const std::string& name() const override;
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+
+private:
+    // 将单个 row_id 编码为 17-byte RowId 字符串并追加到 column。
+    void append_row_id(uint32_t row_id, MutableColumnPtr& column) const;
+
+    format::GlobalRowIdContext _context; // RowId 前缀（version + backend_id + file_id）
+    int64_t _row_group_first_row = 0;    // 当前 RG 在文件中的起始行号
+    int64_t _next_row_position = 0;      // 下一个待输出的行位置
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/list_column_reader.cpp b/be/src/format_v2/parquet/reader/list_column_reader.cpp
new file mode 100644
index 00000000000000..0a2ef18bcc734e
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/list_column_reader.cpp
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/list_column_reader.h"
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_nullable.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+
+namespace doris::format::parquet {
+namespace {
+
+void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type,
+                                             MutableColumnPtr* column) {
+    DORIS_CHECK(column != nullptr);
+    if (output_type->is_nullable()) {
+        return;
+    }
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(**column)) {
+        *column = nullable_column->get_nested_column_ptr();
+    }
+}
+
+} // namespace
+
+Status ListColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    return build_nested_column(rows, column, rows_read);
+}
+
+Status ListColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column();
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) {
+        return Status::Corruption("Failed to skip parquet LIST column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status ListColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_element_reader != nullptr);
+    reset_nested_build_level_cursor();
+    return _element_reader->load_nested_batch(rows);
+}
+
+// LIST 的嵌套构建核心逻辑：
+//
+// 从 element reader 的 def/rep levels 重建 ColumnArray：
+//
+// 1. 遍历 def/rep levels，解析每个顶层行：
+//    - rep_level == _repetition_level  → 继续当前 element（entry_count++）
+//    - rep_level < _repetition_level   → 新顶层行开始
+//    - def_level < _definition_level-1 → 该 LIST 本身为 NULL
+//    - def_level >= _definition_level  → entry 非空（至少 1 个元素）
+// 2. 委托 element reader 的 build_nested_column() 填充所有元素值
+// 3. append_offsets() + append_parent_nulls() 写入 ColumnArray 结构
+Status ListColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                             int64_t* values_read) {
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet list build result pointer for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_element_reader != nullptr);
+    auto* array_column = array_column_from_output(column);
+    DORIS_CHECK(array_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column);
+    auto nested_column = array_column->get_data_ptr()->assert_mutable();
+    const auto& element_output_type =
+            assert_cast<const DataTypeArray&>(*remove_nullable(_type)).get_nested_type();
+    remove_nullable_wrapper_if_not_expected(element_output_type, &nested_column);
+
+    const auto& def_levels = _element_reader->nested_definition_levels();
+    const auto& rep_levels = _element_reader->nested_repetition_levels();
+    const int64_t levels_written = _element_reader->nested_levels_written();
+    std::vector<uint64_t> entry_counts;
+    NullMap parent_nulls;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor();
+    const int16_t min_parent_definition_level =
+            static_cast<int16_t>(_definition_level - 1 - (_type->is_nullable() ? 1 : 0));
+    while (level_idx < levels_written) {
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent = rep_level < _repetition_level;
+        if (starts_parent && *values_read >= length_upper_bound) {
+            break;
+        }
+        ++level_idx;
+        if (rep_level > _repetition_level || def_level < min_parent_definition_level ||
+            (!starts_parent && def_level < _repeated_ancestor_definition_level)) {
+            continue;
+        }
+        if (rep_level == _repetition_level) {
+            if (entry_counts.empty()) {
+                return Status::Corruption("Invalid repeated level for parquet LIST column {}",
+                                          _name);
+            }
+            if (def_level >= _definition_level) {
+                ++entry_counts.back();
+            }
+            continue;
+        }
+
+        const bool parent_is_null = def_level < _definition_level - 1;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption("Parquet LIST column {} contains null for non-nullable LIST",
+                                      _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        entry_counts.push_back(def_level >= _definition_level ? 1 : 0);
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx);
+
+    uint64_t total_entries = 0;
+    int64_t child_value_count = 0;
+    if (!_element_reader->is_or_has_repeated_child()) {
+        for (const auto entry_count : entry_counts) {
+            total_entries += entry_count;
+        }
+        RETURN_IF_ERROR(_element_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                             nested_column, &child_value_count));
+    } else {
+        uint64_t pending_entries = 0;
+        auto flush_pending_entries = [&]() -> Status {
+            if (pending_entries == 0) {
+                return Status::OK();
+            }
+            int64_t span_child_value_count = 0;
+            RETURN_IF_ERROR(_element_reader->build_nested_column(
+                    static_cast<int64_t>(pending_entries), nested_column, &span_child_value_count));
+            if (span_child_value_count != static_cast<int64_t>(pending_entries)) {
+                return Status::Corruption(
+                        "Parquet LIST column {} built {} child values, expected {}", _name,
+                        span_child_value_count, pending_entries);
+            }
+            child_value_count += span_child_value_count;
+            pending_entries = 0;
+            return Status::OK();
+        };
+
+        for (const auto entry_count : entry_counts) {
+            total_entries += entry_count;
+            if (entry_count > 0) {
+                pending_entries += entry_count;
+                continue;
+            }
+            RETURN_IF_ERROR(flush_pending_entries());
+            _element_reader->advance_nested_build_level_cursor_past_parent(_repetition_level);
+        }
+        RETURN_IF_ERROR(flush_pending_entries());
+    }
+    if (child_value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet LIST column {} built {} child values, expected {}",
+                                  _name, child_value_count, total_entries);
+    }
+    array_column->get_data_ptr() = std::move(nested_column);
+    append_offsets(array_column->get_offsets(), entry_counts);
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& ListColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_definition_levels();
+}
+
+const std::vector<int16_t>& ListColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_repetition_levels();
+}
+
+int64_t ListColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_levels_written();
+}
+
+bool ListColumnReader::is_or_has_repeated_child() const {
+    return true;
+}
+
+void ListColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    DORIS_CHECK(_element_reader != nullptr);
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _element_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/list_column_reader.h b/be/src/format_v2/parquet/reader/list_column_reader.h
new file mode 100644
index 00000000000000..68438f7d30c58a
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/list_column_reader.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// LIST（数组）列的读取器，持有单个 element reader。
+//
+// 实现策略：
+//   LIST 的物理 rep/def level 流由 element reader 提供。ListColumnReader 消费这些
+//   levels 来重建 ColumnArray 的 offsets 和 null_map，然后将实际值委托给 element reader。
+//
+// 嵌套协议流程：
+//   1. load_nested_batch() → element reader 加载 def/rep levels
+//   2. build_nested_column() → 从 rep levels 计算每行的 entry_count，
+//      通过 append_offsets() 写入 ColumnArray offsets，
+//      从 def levels 判断 LIST 本身是否为 NULL，
+//      然后委托 element reader 的 build_nested_column() 填充值。
+//
+// 平铺 read() 也走同样的逻辑，只是入口不同。
+class ListColumnReader final : public ParquetColumnReader {
+public:
+    ListColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                     std::unique_ptr<ParquetColumnReader> element_reader,
+                     ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, type, profile),
+              _element_reader(std::move(element_reader)) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override;
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    std::unique_ptr<ParquetColumnReader>
+            _element_reader; // 元素 reader（递归，可能为 Scalar/Struct/List/Map）
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/map_column_reader.cpp b/be/src/format_v2/parquet/reader/map_column_reader.cpp
new file mode 100644
index 00000000000000..8c000d1b53b00e
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/map_column_reader.cpp
@@ -0,0 +1,253 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/map_column_reader.h"
+
+#include <cstdint>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+namespace doris::format::parquet {
+namespace {
+
+void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type,
+                                             MutableColumnPtr* column) {
+    DORIS_CHECK(column != nullptr);
+    if (output_type->is_nullable()) {
+        return;
+    }
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(**column)) {
+        *column = nullable_column->get_nested_column_ptr();
+    }
+}
+
+} // namespace
+
+Status MapColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    return build_nested_column(rows, column, rows_read);
+}
+
+Status MapColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column();
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) {
+        return Status::Corruption("Failed to skip parquet MAP column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status MapColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr);
+    reset_nested_build_level_cursor();
+    RETURN_IF_ERROR(_key_reader->load_nested_batch(rows));
+    return _value_reader->load_nested_batch(rows);
+}
+
+// MAP 的嵌套构建核心逻辑：
+//
+// 从 key reader 的 def/rep levels 重建 ColumnMap：
+//
+// 1. 遍历 key reader 的 def/rep levels，解析 entry 结构（同 LIST，key stream 提供 shape）。
+// 2. 委托 key reader 的 build_nested_column() 填充所有 key 值。
+// 3. key null 校验：检查 key 列中是否存在 NULL，有则报错（MAP key 不允许 NULL）。
+// 4. value 填充分两条路径：
+//    a. ScalarColumnReader 路径：value 与 key 在 level 流中一一对应（same rep level），
+//       通过 append_nested_value() 逐 entry 填充 value。
+//    b. 复杂 value 路径（如 MAP<INT, ARRAY<INT>>）：value 拥有自己的嵌套 shape，
+//       直接 build_nested_column(total_entries) 递归填充。
+// 5. append_offsets() + append_parent_nulls() 写入 ColumnMap 结构。
+Status MapColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                            int64_t* values_read) {
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet map build result pointer for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr);
+    auto* map_column = map_column_from_output(column);
+    DORIS_CHECK(map_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column);
+    auto key_column = map_column->get_keys_ptr()->assert_mutable();
+    auto value_column = map_column->get_values_ptr()->assert_mutable();
+    const auto& map_output_type = assert_cast<const DataTypeMap&>(*remove_nullable(_type));
+    remove_nullable_wrapper_if_not_expected(map_output_type.get_key_type(), &key_column);
+    remove_nullable_wrapper_if_not_expected(map_output_type.get_value_type(), &value_column);
+
+    const auto& def_levels = _key_reader->nested_definition_levels();
+    const auto& rep_levels = _key_reader->nested_repetition_levels();
+    const int64_t levels_written = _key_reader->nested_levels_written();
+
+    std::vector<uint64_t> entry_counts;
+    std::vector<int64_t> map_level_indices;
+    NullMap parent_nulls;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor();
+    const int16_t min_parent_definition_level =
+            static_cast<int16_t>(_definition_level - 1 - (_type->is_nullable() ? 1 : 0));
+    while (level_idx < levels_written) {
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent = rep_level < _repetition_level;
+        if (starts_parent && *values_read >= length_upper_bound) {
+            break;
+        }
+        const int64_t current_level_idx = level_idx;
+        ++level_idx;
+        if (rep_level > _repetition_level || def_level < min_parent_definition_level ||
+            (!starts_parent && def_level < _repeated_ancestor_definition_level)) {
+            continue;
+        }
+        map_level_indices.push_back(current_level_idx);
+        if (rep_level == _repetition_level) {
+            if (entry_counts.empty()) {
+                return Status::Corruption("Invalid repeated level for parquet MAP column {}",
+                                          _name);
+            }
+            if (def_level >= _definition_level) {
+                ++entry_counts.back();
+            }
+            continue;
+        }
+
+        const bool parent_is_null = def_level < _definition_level - 1;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption("Parquet MAP column {} contains null for non-nullable MAP",
+                                      _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        entry_counts.push_back(def_level >= _definition_level ? 1 : 0);
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx);
+
+    uint64_t total_entries = 0;
+    for (const auto entry_count : entry_counts) {
+        total_entries += entry_count;
+    }
+    const size_t key_start = key_column->size();
+    int64_t key_value_count = 0;
+    RETURN_IF_ERROR(_key_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                     key_column, &key_value_count));
+    if (key_value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet MAP column {} built {} keys, expected {}", _name,
+                                  key_value_count, total_entries);
+    }
+    if (const auto* nullable_key_column = check_and_get_column<ColumnNullable>(*key_column);
+        nullable_key_column != nullptr &&
+        nullable_key_column->has_null(key_start, nullable_key_column->size())) {
+        return Status::Corruption("Parquet MAP column {} contains null key", _name);
+    }
+    int64_t value_count = 0;
+    if (auto* scalar_value_reader = dynamic_cast<ScalarColumnReader*>(_value_reader.get())) {
+        const auto& value_def_levels = scalar_value_reader->nested_definition_levels();
+        const auto& value_rep_levels = scalar_value_reader->nested_repetition_levels();
+        const int64_t value_levels_written = scalar_value_reader->nested_levels_written();
+        int64_t value_level_idx = scalar_value_reader->nested_build_level_cursor();
+        for (const int64_t key_level_idx : map_level_indices) {
+            while (value_level_idx < value_levels_written &&
+                   (value_rep_levels[value_level_idx] > _repetition_level ||
+                    value_def_levels[value_level_idx] < min_parent_definition_level ||
+                    (value_rep_levels[value_level_idx] >= _repetition_level &&
+                     value_def_levels[value_level_idx] < _repeated_ancestor_definition_level))) {
+                ++value_level_idx;
+            }
+            if (value_level_idx >= value_levels_written) {
+                return Status::Corruption(
+                        "Parquet MAP column {} value stream ended before key stream", _name);
+            }
+            // MAP is encoded as a repeated key/value struct. The key stream owns entry existence,
+            // but the value stream still has one shape slot for every consumed MAP slot. Consume
+            // value slots in lockstep with key slots so shape-only slots from empty/null maps do
+            // not become scalar values.
+            if (value_rep_levels[value_level_idx] != rep_levels[key_level_idx]) {
+                return Status::Corruption(
+                        "Parquet MAP column {} value repetition level is not aligned with key "
+                        "stream",
+                        _name);
+            }
+            if (def_levels[key_level_idx] >= _definition_level) {
+                RETURN_IF_ERROR(
+                        scalar_value_reader->append_nested_value(value_level_idx, value_column));
+                ++value_count;
+            }
+            ++value_level_idx;
+        }
+        scalar_value_reader->set_nested_build_level_cursor(value_level_idx);
+    } else {
+        // Complex MAP values own their nested shape below the entry slot, so they can recursively
+        // materialize exactly one child value for each MAP entry.
+        RETURN_IF_ERROR(_value_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                           value_column, &value_count));
+    }
+    if (value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet MAP column {} built {} values, expected {}", _name,
+                                  value_count, total_entries);
+    }
+
+    map_column->get_keys_ptr() = std::move(key_column);
+    map_column->get_values_ptr() = std::move(value_column);
+    append_offsets(map_column->get_offsets(), entry_counts);
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& MapColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_definition_levels();
+}
+
+const std::vector<int16_t>& MapColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_repetition_levels();
+}
+
+int64_t MapColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_levels_written();
+}
+
+bool MapColumnReader::is_or_has_repeated_child() const {
+    return true;
+}
+
+void MapColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr);
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _key_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _value_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/map_column_reader.h b/be/src/format_v2/parquet/reader/map_column_reader.h
new file mode 100644
index 00000000000000..753b1193714bf0
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/map_column_reader.h
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// MAP 列的读取器，持有 key reader 和 value reader。
+//
+// key reader 始终完整读取（不做 projection 裁剪），因为它拥有：
+//   - entry 的存在性：null key → entry 无效
+//   - offsets 信息：从 key 的 rep levels 确定每个顶层行有多少个 entry
+//   - key 唯一性语义：重复 key 的行为由引擎层决定
+//
+// 嵌套协议流程：
+//   1. load_nested_batch() → 分别加载 key reader 和 value reader
+//   2. build_nested_column() →
+//      a. 从 key reader 的 rep levels 计算 entry_counts → 设置 ColumnMap offsets
+//      b. 从 key reader 的 def levels 判断 MAP 本身和每个 entry 的 null 状态
+//      c. 校验：key 为 NULL 的 entry 被标记为无效（兼容 Hive 的非标准 optional key）
+//      d. 委托 key reader 的 build_nested_column() 填充 keys
+//      e. 委托 value reader 的 build_nested_column() 填充 values
+//
+// MapColumnReader 是 ScalarColumnReader 的 friend，可以直接访问其内部方法
+// 来逐个读取 key value 做 entry 校验。
+class MapColumnReader final : public ParquetColumnReader {
+public:
+    MapColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                    std::unique_ptr<ParquetColumnReader> key_reader,
+                    std::unique_ptr<ParquetColumnReader> value_reader,
+                    ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, type, profile),
+              _key_reader(std::move(key_reader)),
+              _value_reader(std::move(value_reader)) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override;
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    std::unique_ptr<ParquetColumnReader> _key_reader; // key 列 reader（始终完整读取）
+    std::unique_ptr<ParquetColumnReader> _value_reader; // value 列 reader（可按 projection 裁剪）
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.cpp b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp
new file mode 100644
index 00000000000000..9ea14a75cdb0a5
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+
+namespace doris::format::parquet {
+
+// Doris 顶层列总是 Nullable 包装的：ColumnNullable → ColumnArray/ColumnMap/ColumnStruct。
+// 这些函数封装了穿透 Nullable wrapper 的逻辑，让调用方可以直接拿到嵌套 column。
+
+ColumnArray* array_column_from_output(MutableColumnPtr& column) {
+    // 穿透外层 ColumnNullable → 取内部 ColumnArray
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+        return assert_cast<ColumnArray*>(&nullable_column->get_nested_column());
+    }
+    // 非 nullable 路径（嵌套在 struct 内部时可能出现）
+    return assert_cast<ColumnArray*>(column.get());
+}
+
+ColumnMap* map_column_from_output(MutableColumnPtr& column) {
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+        return assert_cast<ColumnMap*>(&nullable_column->get_nested_column());
+    }
+    return assert_cast<ColumnMap*>(column.get());
+}
+
+ColumnStruct* struct_column_from_output(MutableColumnPtr& column) {
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+        return assert_cast<ColumnStruct*>(&nullable_column->get_nested_column());
+    }
+    return assert_cast<ColumnStruct*>(column.get());
+}
+
+NullMap* null_map_from_nullable_output(MutableColumnPtr& column) {
+    // 只有被 ColumnNullable 包装时才存在 null_map
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+        return &nullable_column->get_null_map_data();
+    }
+    // 嵌套在 required 父节点内部 → 没有独立的 null_map，父级负责标记
+    return nullptr;
+}
+
+void append_offsets(ColumnArray::Offsets64& offsets, const std::vector<uint64_t>& entry_counts) {
+    // offsets 是累积值：offsets[i] = sum(entry_counts[0..i])
+    // 最后一个 offset = 当前已累积的总元素数，用来作为下一次追加的起点
+    offsets.reserve(offsets.size() + entry_counts.size());
+    uint64_t current_offset = offsets.empty() ? 0 : offsets.back();
+    for (const auto entry_count : entry_counts) {
+        current_offset += entry_count;
+        offsets.push_back(current_offset);
+    }
+}
+
+void append_parent_nulls(NullMap* dst, const NullMap& src) {
+    if (dst == nullptr) {
+        return; // 目标列不是 nullable → 无需写入 null 标记
+    }
+    dst->insert(src.begin(), src.end());
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.h b/be/src/format_v2/parquet/reader/nested_column_materializer.h
new file mode 100644
index 00000000000000..e95ce3e73d665f
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/nested_column_materializer.h
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "core/column/column.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_struct.h"
+
+namespace doris::format::parquet {
+
+// ============================================================================
+// Doris Column 嵌套类型访问辅助函数
+//
+// 复杂 reader（StructColumnReader、ListColumnReader、MapColumnReader）通过
+// 这些函数从输出 Column 中获取嵌套部分的指针。
+//
+// 背景：Doris 的顶层列总是 Nullable 包装的（ColumnNullable），嵌套列本身
+// （ColumnArray、ColumnMap、ColumnStruct）在 ColumnNullable 内部。这些函数
+// 封装了"穿透 Nullable wrapper → 获取嵌套 column"的逻辑。
+// ============================================================================
+
+// 从输出列获取 ColumnArray* — 自动穿透外层的 ColumnNullable wrapper。
+ColumnArray* array_column_from_output(MutableColumnPtr& column);
+
+// 从输出列获取 ColumnMap* — 自动穿透外层的 ColumnNullable wrapper。
+ColumnMap* map_column_from_output(MutableColumnPtr& column);
+
+// 从输出列获取 ColumnStruct* — 自动穿透外层的 ColumnNullable wrapper。
+ColumnStruct* struct_column_from_output(MutableColumnPtr& column);
+
+// 从输出列获取 NullMap* — 自动穿透外层的 ColumnNullable wrapper。
+// 如果输出列不是 ColumnNullable（嵌套在非 nullable 的 struct 内部），返回 nullptr。
+NullMap* null_map_from_nullable_output(MutableColumnPtr& column);
+
+// 将 entry_counts 数组追加为 ColumnArray 的 offsets。
+// entry_counts[i] 表示第 i 个顶层行包含的元素数量，
+// offsets[i] = offsets[i-1] + entry_counts[i]。
+// 例如 entry_counts = [3, 0, 2] → offsets = [3, 3, 5]。
+void append_offsets(ColumnArray::Offsets64& offsets, const std::vector<uint64_t>& entry_counts);
+
+// 将 src 中的 null 标记追加到 dst 之后。
+// 用于 struct/literal null 嵌套场景：当父级为 NULL 时，其子列也需要对应行标记为 NULL。
+void append_parent_nulls(NullMap* dst, const NullMap& src);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp
new file mode 100644
index 00000000000000..80e2f74d23d777
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp
@@ -0,0 +1,716 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+#include <arrow/array/array_binary.h>
+#include <parquet/api/schema.h>
+#include <parquet/column_reader.h>
+#include <parquet/exception.h>
+
+#include <algorithm>
+#include <bit>
+#include <cmath>
+#include <cstring>
+#include <exception>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "core/string_ref.h"
+#include "runtime/runtime_profile.h"
+#include "util/simd/bits.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// 将 ParquetTimeUnit 转换为 DataTypeSerde 层的 DecodedTimeUnit。
+DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) {
+    switch (time_unit) {
+    case ParquetTimeUnit::MILLIS:
+        return DecodedTimeUnit::MILLIS;
+    case ParquetTimeUnit::MICROS:
+        return DecodedTimeUnit::MICROS;
+    case ParquetTimeUnit::NANOS:
+        return DecodedTimeUnit::NANOS;
+    case ParquetTimeUnit::UNKNOWN:
+    default:
+        return DecodedTimeUnit::UNKNOWN;
+    }
+}
+
+// 返回指定 DecodedValueKind 的单个值的字节大小。
+// Binary/FIXED_BINARY 返回 error（它们不是固定宽度类型）。
+Status decoded_fixed_value_size(const std::string& column_name, DecodedValueKind value_kind,
+                                size_t* value_size) {
+    switch (value_kind) {
+    case DecodedValueKind::BOOL:
+        *value_size = sizeof(bool);
+        return Status::OK();
+    case DecodedValueKind::INT32:
+        *value_size = sizeof(int32_t);
+        return Status::OK();
+    case DecodedValueKind::UINT32:
+        *value_size = sizeof(uint32_t);
+        return Status::OK();
+    case DecodedValueKind::INT64:
+        *value_size = sizeof(int64_t);
+        return Status::OK();
+    case DecodedValueKind::UINT64:
+        *value_size = sizeof(uint64_t);
+        return Status::OK();
+    case DecodedValueKind::INT96:
+        *value_size = 12;
+        return Status::OK();
+    case DecodedValueKind::FLOAT:
+        *value_size = sizeof(float);
+        return Status::OK();
+    case DecodedValueKind::DOUBLE:
+        *value_size = sizeof(double);
+        return Status::OK();
+    case DecodedValueKind::BINARY:
+    case DecodedValueKind::FIXED_BINARY:
+        return Status::InvalidArgument("Parquet binary value kind has no fixed value size for {}",
+                                       column_name);
+    }
+    return Status::InternalError("Unknown decoded value kind for column {}", column_name);
+}
+
+// 从 BinaryRecordReader 获取 Arrow Array chunks。
+// GetBuilderChunks() 会将 Arrow 内部 builder 的所有权转移出来并 reset，
+// 因此每个 batch 只能调用一次。
+Status get_binary_chunks(const std::string& column_name,
+                         ::parquet::internal::RecordReader& record_reader,
+                         std::vector<std::shared_ptr<::arrow::Array>>* chunks) {
+    auto* binary_reader = dynamic_cast<::parquet::internal::BinaryRecordReader*>(&record_reader);
+    if (binary_reader == nullptr) {
+        return Status::InternalError("Parquet binary record reader is not available for column {}",
+                                     column_name);
+    }
+    *chunks = binary_reader->GetBuilderChunks();
+    return Status::OK();
+}
+
+// 将 Arrow BinaryArray / FixedSizeBinaryArray 的 chunks 转换为 Doris StringRef 向量。
+//
+// read_dense_for_nullable 模式：Arrow 只输出了非 NULL 的紧凑值，本函数按 null_map
+// 将它们展开为与 records_read 对齐的稀疏数组（NULL 行填 nullptr + 0）。
+//
+// 非 dense 模式：直接按行一一对应转换。
+Status build_binary_values(const std::string& column_name,
+                           const std::vector<std::shared_ptr<::arrow::Array>>& chunks,
+                           int64_t records_read, const NullMap* null_map,
+                           bool read_dense_for_nullable, std::vector<StringRef>* binary_values) {
+    std::vector<StringRef> compact_values;
+    auto* values = read_dense_for_nullable ? &compact_values : binary_values;
+    values->reserve(records_read);
+    for (const auto& chunk : chunks) {
+        if (chunk == nullptr) {
+            return Status::Corruption(
+                    "Parquet binary record reader returned null chunk for column {}", column_name);
+        }
+        if (auto* binary_array = dynamic_cast<::arrow::BinaryArray*>(chunk.get())) {
+            for (int64_t row_idx = 0; row_idx < binary_array->length(); ++row_idx) {
+                if (binary_array->IsNull(row_idx)) {
+                    values->emplace_back(static_cast<const char*>(nullptr), 0);
+                    continue;
+                }
+                int32_t length = 0;
+                const uint8_t* value = binary_array->GetValue(row_idx, &length);
+                values->emplace_back(reinterpret_cast<const char*>(value), length);
+            }
+        } else if (auto* fixed_array = dynamic_cast<::arrow::FixedSizeBinaryArray*>(chunk.get())) {
+            for (int64_t row_idx = 0; row_idx < fixed_array->length(); ++row_idx) {
+                if (fixed_array->IsNull(row_idx)) {
+                    values->emplace_back(static_cast<const char*>(nullptr), 0);
+                    continue;
+                }
+                values->emplace_back(reinterpret_cast<const char*>(fixed_array->GetValue(row_idx)),
+                                     fixed_array->byte_width());
+            }
+        } else {
+            return Status::InternalError("Unexpected Arrow binary array type for column {}",
+                                         column_name);
+        }
+    }
+    if (read_dense_for_nullable) {
+        if (null_map == nullptr || null_map->size() != static_cast<size_t>(records_read)) {
+            return Status::Corruption(
+                    "Invalid dense nullable parquet null map for column {}: rows={}, null_map={}",
+                    column_name, records_read, null_map == nullptr ? 0 : null_map->size());
+        }
+        const int64_t non_null_count = static_cast<int64_t>(simd::count_zero_num(
+                reinterpret_cast<const int8_t*>(null_map->data()), null_map->size()));
+        if (compact_values.size() != static_cast<size_t>(non_null_count)) {
+            return Status::Corruption(
+                    "Invalid dense nullable parquet binary values for column {}: values={}, "
+                    "records={}, nulls={}",
+                    column_name, compact_values.size(), records_read,
+                    records_read - non_null_count);
+        }
+        binary_values->reserve(records_read);
+        size_t value_idx = 0;
+        for (int64_t record_idx = 0; record_idx < records_read; ++record_idx) {
+            if ((*null_map)[record_idx] != 0) {
+                binary_values->emplace_back(static_cast<const char*>(nullptr), 0);
+                continue;
+            }
+            binary_values->emplace_back(compact_values[value_idx++]);
+        }
+        return Status::OK();
+    }
+    if (binary_values->size() != static_cast<size_t>(records_read)) {
+        return Status::Corruption(
+                "Invalid parquet binary record read result for column {}: rows={}, records={}",
+                column_name, binary_values->size(), records_read);
+    }
+    return Status::OK();
+}
+
+// IEEE 754 half-precision (16-bit) → single-precision (32-bit) 转换。
+// Parquet FLOAT16 用 FIXED_LEN_BYTE_ARRAY(2) 存储，Doris 没有原生 Float16 类型，
+// 需要提升为 Float32。
+float half_to_float(uint16_t value) {
+    const uint32_t sign = (value & 0x8000U) << 16;
+    const uint32_t exponent = (value & 0x7C00U) >> 10;
+    const uint32_t mantissa = value & 0x03FFU;
+
+    if (exponent == 0) {
+        if (mantissa == 0) {
+            return std::bit_cast<float>(sign);
+        }
+        const float subnormal = std::ldexp(static_cast<float>(mantissa), -24);
+        return sign == 0 ? subnormal : -subnormal;
+    }
+    if (exponent == 0x1FU) {
+        return std::bit_cast<float>(sign | 0x7F800000U | (mantissa << 13));
+    }
+    return std::bit_cast<float>(sign | ((exponent + 112U) << 23) | (mantissa << 13));
+}
+
+// 将 Parquet FLOAT16 的 binary values 批量解码为 float 向量。
+// 每个 FLOAT16 值占 2 bytes，调用 half_to_float 逐值转换。
+Status build_float16_values(const std::string& column_name,
+                            const ParquetTypeDescriptor& type_descriptor,
+                            const std::vector<StringRef>& binary_values, int64_t row_count,
+                            std::vector<float>* float_values) {
+    if (type_descriptor.fixed_length != 2) {
+        return Status::Corruption("Invalid parquet Float16 length for column {}: {}", column_name,
+                                  type_descriptor.fixed_length);
+    }
+    if (binary_values.size() != static_cast<size_t>(row_count)) {
+        return Status::Corruption(
+                "Invalid parquet Float16 value count for column {}: values={}, rows={}",
+                column_name, binary_values.size(), row_count);
+    }
+    float_values->resize(static_cast<size_t>(row_count));
+    for (int64_t row = 0; row < row_count; ++row) {
+        const auto& binary_value = binary_values[static_cast<size_t>(row)];
+        if (binary_value.data == nullptr && binary_value.size == 0) {
+            (*float_values)[static_cast<size_t>(row)] = 0;
+            continue;
+        }
+        if (binary_value.data == nullptr || binary_value.size != 2) {
+            return Status::Corruption(
+                    "Invalid parquet Float16 value for column {} at row {}: data={}, size={}",
+                    column_name, row, binary_value.data == nullptr ? "null" : "non-null",
+                    binary_value.size);
+        }
+        uint16_t raw_value = 0;
+        std::memcpy(&raw_value, binary_value.data, sizeof(raw_value));
+        (*float_values)[static_cast<size_t>(row)] = half_to_float(raw_value);
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+// 将 RecordReader 的内部状态捕获为不可变的 ParquetLeafBatch。
+//
+// 该函数在 RecordReader::ReadRecords() 之后立即调用，将 Arrow 返回的
+// level/value buffer 指针（或 binary chunks 的所有权）快照到 batch 中。
+// 之后 batch 可以被多次读取（如先 build_null_map 再 append_values），
+// 不受 RecordReader 后续操作的干扰。
+Status ParquetLeafReader::collect_batch(::parquet::internal::RecordReader& record_reader,
+                                        ParquetLeafBatch* batch) const {
+    DORIS_CHECK(batch != nullptr);
+    batch->_def_levels = nullptr;
+    batch->_rep_levels = nullptr;
+    batch->_fixed_values = nullptr;
+    batch->_binary_chunks.clear();
+    // 根据 type_descriptor 确定 value_kind，控制后续 value 读取路径
+    batch->_value_kind = decoded_value_kind(_type_descriptor);
+    batch->_consumed_level_count = record_reader.levels_position();
+    batch->_decoded_level_count = record_reader.levels_written();
+    if (_descriptor->max_definition_level() > 0) {
+        batch->_def_levels = record_reader.def_levels();
+    }
+    if (_descriptor->max_repetition_level() > 0) {
+        batch->_rep_levels = record_reader.rep_levels();
+    }
+    batch->_read_dense_for_nullable = record_reader.read_dense_for_nullable();
+    batch->_values_written = record_reader.values_written();
+
+    // 固定宽度类型：values buffer 指针直接可用
+    if (!batch->is_binary_value()) {
+        batch->_fixed_values = record_reader.values();
+        return Status::OK();
+    }
+
+    // Binary 类型：必须通过 GetBuilderChunks() 获取所有权。
+    // GetBuilderChunks() 会转移 Arrow builder 所有权并 reset builder，
+    // 所以只能调用一次——这里就是那一次。
+    RETURN_IF_ERROR(get_binary_chunks(_name, record_reader, &batch->_binary_chunks));
+    // 从 chunks 重新计算 values_written（因为二进制值的计数方式不同）
+    batch->_values_written = 0;
+    for (const auto& chunk : batch->_binary_chunks) {
+        if (chunk == nullptr) {
+            return Status::Corruption(
+                    "Parquet binary record reader returned null chunk for column {}", _name);
+        }
+        batch->_values_written += chunk->length();
+    }
+    return Status::OK();
+}
+
+// 将 batch 中的值写入目标 Doris Column。
+//
+// 数据准备阶段（DecodedColumnView 填充前）：
+//   根据物理存储格式将值准备为 DataTypeSerde 可消费的形式：
+//   - FLOAT16: binary → half_to_float → float_values
+//   - Binary 类型: Arrow chunks → StringRef[]
+//   - 固定宽度 dense nullable: Arrow 紧凑值 → 间隔排列的 spaced_values
+//   - 固定宽度非 dense: 直接使用 batch._fixed_values 指针
+//
+// 物化阶段（DataTypeSerde::read_column_from_decoded_values）：
+//   根据 type_descriptor 的信息（decimal precision/scale、timestamp unit、timezone 等）
+//   将原始 bytes 转换为 Doris 的最终类型表示。
+Status ParquetLeafReader::append_values(const ParquetLeafBatch& batch, int64_t row_count,
+                                        const NullMap* null_map, MutableColumnPtr& column) const {
+    std::vector<StringRef> binary_values;
+    std::vector<uint8_t> spaced_values;
+    std::vector<float> float_values;
+    DecodedColumnView view;
+    view.value_kind = batch._value_kind;
+    view.time_unit = decoded_time_unit(_type_descriptor.time_unit);
+    view.row_count = row_count;
+    view.logical_integer_bit_width = _type_descriptor.integer_bit_width;
+    view.logical_integer_is_signed = !_type_descriptor.is_unsigned_integer;
+    view.decimal_precision = _type_descriptor.decimal_precision;
+    view.decimal_scale = _type_descriptor.decimal_scale;
+    view.fixed_length = _type_descriptor.fixed_length;
+    view.timestamp_is_adjusted_to_utc = _type_descriptor.timestamp_is_adjusted_to_utc;
+    view.timezone = _timezone;
+    view.enable_strict_mode = _enable_strict_mode;
+    view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data();
+    const bool read_dense_for_nullable = batch._read_dense_for_nullable && view.null_map != nullptr;
+
+    // 数据准备：根据物理存储格式填充 view
+    if (_type_descriptor.extra_type_info == ParquetExtraTypeInfo::FLOAT16) {
+        // FLOAT16: FIXED_LEN_BYTE_ARRAY(2) → half_to_float → float 向量
+        RETURN_IF_ERROR(build_binary_values(_name, batch._binary_chunks, row_count, null_map,
+                                            read_dense_for_nullable, &binary_values));
+        RETURN_IF_ERROR(build_float16_values(_name, _type_descriptor, binary_values, row_count,
+                                             &float_values));
+        view.value_kind = DecodedValueKind::FLOAT;
+        view.values = reinterpret_cast<const uint8_t*>(float_values.data());
+    } else if (batch.is_binary_value()) {
+        // STRING / DECIMAL_BYTE_ARRAY / ENUM / JSON 等
+        RETURN_IF_ERROR(build_binary_values(_name, batch._binary_chunks, row_count, null_map,
+                                            read_dense_for_nullable, &binary_values));
+        view.binary_values = &binary_values;
+    } else if (read_dense_for_nullable) {
+        // 固定宽度 + dense nullable: 需要展开为间隔排列
+        RETURN_IF_ERROR(build_spaced_fixed_values(batch, row_count, null_map, &spaced_values));
+        view.values = spaced_values.data();
+    } else {
+        // 固定宽度 + 非 nullable 或 非 dense: values 指针直接可用
+        view.values = batch._fixed_values;
+    }
+
+    if (_decoded_value_appender != nullptr) {
+        return _decoded_value_appender(column, view);
+    }
+
+    {
+        SCOPED_TIMER(_profile.materialization_time);
+        // 通过 DataTypeSerde 完成类型感知的值写入。
+        // 对于 nullable 类型，serde 会直接写入 null_map + nested_column。
+        // 对于非 nullable 类型（嵌套场景），当前实现临时走 ColumnNullable 兼容路径。
+        if (!_type->is_nullable()) {
+            if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column);
+                nullable_column != nullptr) {
+                auto& nested_column = nullable_column->get_nested_column();
+                auto& tmp_null_map = nullable_column->get_null_map_data();
+                const auto old_nested_size = nested_column.size();
+                const auto old_null_map_size = tmp_null_map.size();
+                auto st = _type->get_serde()->read_column_from_decoded_values(nested_column, view);
+                if (!st.ok()) {
+                    nested_column.resize(old_nested_size);
+                    return st;
+                }
+                tmp_null_map.resize(old_null_map_size + nested_column.size() - old_nested_size);
+                memset(tmp_null_map.data() + old_null_map_size, 0,
+                       tmp_null_map.size() - old_null_map_size);
+            } else {
+                RETURN_IF_ERROR(_type->get_serde()->read_column_from_decoded_values(*column, view));
+            }
+        } else {
+            RETURN_IF_ERROR(_type->get_serde()->read_column_from_decoded_values(*column, view));
+        }
+    }
+    return Status::OK();
+}
+
+// 判断当前 value_kind 是否为 binary 类型（需要走 Arrow chunks 路径而非 fixed_values 指针）。
+bool ParquetLeafBatch::is_binary_value() const {
+    return _value_kind == DecodedValueKind::BINARY || _value_kind == DecodedValueKind::FIXED_BINARY;
+}
+
+// 为 dense nullable 模式构建间隔排列的固定宽度值数组。
+//
+// Arrow RecordReader 在 read_dense_for_nullable 模式下只写非 NULL 值（紧凑排列，
+// 不包含 NULL 行的占位），本函数按 null_map 将紧凑值展开：
+//   - NULL 行：对应位置保留为 0（不会被 read_column_from_decoded_values 读取）
+//   - 非 NULL 行：从 compact buffer 中取下一个值写入对应位置
+//
+// 例如：null_map = [0,1,0,1,0]，values = [v0,v1,v2]
+//         展开后（逻辑上）= [v0, -, v1, -, v2]
+Status ParquetLeafReader::build_spaced_fixed_values(const ParquetLeafBatch& batch,
+                                                    int64_t row_count, const NullMap* null_map,
+                                                    std::vector<uint8_t>* spaced_values) const {
+    DORIS_CHECK(null_map != nullptr);
+    DORIS_CHECK(spaced_values != nullptr);
+    size_t value_size = 0;
+    RETURN_IF_ERROR(decoded_fixed_value_size(_name, batch._value_kind, &value_size));
+    spaced_values->resize(static_cast<size_t>(row_count) * value_size);
+    const auto non_null_count = static_cast<int64_t>(simd::count_zero_num(
+            reinterpret_cast<const int8_t*>(null_map->data()), null_map->size()));
+    // 完整性校验：紧凑值数量必须等于非 NULL 行数
+    if (batch._values_written != non_null_count) {
+        return Status::Corruption(
+                "Invalid dense nullable parquet values for column {}: values={}, records={}, "
+                "nulls={}",
+                _name, batch._values_written, row_count, row_count - non_null_count);
+    }
+    auto* dst = spaced_values->data();
+    int64_t value_idx = 0;
+    for (int64_t record_idx = 0; record_idx < row_count; ++record_idx) {
+        if ((*null_map)[record_idx] != 0) {
+            continue; // NULL 行：跳过，对应位置保持为 0
+        }
+        // 非 NULL 行：从紧凑 buffer 中取出下一个值，按 value_size 拷贝到对应行偏移
+        std::memcpy(dst + static_cast<size_t>(record_idx) * value_size,
+                    batch._fixed_values + static_cast<size_t>(value_idx) * value_size, value_size);
+        ++value_idx;
+    }
+    return Status::OK();
+}
+
+ParquetLeafReader::ParquetLeafReader(
+        const ::parquet::ColumnDescriptor* descriptor, ParquetTypeDescriptor type_descriptor,
+        DataTypePtr type, std::string name,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+        ParquetColumnReaderProfile profile, const cctz::time_zone* timezone,
+        bool enable_strict_mode,
+        std::function<Status(MutableColumnPtr&, const DecodedColumnView&)> decoded_value_appender)
+        : _descriptor(descriptor),
+          _type_descriptor(type_descriptor),
+          _type(std::move(type)),
+          _name(std::move(name)),
+          _record_reader(std::move(record_reader)),
+          _profile(profile),
+          _timezone(timezone),
+          _enable_strict_mode(enable_strict_mode),
+          _decoded_value_appender(std::move(decoded_value_appender)) {}
+
+// 从 Arrow RecordReader 读取 batch_rows 行，并将结果捕获到 ParquetLeafBatch 中。
+//
+// 步骤：
+// 1. Reset + Reserve: 准备 RecordReader 的内部缓冲区
+// 2. ReadRecords(batch_rows): 触发一次 data page 读取和解码（Dremel levels + values）
+// 3. collect_batch(): 将解码结果快照到 ParquetLeafBatch
+Status ParquetLeafReader::read_batch(int64_t batch_rows, ParquetLeafBatch* batch,
+                                     int64_t* rows_read) const {
+    if (batch == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet leaf batch result pointer for column {}",
+                                       _name);
+    }
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+
+    try {
+        _record_reader->Reset();
+        _record_reader->Reserve(batch_rows);
+        {
+            SCOPED_TIMER(_profile.arrow_read_records_time);
+            // ReadRecords 返回实际读到的记录数，可能小于 batch_rows（到达 column chunk 末尾）
+            *rows_read = _record_reader->ReadRecords(batch_rows);
+        }
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to read parquet records for column {}: {}", _name,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to read parquet records for column {}: {}", _name,
+                                     e.what());
+    }
+    if (*rows_read < 0 || *rows_read > batch_rows) {
+        return Status::Corruption("Invalid parquet record read result for column {}: {}", _name,
+                                  *rows_read);
+    }
+    return collect_batch(*_record_reader, batch);
+}
+
+// 根据 batch 中的 definition levels 构建 Doris NullMap。
+//
+// 规则：def_level == max_definition_level → 非 NULL（null_map=0），否则为 NULL（null_map=1）。
+// 如果该列没有 optional/repeated 祖先（max_definition_level == 0），则所有值都非 NULL，
+// 直接返回 OK（不设置 null_map），由调用方按无 NULL 处理。
+Status ParquetLeafReader::build_null_map(const ParquetLeafBatch& batch, int64_t records_read,
+                                         NullMap* null_map) const {
+    // 无 optional 祖先 → 所有值都非 NULL，不需要 null_map
+    if (_descriptor->max_definition_level() == 0) {
+        return Status::OK();
+    }
+    auto* def_levels = batch.def_levels();
+    if (def_levels == nullptr && records_read > 0) {
+        return Status::Corruption(
+                "Parquet record reader returned null definition levels for nullable column {}",
+                _name);
+    }
+    const int16_t max_definition_level = _descriptor->max_definition_level();
+    null_map->resize(records_read);
+    auto* __restrict dst = null_map->data();
+    const auto* __restrict src = def_levels;
+    for (int64_t record_idx = 0; record_idx < records_read; ++record_idx) {
+        dst[record_idx] = src[record_idx] != max_definition_level;
+    }
+    return Status::OK();
+}
+
+// 嵌套叶子的一步式读取：read levels + values → 解析 value layout → 物化 values_column。
+//
+// 这是 ParquetLeafReader 中最复杂的函数。它处理 Arrow RecordReader 在不同场景下
+// 按不同方式写入 level/value 的复杂情况。
+//
+// 整体流程：
+//
+// 1. 调用 read_batch() 获取原始的 level/value 数据。
+//
+// 2. 将 def/rep levels 拷贝到 batch 中（因为 RecordReader 的 level 数组可能被后续读取覆盖）。
+//
+// 3. 解析 value layout — Arrow RecordReader 在不同场景下按不同粒度和方式写入 value：
+//    - LEVELS:            value 与 level 一一对应（每个 level slot 恰好有一个 value）
+//    - VALUE_SLOTS:       value 数量 == 满足 value_slot_definition_level 的 slot 数
+//    - LEAF_VALUES:       value 数量 == def_level == max_definition_level 的 slot 数（真正的叶值）
+//    - PAYLOAD_VALUE_SLOTS: value 数量 == 降级后的 payload_slot_definition_level slot 数
+//      这种降级发生在 Arrow 为 NULL 祖先写入 value placeholder 时（见下文）。
+//
+// 4. 构建 value_indices[] 映射：level_idx → value buffer 中的位置（-1 表示该 slot 无 value）。
+//
+// 5. 构建 value_nulls[]：标记每个 value 是否为 NULL。
+//
+// 6. 调用 append_values() 物化 values_column（非 nullable 的基本类型）。
+//
+// 关于 PAYLOAD_VALUE_SLOTS 降级（count_value_slots 的 while 循环）：
+//   Arrow 的 RecordReader 有时会为不满足 Doris 物化阈值的 NULL 祖先写入 value placeholder。
+//   例如 MAP value 在 def_level 不足时，Arrow 仍可能分配一个 value slot 但写入占位值。
+//   此时 values_written > value_slot_count（按标准 threshold 计算的 slot 数）。
+//   代码尝试逐步降低 payload_slot_definition_level，直到找到匹配 value 数目的 threshold，
+//   确保 value_indices 映射和 values_written 对齐，不会把占位值错配给真实 slot。
+Status ParquetLeafReader::read_nested_batch(int64_t batch_rows, int16_t value_slot_definition_level,
+                                            ParquetNestedScalarBatch* batch,
+                                            int16_t value_slot_repetition_level) const {
+    ParquetLeafBatch leaf_batch;
+    int64_t records_read = 0;
+    RETURN_IF_ERROR(read_batch(batch_rows, &leaf_batch, &records_read));
+    return build_nested_batch_from_leaf_batch(leaf_batch, records_read, value_slot_definition_level,
+                                              batch, value_slot_repetition_level);
+}
+
+Status ParquetLeafReader::build_nested_batch_from_leaf_batch(
+        const ParquetLeafBatch& leaf_batch, int64_t records_read,
+        int16_t value_slot_definition_level, ParquetNestedScalarBatch* batch,
+        int16_t value_slot_repetition_level) const {
+    if (batch == nullptr) {
+        return Status::InvalidArgument("Nested scalar batch is null for column {}", _name);
+    }
+    *batch = ParquetNestedScalarBatch();
+    batch->value_slot_definition_level = value_slot_definition_level;
+    batch->value_slot_repetition_level = value_slot_repetition_level;
+
+    batch->records_read = records_read;
+    if (_type->is_nullable() && leaf_batch.read_dense_for_nullable()) {
+        return Status::NotSupported(
+                "Dense nullable parquet nested reader is not supported for column {}", _name);
+    }
+    batch->levels_written = leaf_batch.consumed_level_count();
+    const int64_t values_written = leaf_batch.values_written();
+    if (batch->levels_written > leaf_batch.decoded_level_count()) {
+        return Status::Corruption(
+                "Invalid nested parquet level position for column {}: position={}, levels={}",
+                _name, batch->levels_written, leaf_batch.decoded_level_count());
+    }
+    if (batch->levels_written == 0 && batch->records_read > 0 &&
+        values_written == batch->records_read && _descriptor->max_definition_level() == 0 &&
+        _descriptor->max_repetition_level() == 0) {
+        batch->levels_written = batch->records_read;
+    }
+    if (batch->levels_written < batch->records_read || values_written < 0 ||
+        values_written > batch->levels_written) {
+        return Status::Corruption(
+                "Invalid nested parquet read result for column {}: rows={}, levels={}, values={}",
+                _name, batch->records_read, batch->levels_written, values_written);
+    }
+    if (batch->levels_written == 0) {
+        return Status::OK();
+    }
+
+    auto* def_levels = leaf_batch.def_levels();
+    if (def_levels == nullptr && _descriptor->max_definition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null definition levels for column {}", _name);
+    }
+    batch->def_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (_descriptor->max_definition_level() == 0 || def_levels == nullptr) {
+        std::fill(batch->def_levels.begin(), batch->def_levels.end(),
+                  _descriptor->max_definition_level());
+    } else {
+        std::copy(def_levels, def_levels + batch->levels_written, batch->def_levels.begin());
+    }
+
+    auto* rep_levels = leaf_batch.rep_levels();
+    if (rep_levels == nullptr && _descriptor->max_repetition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null repetition levels for column {}", _name);
+    }
+    batch->rep_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (_descriptor->max_repetition_level() == 0 || rep_levels == nullptr) {
+        std::fill(batch->rep_levels.begin(), batch->rep_levels.end(), 0);
+    } else {
+        std::copy(rep_levels, rep_levels + batch->levels_written, batch->rep_levels.begin());
+    }
+
+    const int16_t leaf_definition_level = _descriptor->max_definition_level();
+    // Arrow's RecordReader may emit value placeholders for null ancestors that are below the
+    // Doris materialization threshold. Those slots must still advance the payload value index;
+    // otherwise the next defined child level points at the placeholder instead of its real value.
+    auto count_value_slots = [&](int16_t slot_definition_level) {
+        int64_t slot_count = 0;
+        for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+            if (batch->def_levels[level_idx] >= slot_definition_level &&
+                batch->rep_levels[level_idx] <= value_slot_repetition_level) {
+                ++slot_count;
+            }
+        }
+        return slot_count;
+    };
+
+    const int64_t value_slot_count = count_value_slots(value_slot_definition_level);
+    int16_t payload_slot_definition_level = value_slot_definition_level;
+    int64_t payload_value_slot_count = value_slot_count;
+    while (payload_slot_definition_level > 0 && payload_value_slot_count < values_written) {
+        --payload_slot_definition_level;
+        payload_value_slot_count = count_value_slots(payload_slot_definition_level);
+    }
+
+    int64_t leaf_value_count = 0;
+    for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+        if (batch->def_levels[level_idx] < value_slot_definition_level ||
+            batch->rep_levels[level_idx] > value_slot_repetition_level) {
+            continue;
+        }
+        if (batch->def_levels[level_idx] == leaf_definition_level) {
+            ++leaf_value_count;
+        }
+    }
+
+    enum class ValueLayout { LEVELS, VALUE_SLOTS, LEAF_VALUES, PAYLOAD_VALUE_SLOTS };
+    ValueLayout value_layout = ValueLayout::LEAF_VALUES;
+    if (values_written == batch->levels_written) {
+        value_layout = ValueLayout::LEVELS;
+    } else if (values_written == value_slot_count) {
+        value_layout = ValueLayout::VALUE_SLOTS;
+    } else if (values_written == leaf_value_count) {
+        value_layout = ValueLayout::LEAF_VALUES;
+    } else if (values_written == payload_value_slot_count) {
+        value_layout = ValueLayout::PAYLOAD_VALUE_SLOTS;
+    } else {
+        return Status::Corruption(
+                "Nested parquet reader returned inconsistent value count for column {}: values={}, "
+                "levels={}, slots={}, leaf_values={}, payload_slots={}, "
+                "payload_slot_definition_level={}",
+                _name, values_written, batch->levels_written, value_slot_count, leaf_value_count,
+                payload_value_slot_count, payload_slot_definition_level);
+    }
+
+    batch->value_indices.resize(static_cast<size_t>(batch->levels_written), -1);
+    NullMap value_nulls(static_cast<size_t>(values_written), 1);
+    int64_t value_idx = 0;
+    const int16_t decoded_slot_definition_level = value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS
+                                                          ? payload_slot_definition_level
+                                                          : value_slot_definition_level;
+    for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+        if (batch->def_levels[level_idx] < decoded_slot_definition_level ||
+            batch->rep_levels[level_idx] > value_slot_repetition_level) {
+            continue;
+        }
+        const bool has_leaf_value = batch->def_levels[level_idx] == leaf_definition_level;
+        int64_t decoded_value_idx = -1;
+        if (value_layout == ValueLayout::LEVELS) {
+            decoded_value_idx = level_idx;
+        } else if (value_layout == ValueLayout::VALUE_SLOTS) {
+            decoded_value_idx = value_idx++;
+        } else if (value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS) {
+            decoded_value_idx = value_idx++;
+        } else {
+            if (!has_leaf_value) {
+                continue;
+            }
+            decoded_value_idx = value_idx++;
+        }
+        DORIS_CHECK(decoded_value_idx >= 0);
+        DORIS_CHECK(decoded_value_idx < values_written);
+        if (has_leaf_value) {
+            batch->value_indices[static_cast<size_t>(level_idx)] = decoded_value_idx;
+            value_nulls[static_cast<size_t>(decoded_value_idx)] = 0;
+        }
+    }
+    if (value_layout != ValueLayout::LEVELS && value_idx != values_written) {
+        return Status::Corruption(
+                "Nested parquet reader value cursor stopped early for column {}: values={}, "
+                "visited={}",
+                _name, values_written, value_idx);
+    }
+
+    const auto value_type = remove_nullable(_type);
+    batch->values_column = value_type->create_column();
+    if (values_written > 0) {
+        ParquetLeafReader value_reader(_descriptor, _type_descriptor, value_type, _name,
+                                       _record_reader, _profile, _timezone, _enable_strict_mode);
+        RETURN_IF_ERROR(value_reader.append_values(leaf_batch, values_written, &value_nulls,
+                                                   batch->values_column));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.h b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h
new file mode 100644
index 00000000000000..a7b8f8c541634e
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace parquet {
+class ColumnDescriptor;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace arrow {
+class Array;
+} // namespace arrow
+
+namespace doris::format::parquet {
+
+struct ParquetLeafReaderTestAccess;
+
+// 嵌套标量叶子的读取结果，将 Dremel 编码的 shape 和实际 value 分离。
+//
+// 设计意图：复杂 reader（LIST/MAP/STRUCT）先消费 shape（def_levels + rep_levels + value_indices）
+// 重建容器结构（offsets + null_map），再按 value_indices 将 values_column 写入容器。
+//
+// 例子：MAP<STRING, INT> 的 value 列在 rep_level=[0,1,0] 表示 2 个 entry 的 3 个 slot。
+//       复杂 reader 先根据 rep_levels 确定 offsets=[0,2]，再根据 value_indices 把 value 写入对应 slot。
+//
+// 字段说明：
+//   records_read         - 本批读到的顶层记录数（从 ReadRecords 返回）
+//   levels_written       - 本批实际产生的 level 数（= consumed_level_count）
+//   value_slot_definition_level - 有资格包含 value 的 slot 的最小 def level（由父 reader 设定）
+//   value_slot_repetition_level - 有资格包含 value 的 slot 的最大 rep level（由父 reader 设定）
+//   def_levels[]         - definition levels 的拷贝
+//   rep_levels[]         - repetition levels 的拷贝
+//   value_indices[]      - level_idx → value buffer 中的下标（-1 表示该 slot 无 value，如 NULL key）
+//   values_column        - 物化后的 value 列（非 nullable 的类型）
+struct ParquetNestedScalarBatch {
+    int64_t records_read = 0;
+    int64_t levels_written = 0;
+    int16_t value_slot_definition_level = 0;
+    int16_t value_slot_repetition_level = std::numeric_limits<int16_t>::max();
+    std::vector<int16_t> def_levels;
+    std::vector<int16_t> rep_levels;
+    std::vector<int64_t> value_indices;
+    MutableColumnPtr values_column;
+
+    bool empty() const { return levels_written == 0; }
+};
+
+// Arrow RecordReader 一次 ReadRecords() 后的批次结果视图。
+//
+// 该类将 Arrow RecordReader 的内部状态"快照化"为一个不可变的视图，解决两个问题：
+// 1. BinaryRecordReader 通过 GetBuilderChunks() 返回值的所有权，而固定宽度类型通过 values()。
+//    统一为 ParquetLeafBatch 后，外部只需判断 is_binary_value() 选择数据源。
+// 2. Arrow RecordReader 的数据是 one-shot transfer 语义（GetBuilderChunks 会 reset builder），
+//    ParquetLeafBatch 将数据捕获后，允许多次读取（如先 build_null_map 再 append_values）。
+//
+// 字段说明：
+//   _consumed_level_count  - 本次读取前 RecordReader 已经消费的 level 数（= levels_position）
+//   _decoded_level_count   - 本次读取后 RecordReader 解码出的 level 总数（= levels_written）
+//   _values_written        - 本批写出的 value 个数
+//   _def_levels / _rep_levels - 指向 RecordReader 内部 level 数组的指针（非拥有）
+//   _fixed_values          - 固定宽度类型的 value buffer 指针（非拥有）
+//   _binary_chunks         - Binary 类型的 Arrow Array chunks（拥有所有权）
+//   _read_dense_for_nullable - RecordReader 是否为 nullable 列启用了 dense 模式
+class ParquetLeafBatch {
+public:
+    int64_t consumed_level_count() const { return _consumed_level_count; }
+    int64_t decoded_level_count() const { return _decoded_level_count; }
+    int64_t values_written() const { return _values_written; }
+    bool read_dense_for_nullable() const { return _read_dense_for_nullable; }
+    const int16_t* def_levels() const { return _def_levels; }
+    const int16_t* rep_levels() const { return _rep_levels; }
+
+private:
+    friend class ParquetLeafReader;
+
+    bool is_binary_value() const;
+
+    DecodedValueKind _value_kind = DecodedValueKind::INT32;
+    int64_t _consumed_level_count = 0;
+    int64_t _decoded_level_count = 0;
+    int64_t _values_written = 0;
+    const int16_t* _def_levels = nullptr;
+    const int16_t* _rep_levels = nullptr;
+    const uint8_t* _fixed_values = nullptr;
+    bool _read_dense_for_nullable = false;
+    std::vector<std::shared_ptr<::arrow::Array>> _binary_chunks;
+};
+
+// Parquet 原始类型叶子的值读取器。每个 ScalarColumnReader 在读取时创建一个临时 ParquetLeafReader。
+//
+// 职责：包装 Arrow 的 RecordReader，将其解码出的 type-erased level/value buffer 转换为 Doris
+// Column 可以消费的形式。
+//
+// 该类不持有任何可变状态（除 _record_reader 的 shared_ptr），因此是 const 可调用的。
+// RecordReader 本身由 ParquetColumnReaderFactory 按 leaf_column_id 缓存和共享。
+//
+// 对外提供两组接口：
+//
+// ① 平铺列（top-level primitive column）的读取路径：
+//      read_batch() → build_null_map() + append_values()
+//      适用于非嵌套的基本类型列，如 SELECT id, name FROM t。
+//
+// ② 嵌套叶子（nested LIST/MAP/STRUCT 内的 primitive leaf）的读取路径：
+//      read_nested_batch()
+//      一步完成"读取 level/value → 解析 value slot 映射 → 物化 values_column"，
+//      返回 ParquetNestedScalarBatch 供父 reader 组装容器结构。
+class ParquetLeafReader {
+public:
+    ParquetLeafReader(const ::parquet::ColumnDescriptor* descriptor,
+                      ParquetTypeDescriptor type_descriptor, DataTypePtr type, std::string name,
+                      std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+                      ParquetColumnReaderProfile profile = {},
+                      const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false,
+                      std::function<Status(MutableColumnPtr&, const DecodedColumnView&)>
+                              decoded_value_appender = nullptr);
+
+    // ①a. 从 Arrow RecordReader 读取 batch_rows 行，将结果捕获到 ParquetLeafBatch 中。
+    // 调用方拿到 batch 后可以多次访问 level 和 value 信息。
+    Status read_batch(int64_t batch_rows, ParquetLeafBatch* batch, int64_t* rows_read) const;
+
+    // ①b. 根据 batch 中的 definition levels 构建 Doris NullMap。
+    // def_level == max_definition_level → 非 NULL，否则为 NULL。
+    // 如果该列没有 optional/repeated 祖先（max_definition_level == 0），直接返回 OK。
+    Status build_null_map(const ParquetLeafBatch& batch, int64_t records_read,
+                          NullMap* null_map) const;
+
+    // ①c. 将 batch 中的值写入目标 Doris Column。
+    // - 固定宽度类型：直接从 _fixed_values 指针读取
+    // - Binary 类型：从 _binary_chunks 构造 StringRef 向量
+    // - FLOAT16 类型：从 binary 解码后转为 float
+    // - dense nullable 模式：先按 null_map 展开为间隔排列的 values 再写入
+    // 值转换（如 INT64 timestamp → DateTime）通过 DataTypeSerde::read_column_from_decoded_values 完成。
+    Status append_values(const ParquetLeafBatch& batch, int64_t row_count, const NullMap* null_map,
+                         MutableColumnPtr& column) const;
+
+    // ② 嵌套叶子的一步式读取。内部调用 read_batch() 获取 level/value，
+    // 然后解析 value layout（Arrow 的 RecordReader 在不同场景下按不同粒度写入 value：
+    // LEVELS / VALUE_SLOTS / LEAF_VALUES / PAYLOAD_VALUE_SLOTS），
+    // 构建 value_indices 映射和 value_nulls，最后调用 append_values() 物化 values_column。
+    //
+    // value_slot_definition_level: 有资格容纳 value 的 slot 的最小 def level。
+    //   例如 MAP key 的 value_slot_definition_level = key 的 max_dl（只有 def>=max_dl 的 slot 才有 key 值）。
+    // value_slot_repetition_level: 有资格容纳 value 的 slot 的最大 rep level。
+    //   用于过滤属于其他 repeated 层级（如嵌套 LIST inside MAP）的 slot。
+    Status read_nested_batch(
+            int64_t batch_rows, int16_t value_slot_definition_level,
+            ParquetNestedScalarBatch* batch,
+            int16_t value_slot_repetition_level = std::numeric_limits<int16_t>::max()) const;
+
+private:
+    friend struct ParquetLeafReaderTestAccess;
+
+    // 将 RecordReader 的内部状态捕获为不可变的 ParquetLeafBatch。
+    // 分别处理固定宽度类型（values()）和 binary 类型（GetBuilderChunks()）。
+    Status collect_batch(::parquet::internal::RecordReader& record_reader,
+                         ParquetLeafBatch* batch) const;
+
+    // 为 dense nullable 模式构建间隔排列的固定宽度值数组。
+    // Arrow RecordReader 在 read_dense_for_nullable 模式下只写非 NULL 值（紧凑排列），
+    // 本函数按 null_map 将它们展开为与行一一对应的间隔排列格式。
+    Status build_spaced_fixed_values(const ParquetLeafBatch& batch, int64_t row_count,
+                                     const NullMap* null_map,
+                                     std::vector<uint8_t>* spaced_values) const;
+
+    Status build_nested_batch_from_leaf_batch(const ParquetLeafBatch& leaf_batch,
+                                              int64_t records_read,
+                                              int16_t value_slot_definition_level,
+                                              ParquetNestedScalarBatch* batch,
+                                              int16_t value_slot_repetition_level) const;
+
+    const ::parquet::ColumnDescriptor* _descriptor =
+            nullptr;                        // Arrow 列描述符（physical_type, max_dl, max_rl）
+    ParquetTypeDescriptor _type_descriptor; // 类型编码信息（decimal 精度、timestamp 单位等）
+    DataTypePtr _type;                      // Doris 目标类型
+    std::string _name;                      // 列名（用于报错信息）
+    std::shared_ptr<::parquet::internal::RecordReader>
+            _record_reader;                     // Arrow 物理列读取器（共享所有权）
+    ParquetColumnReaderProfile _profile;        // Profile 计数器
+    const cctz::time_zone* _timezone = nullptr; // 时区（timestamp 转换用）
+    bool _enable_strict_mode = false;           // 严格模式（类型不匹配时是否报错）
+    std::function<Status(MutableColumnPtr&, const DecodedColumnView&)> _decoded_value_appender;
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.cpp b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp
new file mode 100644
index 00000000000000..85bd8e9dcaf165
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+
+#include <memory>
+
+#include "core/assert_cast.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+
+namespace doris::format::parquet {
+
+// 构造函数：创建一个匿名 ParquetColumnSchema（只有 name），类型为 Int64。
+// 虚拟列不对应任何 Parquet 物理列，因此不需要完整的 schema 信息。
+RowPositionColumnReader::RowPositionColumnReader(int64_t row_group_first_row,
+                                                 ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(ParquetColumnSchema {.name = format::ROW_POSITION_COLUMN_NAME},
+                              std::make_shared<DataTypeInt64>(), profile),
+          _row_group_first_row(row_group_first_row) {}
+
+int RowPositionColumnReader::file_column_id() const {
+    return format::ROW_POSITION_COLUMN_ID;
+}
+
+int RowPositionColumnReader::parquet_leaf_column_id() const {
+    return -1;
+}
+
+const DataTypePtr& RowPositionColumnReader::type() const {
+    return _type;
+}
+
+const std::string& RowPositionColumnReader::name() const {
+    return _name;
+}
+
+Status RowPositionColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    if (column.get() == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet row position read result pointer");
+    }
+    if (rows < 0) {
+        return Status::InvalidArgument("Invalid parquet row position read rows {}", rows);
+    }
+    auto* vector_column = assert_cast<ColumnInt64*>(column.get());
+    auto& data = vector_column->get_data();
+    const auto old_size = data.size();
+    data.resize(old_size + rows);
+    for (int64_t row = 0; row < rows; ++row) {
+        data[old_size + row] = _row_group_first_row + _next_row_position + row;
+    }
+    _next_row_position += rows;
+    *rows_read = rows;
+    return Status::OK();
+}
+
+Status RowPositionColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    _next_row_position += rows;
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.h b/be/src/format_v2/parquet/reader/row_position_column_reader.h
new file mode 100644
index 00000000000000..52aa629d4992bf
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/row_position_column_reader.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// 虚拟列 reader：生成文件中当前行的位置序号（从 RowGroup 起始行号开始递增）。
+//
+// 不对应任何 Parquet 物理列，不持有 RecordReader。
+// read() 直接写入从 _row_group_first_row + _next_row_position 开始的连续 Int64 值。
+// skip() 只推进 _next_row_position 游标。
+//
+// 用于需要知道行在文件中位置的场景（如 Iceberg 的 file_row_position）。
+class RowPositionColumnReader final : public ParquetColumnReader {
+public:
+    explicit RowPositionColumnReader(int64_t row_group_first_row,
+                                     ParquetColumnReaderProfile profile = {});
+
+    int file_column_id() const override;
+    int parquet_leaf_column_id() const override;
+    const DataTypePtr& type() const override;
+    const std::string& name() const override;
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+
+private:
+    int64_t _row_group_first_row = 0; // 当前 RG 在文件中的起始行号
+    int64_t _next_row_position = 0;   // 下一个待输出的行位置
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.cpp b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp
new file mode 100644
index 00000000000000..2e13bc7c51685b
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp
@@ -0,0 +1,325 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+#include <parquet/api/reader.h>
+
+#include <algorithm>
+#include <exception>
+#include <utility>
+
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "util/simd/bits.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// 嵌套标量值游标 — 从 ParquetNestedScalarBatch 中按 level_idx 查找对应的 value。
+// MapColumnReader 利用它为每个 key slot 找到对应的 value 行。
+class ParquetNestedScalarValueCursor {
+public:
+    explicit ParquetNestedScalarValueCursor(const ParquetNestedScalarBatch* batch) { reset(batch); }
+
+    void reset(const ParquetNestedScalarBatch* batch) {
+        DORIS_CHECK(batch != nullptr);
+        _batch = batch;
+    }
+
+    Status value_index(const std::string& column_name, int64_t level_idx, int64_t* value_idx) {
+        DORIS_CHECK(_batch != nullptr);
+        DORIS_CHECK(value_idx != nullptr);
+        DORIS_CHECK(level_idx < _batch->levels_written);
+        DORIS_CHECK(level_idx >= 0);
+        DORIS_CHECK(static_cast<size_t>(level_idx) < _batch->value_indices.size());
+        const int64_t computed_value_idx = _batch->value_indices[static_cast<size_t>(level_idx)];
+        if (computed_value_idx < 0) {
+            return Status::Corruption("Nested parquet value is absent for column {}", column_name);
+        }
+        DORIS_CHECK(_batch->values_column.get() != nullptr);
+        if (computed_value_idx >= _batch->values_column->size()) {
+            return Status::Corruption("Nested parquet value index is out of range for column {}",
+                                      column_name);
+        }
+        *value_idx = computed_value_idx;
+        return Status::OK();
+    }
+
+private:
+    const ParquetNestedScalarBatch* _batch = nullptr;
+};
+
+// 将嵌套 batch 中 level_idx 位置的值追加到目标 column。
+// 如果目标 column 是 ColumnNullable，将值写入 nested column 并 push_back(0) 到 null map。
+Status append_scalar_batch_value(const ScalarColumnReader& column_reader,
+                                 const ParquetNestedScalarBatch& batch, int64_t level_idx,
+                                 ParquetNestedScalarValueCursor* value_cursor,
+                                 MutableColumnPtr& column) {
+    DORIS_CHECK(value_cursor != nullptr);
+    int64_t value_idx = -1;
+    RETURN_IF_ERROR(value_cursor->value_index(column_reader.name(), level_idx, &value_idx));
+    auto* nullable_column = check_and_get_column<ColumnNullable>(*column);
+    if (nullable_column != nullptr) {
+        nullable_column->get_nested_column().insert_from(*batch.values_column,
+                                                         static_cast<size_t>(value_idx));
+        nullable_column->get_null_map_data().push_back(0);
+        return Status::OK();
+    }
+    column->insert_from(*batch.values_column, static_cast<size_t>(value_idx));
+    return Status::OK();
+}
+
+} // namespace
+
+ScalarColumnReader::ScalarColumnReader(
+        const ParquetColumnSchema& column_schema,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+        const ParquetPageSkipPlan* page_skip_plan, const cctz::time_zone* timezone,
+        bool enable_strict_mode, ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(column_schema, column_schema.type, profile),
+          _descriptor(column_schema.descriptor),
+          _type_descriptor(column_schema.type_descriptor),
+          _record_reader(std::move(record_reader)),
+          _page_skip_plan(page_skip_plan),
+          _timezone(timezone),
+          _enable_strict_mode(enable_strict_mode),
+          _nested_batch(std::make_unique<ParquetNestedScalarBatch>()) {}
+
+ScalarColumnReader::~ScalarColumnReader() = default;
+
+// 平铺读取：直接从 Arrow RecordReader 读 rows 行 → 构建 null_map → 物化到 Doris Column。
+//
+// 这是 ScalarColumnReader 最核心的路径，对应 ParquetLeafReader 的 ① 平铺读取流程。
+Status ScalarColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    if (column.get() == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet column read result pointer for column {}",
+                                       _name);
+    }
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+    auto reader = leaf_reader();
+    ParquetLeafBatch leaf_batch;
+    RETURN_IF_ERROR(reader.read_batch(rows, &leaf_batch, rows_read));
+
+    NullMap null_map;
+    RETURN_IF_ERROR(reader.build_null_map(leaf_batch, *rows_read, &null_map));
+    const auto value_kind = decoded_value_kind(_type_descriptor);
+    const bool is_binary_value =
+            value_kind == DecodedValueKind::BINARY || value_kind == DecodedValueKind::FIXED_BINARY;
+    if (!is_binary_value && leaf_batch.read_dense_for_nullable() && !null_map.empty()) {
+        const int64_t non_null_count = static_cast<int64_t>(simd::count_zero_num(
+                reinterpret_cast<const int8_t*>(null_map.data()), null_map.size()));
+        const int64_t null_count = *rows_read - non_null_count;
+        if (leaf_batch.values_written() != non_null_count) {
+            return Status::Corruption(
+                    "Invalid dense nullable parquet record read result for column {}: values={}, "
+                    "records={}, nulls={}",
+                    _name, leaf_batch.values_written(), *rows_read, null_count);
+        }
+    } else if (!is_binary_value && !leaf_batch.read_dense_for_nullable() &&
+               leaf_batch.values_written() != *rows_read) {
+        return Status::Corruption(
+                "Invalid parquet record read result for column {}: values={}, records={}", _name,
+                leaf_batch.values_written(), *rows_read);
+    }
+
+    RETURN_IF_ERROR(reader.append_values(leaf_batch, *rows_read, &null_map, column));
+    advance_rows_read(*rows_read);
+    update_reader_read_rows(*rows_read);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::skip_records(int64_t rows) {
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    int64_t skipped_rows = 0;
+    try {
+        _record_reader->Reset();
+        while (skipped_rows < rows) {
+            const int64_t skipped = _record_reader->SkipRecords(rows - skipped_rows);
+            if (skipped <= 0) {
+                return Status::Corruption(
+                        "Failed to skip parquet records for column {}: skipped {} of {} rows",
+                        _name, skipped_rows, rows);
+            }
+            skipped_rows += skipped;
+        }
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to skip parquet records for column {}: {}", _name,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to skip parquet records for column {}: {}", _name,
+                                     e.what());
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+int64_t ScalarColumnReader::page_filtered_rows_to_skip(int64_t rows) const {
+    if (_page_skip_plan == nullptr || rows <= 0) {
+        return 0;
+    }
+    const int64_t skip_end = _row_group_rows_read + rows;
+    int64_t filtered_rows = 0;
+    for (const auto& range : _page_skip_plan->skipped_ranges) {
+        const int64_t range_end = range.start + range.length;
+        if (range_end <= _row_group_rows_read) {
+            continue;
+        }
+        if (range.start >= skip_end) {
+            break;
+        }
+        const int64_t start = std::max(range.start, _row_group_rows_read);
+        const int64_t end = std::min(range_end, skip_end);
+        if (start < end) {
+            // Scheduler gap skips are derived from page-index selected_ranges. A page-filtered
+            // range can only overlap such a gap when the whole data page is outside every selected
+            // range, so partial overlap would mean the planner and scheduler are out of sync.
+            DORIS_CHECK(start == range.start);
+            DORIS_CHECK(end == range_end);
+            filtered_rows += end - start;
+        }
+    }
+    return filtered_rows;
+}
+
+void ScalarColumnReader::advance_rows_read(int64_t rows) {
+    DORIS_CHECK(rows >= 0);
+    _row_group_rows_read += rows;
+}
+
+// 跳过 rows 行。分为两个阶段：
+// 1. page_filtered_rows_to_skip() — 计算落在 page skip range 内的行数（已由 page index 跳过）
+// 2. skip_records() — 对剩余行调用 RecordReader::SkipRecords()
+// 这两个阶段的行数之和等于 rows。
+Status ScalarColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+
+    const int64_t page_filtered_rows = page_filtered_rows_to_skip(rows);
+    DORIS_CHECK(page_filtered_rows <= rows);
+    const int64_t record_reader_skip_rows = rows - page_filtered_rows;
+    RETURN_IF_ERROR(skip_records(record_reader_skip_rows));
+    advance_rows_read(rows);
+    return Status::OK();
+}
+
+// 嵌套协议的 load 阶段：调用 ParquetLeafReader::read_nested_batch()。
+//
+// 关键参数 materialized_slot_definition_level：
+// Nullable 标量叶子需要为 NULL 占位符也保留 value slot。
+// 如果 _type->is_nullable()，将 slot threshold 降低 1 级（_definition_level - 1），
+// 这样即使 def_level < _definition_level（即 NULL）的 slot 也会有一个 value index。
+// The value index stream must advance on those null slots, otherwise later payload values shift.
+Status ScalarColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_nested_batch != nullptr);
+    reset_nested_build_level_cursor();
+    const int16_t materialized_slot_definition_level =
+            static_cast<int16_t>(_definition_level - (_type->is_nullable() ? 1 : 0));
+    RETURN_IF_ERROR(leaf_reader().read_nested_batch(rows, materialized_slot_definition_level,
+                                                    _nested_batch.get(), _repetition_level));
+    advance_rows_read(_nested_batch->records_read);
+    update_reader_read_rows(_nested_batch->records_read);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                               int64_t* values_read) {
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet nested scalar build result for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_nested_batch != nullptr);
+    ParquetNestedScalarValueCursor value_cursor(_nested_batch.get());
+    const int16_t materialized_slot_definition_level = _nested_batch->value_slot_definition_level;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor();
+    while (level_idx < _nested_batch->levels_written && *values_read < length_upper_bound) {
+        const int64_t current_level_idx = level_idx;
+        const int16_t def_level = _nested_batch->def_levels[current_level_idx];
+        const int16_t rep_level = _nested_batch->rep_levels[current_level_idx];
+        ++level_idx;
+        if (def_level < materialized_slot_definition_level || rep_level > _repetition_level) {
+            continue;
+        }
+        if (def_level == _definition_level) {
+            RETURN_IF_ERROR(append_scalar_batch_value(*this, *_nested_batch, current_level_idx,
+                                                      &value_cursor, column));
+        } else {
+            if (!_type->is_nullable() && def_level >= _nullable_definition_level) {
+                return Status::Corruption(
+                        "Parquet scalar column {} contains null for non-nullable field", _name);
+            }
+            column->insert_default();
+        }
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::append_nested_value(int64_t level_idx, MutableColumnPtr& column) const {
+    if (column.get() == nullptr) {
+        return Status::InvalidArgument("Invalid parquet nested scalar append result for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_nested_batch != nullptr);
+    DORIS_CHECK(level_idx >= 0);
+    DORIS_CHECK(level_idx < _nested_batch->levels_written);
+    ParquetNestedScalarValueCursor value_cursor(_nested_batch.get());
+    const int16_t def_level = _nested_batch->def_levels[level_idx];
+    if (def_level == _definition_level) {
+        return append_scalar_batch_value(*this, *_nested_batch, level_idx, &value_cursor, column);
+    }
+    if (!_type->is_nullable()) {
+        return Status::Corruption("Parquet MAP column {} contains null for non-nullable value",
+                                  _name);
+    }
+    column->insert_default();
+    return Status::OK();
+}
+
+const std::vector<int16_t>& ScalarColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->def_levels;
+}
+
+const std::vector<int16_t>& ScalarColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->rep_levels;
+}
+
+int64_t ScalarColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->levels_written;
+}
+
+bool ScalarColumnReader::is_or_has_repeated_child() const {
+    return _repetition_level > 0;
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.h b/be/src/format_v2/parquet/reader/scalar_column_reader.h
new file mode 100644
index 00000000000000..24131377b60a16
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/scalar_column_reader.h
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "format_v2/parquet/parquet_type.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+namespace parquet {
+class ColumnDescriptor;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris::format::parquet {
+
+struct ScalarColumnReaderTestAccess;
+
+// 基本类型列的读取器，直接持有 Arrow RecordReader 并通过 ParquetLeafReader 读写值。
+//
+// 这是所有 ColumnReader 中唯一直接与 Arrow RecordReader 交互的 reader。
+// 它同时服务于两种场景：
+//   1. 顶层平铺列（如 SELECT id, name FROM t）→ 通过 read()/skip()/select()
+//   2. 复杂类型内部的叶子（如 MAP 的 key/value、LIST 的 element）→ 通过嵌套协议
+//      load_nested_batch() / build_nested_column()
+//
+// MapColumnReader 被声明为 friend，因为它需要直接访问 descriptor() 和 leaf_reader()
+// 来读取 key 列的 values 用于 entry 存在性校验。
+class ScalarColumnReader final : public ParquetColumnReader {
+    friend class MapColumnReader;
+    friend struct ScalarColumnReaderTestAccess;
+
+public:
+    ScalarColumnReader(const ParquetColumnSchema& column_schema,
+                       std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+                       const ParquetPageSkipPlan* page_skip_plan = nullptr,
+                       const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false,
+                       ParquetColumnReaderProfile profile = {});
+    ~ScalarColumnReader() override;
+
+    // ========== ① 平铺读取 ==========
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+
+    // ========== ② 嵌套协议 ==========
+
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override;
+
+private:
+    // 将嵌套 batch 中单个 level_idx 对应的值写入目标 column。
+    // 被 MapColumnReader 用来逐个填充 value。
+    Status append_nested_value(int64_t level_idx, MutableColumnPtr& column) const;
+
+    const ::parquet::ColumnDescriptor* descriptor() const { return _descriptor; }
+
+    // 创建临时 ParquetLeafReader。每次调用都创建新实例（轻量级，只持有 shared_ptr）。
+    ParquetLeafReader leaf_reader() const {
+        return ParquetLeafReader(_descriptor, _type_descriptor, _type, _name, _record_reader,
+                                 _profile, _timezone, _enable_strict_mode);
+    }
+
+    void advance_rows_read(int64_t rows);
+    Status skip_records(int64_t rows);
+    // 计算 page skip 优化下当前 batch 内可以通过 page skip 跳过的行数。
+    int64_t page_filtered_rows_to_skip(int64_t rows) const;
+
+    const ::parquet::ColumnDescriptor* _descriptor = nullptr;          // Arrow 列描述符
+    ParquetTypeDescriptor _type_descriptor;                            // 类型编码信息
+    std::shared_ptr<::parquet::internal::RecordReader> _record_reader; // Arrow 物理列读取器
+    const ParquetPageSkipPlan* _page_skip_plan = nullptr; // page index 裁剪结果（可为 nullptr）
+    const cctz::time_zone* _timezone = nullptr;           // 时区
+    bool _enable_strict_mode = false;                     // 严格模式
+    int64_t _row_group_rows_read = 0;                     // 当前 RG 已读行数（游标）
+    std::unique_ptr<ParquetNestedScalarBatch> _nested_batch; // 嵌套读取的中间结果
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.cpp b/be/src/format_v2/parquet/reader/struct_column_reader.cpp
new file mode 100644
index 00000000000000..e84b5dc2298ed9
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/struct_column_reader.cpp
@@ -0,0 +1,279 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/struct_column_reader.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "core/column/column_struct.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+namespace doris::format::parquet {
+
+// 选择提供 shape 信息的子 reader：返回第一个非 repeated 的子 reader。
+// 如果所有子 reader 都是 repeated 的，返回第一个子 reader。
+// shape source 的 def/rep levels 决定 struct 的 null 状态和嵌套边界。
+ParquetColumnReader* StructColumnReader::shape_source_reader() const {
+    for (const auto& child : _children) {
+        auto* child_reader = child.get();
+        DORIS_CHECK(child_reader != nullptr);
+        if (!child_reader->is_or_has_repeated_child()) {
+            return child_reader;
+        }
+    }
+    if (_children.empty()) {
+        return nullptr;
+    }
+    return _children[0].get();
+}
+
+Status StructColumnReader::advance_child_past_null_parent(ParquetColumnReader* child_reader,
+                                                          int64_t parent_level_idx) const {
+    DORIS_CHECK(child_reader != nullptr);
+    const int64_t next_child_cursor = parent_level_idx + 1;
+    if (auto* scalar_child = dynamic_cast<ScalarColumnReader*>(child_reader)) {
+        if (next_child_cursor > scalar_child->nested_levels_written()) {
+            return Status::Corruption(
+                    "Parquet STRUCT child {} ended before null parent row in column {}",
+                    scalar_child->name(), _name);
+        }
+        scalar_child->set_nested_build_level_cursor(
+                std::max(scalar_child->nested_build_level_cursor(), next_child_cursor));
+        return Status::OK();
+    }
+    if (auto* struct_child = dynamic_cast<StructColumnReader*>(child_reader);
+        struct_child != nullptr && !struct_child->is_or_has_repeated_child()) {
+        if (next_child_cursor > struct_child->nested_levels_written()) {
+            return Status::Corruption(
+                    "Parquet STRUCT child {} ended before null parent row in column {}",
+                    struct_child->name(), _name);
+        }
+        struct_child->set_nested_build_level_cursor(
+                std::max(struct_child->nested_build_level_cursor(), next_child_cursor));
+        for (auto& grandchild : struct_child->_children) {
+            RETURN_IF_ERROR(struct_child->advance_child_past_null_parent(grandchild.get(),
+                                                                         parent_level_idx));
+        }
+        return Status::OK();
+    }
+
+    int64_t child_cursor = child_reader->nested_build_level_cursor();
+    const auto& child_rep_levels = child_reader->nested_repetition_levels();
+    const int64_t child_levels_written = child_reader->nested_levels_written();
+    while (child_cursor < child_levels_written) {
+        const int16_t child_rep_level = child_rep_levels[child_cursor];
+        ++child_cursor;
+        if (!child_reader->is_or_has_repeated_child() || child_rep_level <= _repetition_level) {
+            break;
+        }
+    }
+    child_reader->set_nested_build_level_cursor(child_cursor);
+    return Status::OK();
+}
+
+Status StructColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    return build_nested_column(rows, column, rows_read);
+}
+
+Status StructColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column();
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) {
+        return Status::Corruption("Failed to skip parquet STRUCT column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status StructColumnReader::load_nested_batch(int64_t rows) {
+    reset_nested_build_level_cursor();
+    for (auto& child_reader : _children) {
+        DORIS_CHECK(child_reader != nullptr);
+        RETURN_IF_ERROR(child_reader->load_nested_batch(rows));
+    }
+    return Status::OK();
+}
+
+// STRUCT 的嵌套构建核心逻辑：
+//
+// 整体策略：STRUCT 拥有行对齐责任。子 reader 只消费 STRUCT 非 NULL 父行的 level stream，
+// NULL 父行为所有子列填充 default 占位符。
+//
+// 流程：
+// 1. 从 shape_source_reader 的 def/rep levels 解析 struct 的 null 状态，
+//    构建 parent_nulls[] 和 parent_level_indices[]。
+// 2. 遍历所有子 reader，按 parent_nulls 分批次调用子 reader 的 build_nested_column()：
+//    - 连续的非 NULL 父行 → 批量 build（如 100 个 present → build 100 个子列行）
+//    - NULL 父行 → 子列 insert_default()，并推进子 reader 的游标跳过该 NULL slot
+// 3. 将 parent_nulls 写入 struct 外层 ColumnNullable 的 null_map。
+//
+// 对于 NULL 父行下 ScalarColumnReader 子列的特殊处理：
+// Scalar 子列的 level stream 中 NULL struct parent 也占一个 slot（但低于 value threshold），
+// 需要将子 cursor 精确推进到 parent_level_idx + 1 以跳过该 slot。
+Status StructColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                               int64_t* values_read) {
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet struct build result pointer for column {}",
+                                       _name);
+    }
+    if (_children.empty()) {
+        column->resize(column->size() + static_cast<size_t>(length_upper_bound));
+        *values_read = length_upper_bound;
+        return Status::OK();
+    }
+    auto* struct_column = struct_column_from_output(column);
+    DORIS_CHECK(struct_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column);
+    auto* shape_reader = shape_source_reader();
+    DORIS_CHECK(shape_reader != nullptr);
+    const auto& def_levels = shape_reader->nested_definition_levels();
+    const auto& rep_levels = shape_reader->nested_repetition_levels();
+    const int64_t levels_written = shape_reader->nested_levels_written();
+
+    NullMap parent_nulls;
+    std::vector<int64_t> parent_level_indices;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor();
+    while (level_idx < levels_written) {
+        const int64_t current_level_idx = level_idx;
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent =
+                !shape_reader->is_or_has_repeated_child() || rep_level <= _repetition_level;
+        if (starts_parent && *values_read >= length_upper_bound) {
+            break;
+        }
+        ++level_idx;
+        if (def_level < _repeated_ancestor_definition_level) {
+            continue;
+        }
+        if (shape_reader->is_or_has_repeated_child() && rep_level > _repetition_level) {
+            continue;
+        }
+        const bool parent_is_null = def_level < _nullable_definition_level;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption(
+                    "Parquet STRUCT column {} contains null for non-nullable struct", _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        parent_level_indices.push_back(current_level_idx);
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx);
+
+    std::vector<MutableColumnPtr> child_columns;
+    child_columns.reserve(struct_column->get_columns().size());
+    for (size_t child_idx = 0; child_idx < struct_column->get_columns().size(); ++child_idx) {
+        child_columns.push_back(struct_column->get_column_ptr(child_idx)->assert_mutable());
+    }
+    for (size_t child_idx = 0; child_idx < _children.size(); ++child_idx) {
+        const int output_idx = _child_output_indices[child_idx];
+        if (output_idx < 0) {
+            continue;
+        }
+        // STRUCT owns row alignment. Child readers consume only present parent rows from their
+        // level streams; null STRUCT parents become default placeholders in every child column.
+        // This mirrors Arrow's separation between struct validity and child array materialization,
+        // and avoids asking scalar/list/map children to invent values for an absent parent.
+        int64_t pending_present_rows = 0;
+        int64_t total_child_rows = 0;
+        auto flush_present_rows = [&]() -> Status {
+            if (pending_present_rows == 0) {
+                return Status::OK();
+            }
+            int64_t child_rows = 0;
+            RETURN_IF_ERROR(_children[child_idx]->build_nested_column(
+                    pending_present_rows, child_columns[output_idx], &child_rows));
+            if (child_rows != pending_present_rows) {
+                return Status::Corruption(
+                        "Parquet STRUCT child {} built {} rows, expected {} for column {}",
+                        _children[child_idx]->name(), child_rows, pending_present_rows, _name);
+            }
+            total_child_rows += child_rows;
+            pending_present_rows = 0;
+            return Status::OK();
+        };
+        for (size_t parent_idx = 0; parent_idx < parent_nulls.size(); ++parent_idx) {
+            const auto parent_is_null = parent_nulls[parent_idx];
+            if (!parent_is_null) {
+                ++pending_present_rows;
+                continue;
+            }
+            RETURN_IF_ERROR(flush_present_rows());
+            child_columns[output_idx]->insert_default();
+            RETURN_IF_ERROR(advance_child_past_null_parent(_children[child_idx].get(),
+                                                           parent_level_indices[parent_idx]));
+            ++total_child_rows;
+        }
+        RETURN_IF_ERROR(flush_present_rows());
+        if (total_child_rows != *values_read) {
+            return Status::Corruption(
+                    "Parquet STRUCT child {} built {} rows, expected {} for column {}",
+                    _children[child_idx]->name(), total_child_rows, *values_read, _name);
+        }
+    }
+    for (size_t child_idx = 0; child_idx < child_columns.size(); ++child_idx) {
+        struct_column->get_column_ptr(child_idx) = std::move(child_columns[child_idx]);
+    }
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& StructColumnReader::nested_definition_levels() const {
+    auto* shape_reader = shape_source_reader();
+    DORIS_CHECK(shape_reader != nullptr);
+    return shape_reader->nested_definition_levels();
+}
+
+const std::vector<int16_t>& StructColumnReader::nested_repetition_levels() const {
+    auto* shape_reader = shape_source_reader();
+    DORIS_CHECK(shape_reader != nullptr);
+    return shape_reader->nested_repetition_levels();
+}
+
+int64_t StructColumnReader::nested_levels_written() const {
+    auto* shape_reader = shape_source_reader();
+    DORIS_CHECK(shape_reader != nullptr);
+    return shape_reader->nested_levels_written();
+}
+
+bool StructColumnReader::is_or_has_repeated_child() const {
+    auto* shape_reader = shape_source_reader();
+    return shape_reader != nullptr && shape_reader->is_or_has_repeated_child();
+}
+
+void StructColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    for (auto& child : _children) {
+        DORIS_CHECK(child != nullptr);
+        child->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.h b/be/src/format_v2/parquet/reader/struct_column_reader.h
new file mode 100644
index 00000000000000..5dac7f2c1664c4
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/struct_column_reader.h
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// STRUCT 列的读取器，持有多个子 reader 并按需协调读取。
+//
+// 实现策略：平铺读取和嵌套协议都委托给子 reader 完成。
+// - read(): 分别让每个被 projected 的子 reader 读取对应行数，组装 ColumnStruct。
+//   如果子 reader 自身是复杂类型（如 LIST inside STRUCT），递归调用其 read()。
+// - 嵌套协议：选择一个 shape source reader（第一个包含足够 level 信息的子 reader），
+//   从它读取 def/rep levels 来确定 struct 的 null 状态，然后让所有子 reader 同步构建。
+//
+// _child_output_indices 用于部分 projection 场景：
+//   例如 STRUCT<a INT, b STRING, c INT> 只读 a 和 c，
+//   则 _children = [a_reader, c_reader], _child_output_indices = [0, 2]。
+//   build_nested_column 时按 indices 将子列写入 ColumnStruct 的正确位置。
+class StructColumnReader final : public ParquetColumnReader {
+public:
+    StructColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                       std::vector<std::unique_ptr<ParquetColumnReader>> children,
+                       std::vector<int> child_output_indices,
+                       ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, type, profile),
+              _children(std::move(children)),
+              _child_output_indices(std::move(child_output_indices)) {
+        DCHECK_EQ(_children.size(), _child_output_indices.size());
+    }
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override;
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    // 选择提供 shape 信息的子 reader（第一个非空或包含 repeated 子节点的 reader）。
+    // shape source 的 def/rep levels 用于确定 struct 的 null 状态和嵌套边界。
+    ParquetColumnReader* shape_source_reader() const;
+    Status advance_child_past_null_parent(ParquetColumnReader* child_reader,
+                                          int64_t parent_level_idx) const;
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> _children; // 被 projected 的子 reader 列表
+    std::vector<int> _child_output_indices; // 子 reader → struct 输出位置的映射
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/selection_vector.h b/be/src/format_v2/parquet/selection_vector.h
new file mode 100644
index 00000000000000..8d2794d1df1345
--- /dev/null
+++ b/be/src/format_v2/parquet/selection_vector.h
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "common/check.h"
+#include "common/status.h"
+
+namespace doris::format::parquet {
+
+struct RowRange {
+    int64_t start = 0;
+    int64_t length = 0;
+};
+
+struct ParquetPageSkipPlan {
+    int leaf_column_id = -1;
+    // Page ordinal is the data-page ordinal in the column chunk. It intentionally excludes
+    // dictionary pages, matching Arrow PageReader::set_data_page_filter().
+    std::vector<uint8_t> skipped_pages;
+    std::vector<int64_t> skipped_page_compressed_sizes;
+    // Row ranges covered by skipped data pages. ScalarColumnReader uses these ranges to avoid
+    // calling RecordReader::SkipRecords() again for pages already skipped by Arrow.
+    std::vector<RowRange> skipped_ranges;
+
+    bool empty() const { return skipped_ranges.empty(); }
+
+    bool should_skip_page(size_t page_idx) const {
+        return page_idx < skipped_pages.size() && skipped_pages[page_idx] != 0;
+    }
+
+    int64_t skipped_page_compressed_size(size_t page_idx) const {
+        DCHECK_LT(page_idx, skipped_page_compressed_sizes.size());
+        return skipped_page_compressed_sizes[page_idx];
+    }
+};
+
+// 批次内选中行的行号视图，类似 DuckDB 的 SelectionVector。
+//
+// 用途：在 late materialization 中，predicate 列全量读取后执行 conjuncts，
+// 生成 SelectionVector 标记哪些行命中。Non-predicate 列再通过 select() 只读命中行。
+//
+// 两种模式：
+//   - 未绑定 data (_data == nullptr)：identity selection，get_index(i) == i
+//   - 已绑定 data：_data[i] 是第 i 个选中行的 batch 内行号
+//
+// selection_to_ranges() 将选中的行号合并为连续的 RowRange 列表，
+// 供 ColumnReader 做 range-based skip + read。
+class SelectionVector {
+public:
+    using Index = uint16_t;
+
+    SelectionVector() = default;
+
+    explicit SelectionVector(size_t count) { resize(count); }
+
+    SelectionVector(Index* data, size_t count) { initialize(data, count); }
+
+    void initialize(Index* data, size_t count) {
+        _owned.clear();
+        _data = data;
+        _size = count;
+    }
+
+    void resize(size_t count) {
+        _owned.resize(count);
+        _data = _owned.data();
+        _size = count;
+        for (size_t idx = 0; idx < count; ++idx) {
+            _data[idx] = static_cast<Index>(idx);
+        }
+    }
+
+    void clear() {
+        _owned.clear();
+        _data = nullptr;
+        _size = 0;
+    }
+
+    size_t size() const { return _size; }
+
+    bool is_set() const { return _data != nullptr; }
+
+    Index* data() { return _data; }
+
+    const Index* data() const { return _data; }
+
+    size_t get_index(size_t idx) const {
+        if (_data == nullptr) {
+            return idx;
+        }
+        return _data[idx];
+    }
+
+    void set_index(size_t idx, Index value) { _data[idx] = value; }
+
+    Status verify(size_t count, int64_t batch_rows) const {
+        if (batch_rows < 0) {
+            return Status::InvalidArgument("Negative parquet selection batch rows {}", batch_rows);
+        }
+        if (std::cmp_greater(count, batch_rows)) {
+            return Status::InvalidArgument("Parquet selection count {} exceeds batch rows {}",
+                                           count, batch_rows);
+        }
+        if (_data != nullptr && count > _size) {
+            return Status::InvalidArgument("Parquet selection count {} exceeds vector size {}",
+                                           count, _size);
+        }
+        size_t previous = 0;
+        for (size_t idx = 0; idx < count; ++idx) {
+            const size_t current = get_index(idx);
+            if (std::cmp_greater_equal(current, batch_rows)) {
+                return Status::InvalidArgument(
+                        "Parquet selection index {} out of range [0, {}) at position {}", current,
+                        batch_rows, idx);
+            }
+            if (idx > 0 && current <= previous) {
+                return Status::InvalidArgument(
+                        "Parquet selection index {} is not strictly greater than previous {} at "
+                        "position {}",
+                        current, previous, idx);
+            }
+            previous = current;
+        }
+        return Status::OK();
+    }
+
+private:
+    std::vector<Index> _owned;
+    Index* _data = nullptr;
+    size_t _size = 0;
+};
+
+inline std::vector<RowRange> selection_to_ranges(const SelectionVector& selection,
+                                                 uint16_t selected_rows) {
+    std::vector<RowRange> ranges;
+    if (selected_rows == 0) {
+        return ranges;
+    }
+
+    int64_t range_start = selection.get_index(0);
+    int64_t previous = selection.get_index(0);
+    for (uint16_t selection_idx = 1; selection_idx < selected_rows; ++selection_idx) {
+        const int64_t current = selection.get_index(selection_idx);
+        if (current == previous + 1) {
+            previous = current;
+            continue;
+        }
+        ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1});
+        range_start = current;
+        previous = current;
+    }
+    ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1});
+    return ranges;
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/schema_projection.cpp b/be/src/format_v2/schema_projection.cpp
new file mode 100644
index 00000000000000..342f4c91898c92
--- /dev/null
+++ b/be/src/format_v2/schema_projection.cpp
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/schema_projection.h"
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+
+namespace doris::format {
+namespace {
+
+// Rebuild the complex DataType for one already-pruned semantic ColumnDefinition node.
+//
+// The caller has already matched the projection against ColumnDefinition::children and preserved
+// the file-local child order. This helper only mirrors those projected semantic children into the
+// node type. It intentionally does not understand physical format wrappers. In particular, a MAP
+// node is expected to have semantic children [key, value], even if the underlying format stores a
+// wrapper such as Parquet key_value/entry.
+Status rebuild_semantic_projected_type(const DataTypePtr& original_type,
+                                       const std::vector<ColumnDefinition>& projected_children,
+                                       DataTypePtr* projected_type) {
+    DORIS_CHECK(original_type != nullptr);
+    DORIS_CHECK(projected_type != nullptr);
+
+    DataTypePtr nested_projected_type;
+    const auto primitive_type = remove_nullable(original_type)->get_primitive_type();
+    switch (primitive_type) {
+    case TYPE_STRUCT: {
+        DataTypes child_types;
+        Strings child_names;
+        child_types.reserve(projected_children.size());
+        child_names.reserve(projected_children.size());
+        for (const auto& child : projected_children) {
+            child_types.push_back(child.type);
+            child_names.push_back(child.name);
+        }
+        nested_projected_type = std::make_shared<DataTypeStruct>(child_types, child_names);
+        break;
+    }
+    case TYPE_ARRAY:
+        DORIS_CHECK(projected_children.size() == 1);
+        nested_projected_type = std::make_shared<DataTypeArray>(projected_children[0].type);
+        break;
+    case TYPE_MAP: {
+        DORIS_CHECK(remove_nullable(original_type)->get_primitive_type() == TYPE_MAP);
+        const auto* original_map_type =
+                assert_cast<const DataTypeMap*>(remove_nullable(original_type).get());
+        DataTypePtr key_type = original_map_type->get_key_type();
+        DataTypePtr value_type;
+        for (const auto& child : projected_children) {
+            // Partial MAP projection only prunes the value subtree. The key stream must remain
+            // complete because it defines entry existence and offsets when materializing ColumnMap;
+            // the projected DataTypeMap also preserves the original key type instead of rebuilding
+            // it from children. If a caller includes key in the semantic child list, ignore it
+            // here; the presence of a value child still decides the projected value shape.
+            if (child.file_local_id() == 0 || child.name == "key") {
+                continue;
+            }
+            if (child.file_local_id() == 1 || child.name == "value") {
+                value_type = child.type;
+            }
+        }
+        if (value_type == nullptr) {
+            return Status::NotSupported("MAP projection for type {} contains no value child",
+                                        original_type->get_name());
+        }
+        nested_projected_type = std::make_shared<DataTypeMap>(key_type, value_type);
+        break;
+    }
+    default:
+        return Status::InvalidArgument("Cannot project children from non-complex type {}",
+                                       original_type->get_name());
+    }
+
+    *projected_type = original_type->is_nullable() ? make_nullable(nested_projected_type)
+                                                   : nested_projected_type;
+    return Status::OK();
+}
+
+} // namespace
+
+Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection,
+                                 ColumnDefinition* projected_field) {
+    if (projected_field == nullptr) {
+        return Status::InvalidArgument("projected_field is null");
+    }
+    *projected_field = field;
+    if (projection.project_all_children || projection.children.empty()) {
+        return Status::OK();
+    }
+
+    projected_field->children.clear();
+    for (const auto& child_projection : projection.children) {
+        if (child_projection.local_id() == -1) {
+            return Status::InvalidArgument("Empty projection path for field {}", field.name);
+        }
+        const auto child_it =
+                std::ranges::find_if(field.children, [&](const ColumnDefinition& child) {
+                    return child.file_local_id() == child_projection.local_id();
+                });
+        if (child_it == field.children.end()) {
+            return Status::InvalidArgument("Invalid projection child id {} for field {}",
+                                           child_projection.local_id(), field.name);
+        }
+    }
+    for (const auto& child : field.children) {
+        const auto child_projection_it =
+                std::ranges::find_if(projection.children, [&](const LocalColumnIndex& child_proj) {
+                    return child_proj.local_id() == child.file_local_id();
+                });
+        if (child_projection_it == projection.children.end()) {
+            continue;
+        }
+        ColumnDefinition projected_child;
+        RETURN_IF_ERROR(project_column_definition(child, *child_projection_it, &projected_child));
+        projected_field->children.push_back(std::move(projected_child));
+    }
+    if (projected_field->children.empty()) {
+        return Status::NotSupported("Projection for field {} contains no children", field.name);
+    }
+
+    return rebuild_semantic_projected_type(field.type, projected_field->children,
+                                           &projected_field->type);
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/schema_projection.h b/be/src/format_v2/schema_projection.h
new file mode 100644
index 00000000000000..c2125d66931631
--- /dev/null
+++ b/be/src/format_v2/schema_projection.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "format_v2/file_reader.h"
+
+namespace doris::format {
+
+// Build a projected file-local semantic schema node from a full schema node and a nested
+// LocalColumnIndex projection.
+//
+// This module is deliberately about semantic ColumnDefinition trees, not physical file-format
+// trees. FileReader::get_schema() returns file-local columns after type conversion to Doris
+// DataType, and their children must follow Doris semantics:
+//
+//   STRUCT children = fields
+//   ARRAY children = [element]
+//   MAP children = [key, value]
+//
+// Format-specific wrappers, such as Parquet MAP key_value/entry nodes, are intentionally hidden
+// from this API. A format reader that needs those wrappers for its physical reader tree should
+// translate the semantic projection back to its physical layout internally.
+//
+// The function does three things:
+// - Copies `field` metadata to `projected_field`.
+// - Recursively prunes children according to `projection.children`, matching children by
+//   ColumnDefinition::file_local_id() rather than vector ordinal. The root projection id is not
+//   interpreted here because the caller has already selected `field`.
+// - Rebuilds the node DataType from the projected semantic children so the returned definition is
+//   self-consistent. STRUCT uses projected child names/types, ARRAY uses the projected element
+//   type, and MAP preserves the original key type while rebuilding the projected value type.
+//
+// A full projection copies `field` unchanged. Partial MAP projection only uses the value child for
+// type rebuilding. MAP is materialized as offsets + keys + values, so the reader must still read
+// the complete key stream to build entry shape and offsets. If the semantic projection includes
+// the key child, it is ignored here; key-only MAP projections are rejected because they do not
+// define a value shape.
+Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection,
+                                 ColumnDefinition* projected_field);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table/hive_reader.cpp b/be/src/format_v2/table/hive_reader.cpp
new file mode 100644
index 00000000000000..ad4b75b00856ea
--- /dev/null
+++ b/be/src/format_v2/table/hive_reader.cpp
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hive_reader.h"
+
+#include <utility>
+
+#include "common/consts.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/file_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::hive {
+namespace {
+
+TFileFormatType::type format_type_from_context(const format::ProjectedColumnBuildContext& context) {
+    DORIS_CHECK(context.scan_params != nullptr);
+    if (context.range != nullptr && context.range->__isset.format_type) {
+        return context.range->format_type;
+    }
+    return context.scan_params->format_type;
+}
+
+bool use_column_position_mapping(const format::ProjectedColumnBuildContext& context) {
+    if (context.runtime_state == nullptr || context.scan_params == nullptr) {
+        return false;
+    }
+    switch (format_type_from_context(context)) {
+    case TFileFormatType::FORMAT_PARQUET:
+        return !context.runtime_state->query_options().hive_parquet_use_column_names;
+    default:
+        return false;
+    }
+}
+
+bool is_file_column_position_slot(const TFileScanSlotInfo& slot_info,
+                                  const std::string& column_name) {
+    if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) ||
+        column_name == BeConsts::ICEBERG_ROWID_COL) {
+        return false;
+    }
+    if (slot_info.__isset.is_file_slot) {
+        return slot_info.is_file_slot;
+    }
+    return !slot_info.__isset.category || slot_info.category != TColumnCategory::PARTITION_KEY;
+}
+
+} // namespace
+
+Status HiveReader::init(format::TableReadOptions&& options) {
+    const format::FileFormat file_format = options.format;
+    RETURN_IF_ERROR(format::TableReader::init(std::move(options)));
+
+    // Hive-specific behavior: choose the column matching mode based on file format and the
+    // matching session variable.
+    //   - hive_orc_use_column_names / hive_parquet_use_column_names == true
+    //     => BY_NAME (modern Hive default, match by column name)
+    //   - those options == false
+    //     => BY_INDEX (mainly for Hive1 ORC `_col0` / `_col1`, match by top-level position;
+    //                  Parquet exposes the same switch for consistency)
+    // The base init path does not accept file-format-specific mapper configuration, so the mapper
+    // must be replaced here after the base initialization completes.
+    DORIS_CHECK(_runtime_state != nullptr);
+    const auto& query_options = _runtime_state->query_options();
+    bool use_column_names = true;
+    if (file_format == format::FileFormat::ORC) {
+        use_column_names = query_options.hive_orc_use_column_names;
+    } else if (file_format == format::FileFormat::PARQUET) {
+        use_column_names = query_options.hive_parquet_use_column_names;
+    } else if (file_format == format::FileFormat::CSV || file_format == format::FileFormat::TEXT ||
+               file_format == format::FileFormat::JSON) {
+        // Hive CSV/TEXT/JSON readers synthesize a file-local schema from FE-provided file slots
+        // because these formats do not carry embedded column names or field ids. The scan params'
+        // format-specific attributes still tell the physical reader how to read values, while the
+        // table-level mapper can safely match the synthesized file schema by table column name.
+        use_column_names = true;
+    } else {
+        return Status::NotSupported("HiveReader does not support file reader format {}",
+                                    file_format);
+    }
+
+    _mode = use_column_names ? format::TableColumnMappingMode::BY_NAME
+                             : format::TableColumnMappingMode::BY_INDEX;
+    return Status::OK();
+}
+
+Status HiveReader::annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                             format::ProjectedColumnBuildContext* context,
+                                             format::ColumnDefinition* column) const {
+    RETURN_IF_ERROR(format::TableReader::annotate_projected_column(slot_info, context, column));
+    DORIS_CHECK(context != nullptr);
+    DORIS_CHECK(column != nullptr);
+    if (!use_column_position_mapping(*context) ||
+        !is_file_column_position_slot(slot_info, column->name)) {
+        return Status::OK();
+    }
+    const auto* scan_params = context->scan_params;
+    DORIS_CHECK(scan_params != nullptr);
+    if (!scan_params->__isset.column_idxs ||
+        context->next_file_column_idx >= scan_params->column_idxs.size()) {
+        return Status::InvalidArgument(
+                "Hive positional column mapping is missing file index for column '{}', "
+                "required file slot ordinal={}, column_idxs_size={}",
+                column->name, context->next_file_column_idx,
+                scan_params->__isset.column_idxs ? scan_params->column_idxs.size() : 0);
+    }
+    const auto file_index = scan_params->column_idxs[context->next_file_column_idx];
+    if (file_index < 0) {
+        return Status::InvalidArgument(
+                "Hive positional column mapping has negative file index {} for column '{}'",
+                file_index, column->name);
+    }
+    column->identifier = Field::create_field<TYPE_INT>(file_index);
+    ++context->next_file_column_idx;
+    return Status::OK();
+}
+
+Status HiveReader::validate_projected_columns(
+        const format::ProjectedColumnBuildContext& context) const {
+    if (!use_column_position_mapping(context)) {
+        return Status::OK();
+    }
+    DORIS_CHECK(context.scan_params != nullptr);
+    if (context.scan_params->__isset.column_idxs &&
+        context.next_file_column_idx != context.scan_params->column_idxs.size()) {
+        return Status::InvalidArgument(
+                "Hive positional column mapping has unused file indexes: consumed={}, "
+                "column_idxs_size={}",
+                context.next_file_column_idx, context.scan_params->column_idxs.size());
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::hive
diff --git a/be/src/format_v2/table/hive_reader.h b/be/src/format_v2/table/hive_reader.h
new file mode 100644
index 00000000000000..308bb242c8cf21
--- /dev/null
+++ b/be/src/format_v2/table/hive_reader.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "format_v2/table_reader.h"
+
+namespace doris::format::hive {
+
+class HiveReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(HiveReader);
+    ~HiveReader() final = default;
+
+    Status init(format::TableReadOptions&& options) override;
+    format::TableColumnMappingMode mapping_mode() const override { return _mode; }
+    Status annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                     format::ProjectedColumnBuildContext* context,
+                                     format::ColumnDefinition* column) const override;
+    Status validate_projected_columns(
+            const format::ProjectedColumnBuildContext& context) const override;
+
+private:
+    format::TableColumnMappingMode _mode = format::TableColumnMappingMode::BY_NAME;
+};
+
+} // namespace doris::format::hive
diff --git a/be/src/format_v2/table/hudi_reader.cpp b/be/src/format_v2/table/hudi_reader.cpp
new file mode 100644
index 00000000000000..d76be201067bd7
--- /dev/null
+++ b/be/src/format_v2/table/hudi_reader.cpp
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hudi_reader.h"
+
+#include <utility>
+
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/hudi_jni_reader.h"
+#include "format_v2/table/schema_history_util.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::hudi {
+
+Status HudiReader::prepare_split(const format::SplitReadOptions& options) {
+    _split_schema_id = -1;
+    if (options.current_range.__isset.table_format_params &&
+        options.current_range.table_format_params.__isset.hudi_params &&
+        options.current_range.table_format_params.hudi_params.__isset.schema_id) {
+        _split_schema_id = options.current_range.table_format_params.hudi_params.schema_id;
+    }
+    return format::TableReader::prepare_split(options);
+}
+
+format::TableColumnMappingMode HudiReader::mapping_mode() const {
+    return format::can_map_by_history_schema(_scan_params, _split_schema_id)
+                   ? format::TableColumnMappingMode::BY_FIELD_ID
+                   : format::TableColumnMappingMode::BY_NAME;
+}
+
+Status HudiReader::annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    if (mapping_mode() != format::TableColumnMappingMode::BY_FIELD_ID) {
+        return Status::OK();
+    }
+    return format::annotate_file_schema_from_history(_scan_params, _split_schema_id, file_schema);
+}
+
+Status HudiHybridReader::init(format::TableReadOptions&& options) {
+    return format::TableReader::init(std::move(options));
+}
+
+Status HudiHybridReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(_ensure_current_split_reader(options));
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->prepare_split(options);
+}
+
+Status HudiHybridReader::get_block(Block* block, bool* eos) {
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->get_block(block, eos);
+}
+
+Status HudiHybridReader::close() {
+    Status close_status = Status::OK();
+    if (_native_reader != nullptr) {
+        close_status = _native_reader->close();
+    }
+    if (_jni_reader != nullptr) {
+        auto status = _jni_reader->close();
+        if (!status.ok() && close_status.ok()) {
+            close_status = std::move(status);
+        }
+    }
+    _current_split_reader = nullptr;
+    return close_status;
+}
+
+Status HudiHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) {
+    DORIS_CHECK(_scan_params != nullptr);
+    if (_is_jni_split(*_scan_params, options.current_range)) {
+        if (_jni_reader == nullptr) {
+            _jni_reader = std::make_unique<format::hudi::HudiJniReader>();
+            RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI));
+        }
+        _current_split_reader = _jni_reader.get();
+    } else {
+        format::FileFormat file_format;
+        RETURN_IF_ERROR(_to_file_format(*_scan_params, options.current_range, &file_format));
+        if (_native_reader == nullptr) {
+            _native_reader = format::hudi::HudiReader::create_unique();
+            RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_format));
+        }
+        _current_split_reader = _native_reader.get();
+    }
+    return Status::OK();
+}
+
+Status HudiHybridReader::_init_child_reader(format::TableReader* reader,
+                                            format::FileFormat file_format) {
+    DORIS_CHECK(reader != nullptr);
+    VExprContextSPtrs conjuncts;
+    RETURN_IF_ERROR(_clone_conjuncts(&conjuncts));
+    return reader->init({
+            .projected_columns = _projected_columns,
+            .column_predicates = _table_column_predicates,
+            .conjuncts = std::move(conjuncts),
+            .format = file_format,
+            .scan_params = _scan_params,
+            .io_ctx = _io_ctx,
+            .runtime_state = _runtime_state,
+            .scanner_profile = _scanner_profile,
+            .push_down_agg_type = _push_down_agg_type,
+            .condition_cache_digest = _condition_cache_digest,
+    });
+}
+
+Status HudiHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear();
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& conjunct : _conjuncts) {
+        VExprSPtr root;
+        RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root));
+        conjuncts->push_back(VExprContext::create_shared(std::move(root)));
+    }
+    return Status::OK();
+}
+
+TFileFormatType::type HudiHybridReader::_range_format_type(const TFileScanRangeParams& params,
+                                                           const TFileRangeDesc& range) {
+    return range.__isset.format_type ? range.format_type : params.format_type;
+}
+
+bool HudiHybridReader::_is_jni_split(const TFileScanRangeParams& params,
+                                     const TFileRangeDesc& range) {
+    return _range_format_type(params, range) == TFileFormatType::FORMAT_JNI;
+}
+
+Status HudiHybridReader::_to_file_format(const TFileScanRangeParams& params,
+                                         const TFileRangeDesc& range,
+                                         format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    const auto format_type = _range_format_type(params, range);
+    switch (format_type) {
+    case TFileFormatType::FORMAT_PARQUET:
+        *file_format = format::FileFormat::PARQUET;
+        return Status::OK();
+    case TFileFormatType::FORMAT_ORC:
+        *file_format = format::FileFormat::ORC;
+        return Status::OK();
+    default:
+        return Status::NotSupported("Unsupported native Hudi file format {}",
+                                    to_string(format_type));
+    }
+}
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/table/hudi_reader.h b/be/src/format_v2/table/hudi_reader.h
new file mode 100644
index 00000000000000..aeaaedf6ab6064
--- /dev/null
+++ b/be/src/format_v2/table/hudi_reader.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "format_v2/table_reader.h"
+
+namespace doris::format::hudi {
+
+class HudiReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(HudiReader);
+    ~HudiReader() final = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+#ifdef BE_TEST
+    void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; }
+    format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); }
+    Status TEST_annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+        return annotate_file_schema(file_schema);
+    }
+#endif
+
+protected:
+    format::TableColumnMappingMode mapping_mode() const override;
+    Status annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) override;
+
+private:
+    int64_t _split_schema_id = -1;
+};
+
+// Hudi MOR scans can contain both JNI splits that need log-file merge semantics and native
+// data-file splits without delta logs in the same SplitSource. FileScannerV2 owns one table reader
+// for the scanner lifetime, so this reader keeps native and JNI child readers internally and
+// dispatches each split to the matching child reader.
+class HudiHybridReader final : public format::TableReader {
+public:
+    ~HudiHybridReader() override = default;
+
+    Status init(format::TableReadOptions&& options) override;
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override;
+
+private:
+    Status _ensure_current_split_reader(const format::SplitReadOptions& options);
+    Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format);
+    Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static TFileFormatType::type _range_format_type(const TFileScanRangeParams& params,
+                                                    const TFileRangeDesc& range);
+    static bool _is_jni_split(const TFileScanRangeParams& params, const TFileRangeDesc& range);
+    static Status _to_file_format(const TFileScanRangeParams& params, const TFileRangeDesc& range,
+                                  format::FileFormat* file_format);
+
+    std::unique_ptr<format::TableReader> _native_reader; // handle native parquet/orc splits
+    std::unique_ptr<format::TableReader> _jni_reader;    // handle MOR JNI splits
+    format::TableReader* _current_split_reader = nullptr;
+};
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/table/iceberg_reader.cpp b/be/src/format_v2/table/iceberg_reader.cpp
new file mode 100644
index 00000000000000..ccc100f05044cc
--- /dev/null
+++ b/be/src/format_v2/table/iceberg_reader.cpp
@@ -0,0 +1,797 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/iceberg_reader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_const.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/define_primitive_type.h"
+#include "core/field.h"
+#include "exprs/vslot_ref.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/expr/equality_delete_predicate.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/table_reader.h"
+#include "io/file_factory.h"
+
+namespace doris::format::iceberg {
+
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
+static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540;
+
+template <typename T>
+static std::string join_values_for_debug(const std::vector<T>& values) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        if (idx > 0) {
+            out << ", ";
+        }
+        out << values[idx];
+    }
+    out << "]";
+    return out.str();
+}
+
+static bool is_projected_row_lineage_row_id(const format::ColumnDefinition& column) {
+    // Iceberg row lineage columns can be bound by field id when a mapper has already been built,
+    // but customize_file_scan_request() is also exercised directly by scan-request tests before the
+    // mapper exists. In that path, inspect the projected table schema so row-position dependencies
+    // are still added for `_row_id`.
+    return column.name == ROW_LINEAGE_ROW_ID ||
+           (column.has_identifier_field_id() &&
+            column.get_identifier_field_id() == ROW_LINEAGE_ROW_ID_FIELD_ID);
+}
+
+static bool is_projected_iceberg_rowid(const format::ColumnDefinition& column) {
+    return column.name == BeConsts::ICEBERG_ROWID_COL;
+}
+
+static std::string iceberg_delete_file_debug_string(const TIcebergDeleteFileDesc& delete_file) {
+    std::ostringstream out;
+    out << "TIcebergDeleteFileDesc{path=" << (delete_file.__isset.path ? delete_file.path : "null")
+        << ", content=" << (delete_file.__isset.content ? delete_file.content : -1)
+        << ", file_format="
+        << (delete_file.__isset.file_format ? static_cast<int>(delete_file.file_format) : -1)
+        << ", position_lower_bound="
+        << (delete_file.__isset.position_lower_bound ? delete_file.position_lower_bound : -1)
+        << ", position_upper_bound="
+        << (delete_file.__isset.position_upper_bound ? delete_file.position_upper_bound : -1)
+        << ", field_ids="
+        << (delete_file.__isset.field_ids ? join_values_for_debug(delete_file.field_ids) : "[]")
+        << ", content_offset="
+        << (delete_file.__isset.content_offset ? delete_file.content_offset : -1)
+        << ", content_size_in_bytes="
+        << (delete_file.__isset.content_size_in_bytes ? delete_file.content_size_in_bytes : -1)
+        << "}";
+    return out.str();
+}
+
+static std::string iceberg_delete_files_debug_string(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t idx = 0; idx < delete_files.size(); ++idx) {
+        if (idx > 0) {
+            out << ", ";
+        }
+        out << iceberg_delete_file_debug_string(delete_files[idx]);
+    }
+    out << "]";
+    return out.str();
+}
+
+static std::string iceberg_params_debug_string(const std::optional<TIcebergFileDesc>& params) {
+    if (!params.has_value()) {
+        return "null";
+    }
+    const auto& iceberg_params = *params;
+    std::ostringstream out;
+    out << "TIcebergFileDesc{format_version="
+        << (iceberg_params.__isset.format_version ? iceberg_params.format_version : -1)
+        << ", content=" << (iceberg_params.__isset.content ? iceberg_params.content : -1)
+        << ", original_file_path="
+        << (iceberg_params.__isset.original_file_path ? iceberg_params.original_file_path : "null")
+        << ", row_count=" << (iceberg_params.__isset.row_count ? iceberg_params.row_count : -1)
+        << ", partition_spec_id="
+        << (iceberg_params.__isset.partition_spec_id ? iceberg_params.partition_spec_id : 0)
+        << ", has_partition_data_json=" << iceberg_params.__isset.partition_data_json
+        << ", first_row_id="
+        << (iceberg_params.__isset.first_row_id ? iceberg_params.first_row_id : -1)
+        << ", last_updated_sequence_number="
+        << (iceberg_params.__isset.last_updated_sequence_number
+                    ? iceberg_params.last_updated_sequence_number
+                    : -1)
+        << ", delete_file_count="
+        << (iceberg_params.__isset.delete_files ? iceberg_params.delete_files.size() : 0)
+        << ", delete_files="
+        << (iceberg_params.__isset.delete_files
+                    ? iceberg_delete_files_debug_string(iceberg_params.delete_files)
+                    : "[]")
+        << ", has_serialized_split=" << iceberg_params.__isset.serialized_split << "}";
+    return out.str();
+}
+
+IcebergTableReader::PositionDeleteRowsCollector::PositionDeleteRowsCollector(
+        std::string data_file_path, format::DeleteRows* rows)
+        : _data_file_path(std::move(data_file_path)), _rows(rows) {}
+
+Status IcebergTableReader::PositionDeleteRowsCollector::collect(const Block& block,
+                                                                size_t read_rows) {
+    if (read_rows == 0) {
+        return Status::OK();
+    }
+    const auto& file_path_column = assert_cast<const ColumnString&>(
+            *remove_nullable((block.get_by_position(ICEBERG_FILE_PATH_BLOCK_POSITION).column)));
+    const auto& pos_column = assert_cast<const ColumnInt64&>(
+            *remove_nullable(block.get_by_position(ICEBERG_ROW_POS_BLOCK_POSITION).column));
+    for (size_t row = 0; row < read_rows; ++row) {
+        const auto file_path = file_path_column.get_data_at(row).to_string();
+        if (file_path == _data_file_path) {
+            _rows->push_back(pos_column.get_element(row));
+        }
+    }
+    return Status::OK();
+}
+
+Status IcebergTableReader::prepare_split(const format::SplitReadOptions& options) {
+    _row_lineage_columns = {};
+    _iceberg_params.reset();
+    _delete_predicates_initialized = false;
+    _position_delete_rows_storage.clear();
+    _equality_delete_filters.clear();
+    if (options.current_range.__isset.table_format_params &&
+        options.current_range.table_format_params.__isset.iceberg_params) {
+        const auto& iceberg_params = options.current_range.table_format_params.iceberg_params;
+        _iceberg_params = iceberg_params;
+        if (iceberg_params.__isset.first_row_id) {
+            _row_lineage_columns.first_row_id = iceberg_params.first_row_id;
+        }
+        if (iceberg_params.__isset.last_updated_sequence_number) {
+            _row_lineage_columns.last_updated_sequence_number =
+                    iceberg_params.last_updated_sequence_number;
+        }
+    }
+    RETURN_IF_ERROR(TableReader::prepare_split(options));
+    if (_is_table_level_count_active()) {
+        return Status::OK();
+    }
+    RETURN_IF_ERROR(_init_delete_predicates(options.current_range.table_format_params));
+    return Status::OK();
+}
+
+std::string IcebergTableReader::debug_string() const {
+    size_t position_delete_file_count = 0;
+    size_t equality_delete_file_count = 0;
+    size_t deletion_vector_file_count = 0;
+    if (_iceberg_params.has_value() && _iceberg_params->__isset.delete_files) {
+        for (const auto& delete_file : _iceberg_params->delete_files) {
+            if (!delete_file.__isset.content) {
+                continue;
+            }
+            if (delete_file.content == POSITION_DELETE) {
+                ++position_delete_file_count;
+            } else if (delete_file.content == EQUALITY_DELETE) {
+                ++equality_delete_file_count;
+            } else if (delete_file.content == DELETION_VECTOR) {
+                ++deletion_vector_file_count;
+            }
+        }
+    }
+
+    std::ostringstream equality_filters;
+    equality_filters << "[";
+    for (size_t idx = 0; idx < _equality_delete_filters.size(); ++idx) {
+        if (idx > 0) {
+            equality_filters << ", ";
+        }
+        const auto& filter = _equality_delete_filters[idx];
+        equality_filters << "EqualityDeleteFilter{field_ids="
+                         << join_values_for_debug(filter.field_ids) << ", key_types=[";
+        for (size_t type_idx = 0; type_idx < filter.key_types.size(); ++type_idx) {
+            if (type_idx > 0) {
+                equality_filters << ", ";
+            }
+            equality_filters << (filter.key_types[type_idx] == nullptr
+                                         ? "null"
+                                         : filter.key_types[type_idx]->get_name());
+        }
+        equality_filters << "], delete_block_rows=" << filter.delete_block.rows()
+                         << ", delete_block_columns=" << filter.delete_block.columns() << "}";
+    }
+    equality_filters << "]";
+
+    std::ostringstream out;
+    out << "IcebergTableReader{base=" << format::TableReader::debug_string()
+        << ", iceberg_params=" << iceberg_params_debug_string(_iceberg_params)
+        << ", row_lineage_first_row_id=" << _row_lineage_columns.first_row_id
+        << ", row_lineage_last_updated_sequence_number="
+        << _row_lineage_columns.last_updated_sequence_number
+        << ", need_row_lineage_row_id=" << _need_row_lineage_row_id()
+        << ", need_iceberg_rowid=" << _need_iceberg_rowid()
+        << ", row_position_block_position=" << _row_position_block_position
+        << ", delete_predicates_initialized=" << _delete_predicates_initialized
+        << ", position_delete_file_count=" << position_delete_file_count
+        << ", equality_delete_file_count=" << equality_delete_file_count
+        << ", deletion_vector_file_count=" << deletion_vector_file_count
+        << ", position_delete_rows_storage_count=" << _position_delete_rows_storage.size()
+        << ", equality_delete_filter_count=" << _equality_delete_filters.size()
+        << ", equality_delete_filters=" << equality_filters.str() << "}";
+    return out.str();
+}
+
+Status IcebergTableReader::materialize_virtual_columns(Block* table_block) {
+    for (size_t column_idx = 0; column_idx < _data_reader.column_mapper->mappings().size();
+         ++column_idx) {
+        const auto& mapping = _data_reader.column_mapper->mappings()[column_idx];
+        switch (mapping.virtual_column_type) {
+        case format::TableVirtualColumnType::ROW_ID:
+            RETURN_IF_ERROR(_materialize_row_lineage_row_id(table_block, column_idx));
+            break;
+        case format::TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER:
+            RETURN_IF_ERROR(
+                    _materialize_row_lineage_last_updated_sequence_number(table_block, column_idx));
+            break;
+        case format::TableVirtualColumnType::ICEBERG_ROWID:
+            RETURN_IF_ERROR(_materialize_iceberg_rowid(table_block, column_idx));
+            break;
+        case format::TableVirtualColumnType::INVALID:
+            break;
+        }
+    }
+    return Status::OK();
+}
+
+Status IcebergTableReader::customize_file_scan_request(format::FileScanRequest* file_request) {
+    RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request));
+    if ((_row_lineage_columns.first_row_id >= 0 && _need_row_lineage_row_id()) ||
+        _need_iceberg_rowid()) {
+        RETURN_IF_ERROR(_append_row_position_output_column(file_request));
+    }
+    RETURN_IF_ERROR(_append_equality_delete_predicates(file_request));
+    return Status::OK();
+}
+
+bool IcebergTableReader::_supports_aggregate_pushdown(TPushAggOp::type agg_type) const {
+    if (!TableReader::_supports_aggregate_pushdown(agg_type)) {
+        return false;
+    }
+    return _equality_delete_filters.empty();
+}
+
+Status IcebergTableReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                                       DeleteFileDesc* desc,
+                                                       bool* has_delete_file) {
+    DORIS_CHECK(desc != nullptr);
+    DORIS_CHECK(has_delete_file != nullptr);
+    *has_delete_file = false;
+    if (!t_desc.__isset.iceberg_params) {
+        return Status::OK();
+    }
+    const auto& iceberg_params = t_desc.iceberg_params;
+    if (!iceberg_params.__isset.format_version ||
+        iceberg_params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION ||
+        !iceberg_params.__isset.delete_files || iceberg_params.delete_files.empty()) {
+        return Status::OK();
+    }
+
+    const TIcebergDeleteFileDesc* deletion_vector = nullptr;
+    for (const auto& delete_file : iceberg_params.delete_files) {
+        if (!delete_file.__isset.content || delete_file.content != DELETION_VECTOR) {
+            continue;
+        }
+        if (deletion_vector != nullptr) {
+            return Status::DataQualityError("This iceberg data file has multiple DVs.");
+        }
+        deletion_vector = &delete_file;
+    }
+    if (deletion_vector == nullptr) {
+        return Status::OK();
+    }
+    if (!deletion_vector->__isset.content_offset ||
+        !deletion_vector->__isset.content_size_in_bytes) {
+        return Status::InternalError("Deletion vector is missing content offset or length");
+    }
+
+    desc->key = _iceberg_delete_vector_cache_key(*deletion_vector);
+    desc->path = deletion_vector->path;
+    desc->start_offset = deletion_vector->content_offset;
+    desc->size = deletion_vector->content_size_in_bytes;
+    desc->file_size = -1;
+    desc->format = DeleteFileDesc::Format::ICEBERG;
+    *has_delete_file = true;
+    return Status::OK();
+}
+
+Status IcebergTableReader::_init_delete_predicates(const TTableFormatFileDesc& t_desc) {
+    if (!t_desc.__isset.iceberg_params || _delete_predicates_initialized) {
+        _delete_predicates_initialized = true;
+        return Status::OK();
+    }
+    const auto& iceberg_params = t_desc.iceberg_params;
+    if (!iceberg_params.__isset.format_version ||
+        iceberg_params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION ||
+        !iceberg_params.__isset.delete_files || iceberg_params.delete_files.empty()) {
+        _delete_predicates_initialized = true;
+        return Status::OK();
+    }
+
+    std::vector<TIcebergDeleteFileDesc> position_delete_files;
+    std::vector<TIcebergDeleteFileDesc> equality_delete_files;
+    for (const auto& delete_file : iceberg_params.delete_files) {
+        if (!delete_file.__isset.content) {
+            continue;
+        }
+        if (delete_file.content == POSITION_DELETE) {
+            position_delete_files.push_back(delete_file);
+        } else if (delete_file.content == EQUALITY_DELETE) {
+            equality_delete_files.push_back(delete_file);
+        }
+    }
+    // `_delete_rows != nullptr` means DeleteVector is parsed
+    if (_delete_rows != nullptr) {
+        _position_delete_rows_storage = *_delete_rows;
+        _delete_rows = &_position_delete_rows_storage;
+    }
+    // Combine position delete rows from both deletion vector and position delete files, and
+    // initialize equality delete predicates. Position delete files contain row positions of
+    // deleted rows, which can be directly added to `_delete_rows`. Equality delete files contain
+    // values of deleted rows, which require reading the files and building predicates for later
+    // filtering.
+    if (!position_delete_files.empty()) {
+        RETURN_IF_ERROR(_init_position_delete_rows(position_delete_files));
+    }
+    if (!equality_delete_files.empty()) {
+        RETURN_IF_ERROR(_init_equality_delete_predicates(equality_delete_files));
+    }
+
+    _delete_predicates_initialized = true;
+    return Status::OK();
+}
+
+std::string IcebergTableReader::_iceberg_delete_vector_cache_key(
+        const TIcebergDeleteFileDesc& delete_file) {
+    const std::string key_prefix = "iceberg_dv:";
+    std::string key;
+    key.resize(key_prefix.size() + delete_file.path.size() + sizeof(delete_file.content_offset) +
+               sizeof(delete_file.content_size_in_bytes));
+    char* data = key.data();
+    memcpy(data, key_prefix.data(), key_prefix.size());
+    data += key_prefix.size();
+    memcpy(data, delete_file.path.data(), delete_file.path.size());
+    data += delete_file.path.size();
+    memcpy(data, &delete_file.content_offset, sizeof(delete_file.content_offset));
+    data += sizeof(delete_file.content_offset);
+    memcpy(data, &delete_file.content_size_in_bytes, sizeof(delete_file.content_size_in_bytes));
+    return key;
+}
+
+std::shared_ptr<io::FileSystemProperties> IcebergTableReader::_delete_file_system_properties(
+        const TFileScanRangeParams& scan_params) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type =
+            scan_params.__isset.file_type ? scan_params.file_type : TFileType::FILE_LOCAL;
+    system_properties->properties = scan_params.properties;
+    system_properties->hdfs_params = scan_params.hdfs_params;
+    if (scan_params.__isset.broker_addresses) {
+        system_properties->broker_addresses.assign(scan_params.broker_addresses.begin(),
+                                                   scan_params.broker_addresses.end());
+    }
+    return system_properties;
+}
+
+std::unique_ptr<io::FileDescription> IcebergTableReader::_delete_file_description(
+        const TFileRangeDesc& range) {
+    auto file_description = std::make_unique<io::FileDescription>();
+    file_description->path = range.path;
+    file_description->file_size = range.__isset.file_size ? range.file_size : -1;
+    file_description->range_start_offset = range.__isset.start_offset ? range.start_offset : 0;
+    file_description->range_size = range.__isset.size ? range.size : -1;
+    if (range.__isset.fs_name) {
+        file_description->fs_name = range.fs_name;
+    }
+    return file_description;
+}
+
+std::string IcebergTableReader::_data_file_path() const {
+    if (_iceberg_params.has_value() && _iceberg_params->__isset.original_file_path) {
+        return _iceberg_params->original_file_path;
+    }
+    DORIS_CHECK(_current_task != nullptr);
+    DORIS_CHECK(_current_task->data_file != nullptr);
+    return _current_task->data_file->path;
+}
+
+Status IcebergTableReader::_append_row_position_output_column(format::FileScanRequest* request) {
+    const auto row_position_column_id = format::LocalColumnId(format::ROW_POSITION_COLUMN_ID);
+    _append_file_scan_column(request, row_position_column_id, &request->non_predicate_columns);
+    _row_position_block_position = request->local_positions.at(row_position_column_id).value();
+    return Status::OK();
+}
+
+Status IcebergTableReader::_append_equality_delete_predicates(format::FileScanRequest* request) {
+    DORIS_CHECK(request != nullptr);
+    for (const auto& filter : _equality_delete_filters) {
+        auto delete_predicate =
+                std::make_shared<EqualityDeletePredicate>(filter.delete_block, filter.field_ids);
+        DCHECK_EQ(filter.field_ids.size(), filter.key_types.size());
+        for (size_t idx = 0; idx < filter.field_ids.size(); ++idx) {
+            const int field_id = filter.field_ids[idx];
+            auto field_it = std::ranges::find_if(
+                    _data_reader.file_schema, [field_id](const format::ColumnDefinition& field) {
+                        return field.has_identifier_field_id() &&
+                               field.get_identifier_field_id() == field_id;
+                    });
+            if (field_it == _data_reader.file_schema.end()) {
+                return Status::InternalError(
+                        "Can not find equality delete column field id {} in data file schema",
+                        field_id);
+            }
+            const auto field_column_id = format::LocalColumnId(field_it->file_local_id());
+            _append_file_scan_column(request, field_column_id, &request->predicate_columns);
+            const auto block_position = request->local_positions.at(field_column_id).value();
+            auto slot = VSlotRef::create_shared(cast_set<int>(block_position),
+                                                cast_set<int>(block_position), -1, field_it->type,
+                                                field_it->name);
+            if (field_it->type->equals(*filter.key_types[idx])) {
+                delete_predicate->add_child(std::move(slot));
+            } else {
+                auto cast_expr = Cast::create_shared(filter.key_types[idx]);
+                cast_expr->add_child(std::move(slot));
+                delete_predicate->add_child(std::move(cast_expr));
+            }
+        }
+        request->delete_conjuncts.push_back(
+                VExprContext::create_shared(std::move(delete_predicate)));
+    }
+    return Status::OK();
+}
+
+Status IcebergTableReader::_read_parquet_position_delete_file(
+        const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params,
+        IcebergDeleteFileIOContext* delete_io_ctx, PositionDeleteRowsCollector* collector) {
+    if (!delete_file.__isset.file_format) {
+        return Status::InternalError("Iceberg position delete file is missing file format");
+    }
+    if (delete_file.file_format == TFileFormatType::FORMAT_ORC) {
+        return Status::NotSupported("Iceberg ORC position delete file is not supported");
+    }
+    if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) {
+        return Status::NotSupported("Unsupported Iceberg delete file format {}",
+                                    delete_file.file_format);
+    }
+
+    auto delete_range = build_iceberg_delete_file_range(delete_file.path);
+    if (_current_task != nullptr && _current_task->data_file != nullptr &&
+        !_current_task->data_file->fs_name.empty()) {
+        delete_range.__set_fs_name(_current_task->data_file->fs_name);
+    }
+    auto system_properties = _delete_file_system_properties(scan_params);
+    auto file_description = _delete_file_description(delete_range);
+    std::shared_ptr<io::IOContext> io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {});
+    format::parquet::ParquetReader reader(system_properties, file_description, io_ctx,
+                                          _scanner_profile);
+    RETURN_IF_ERROR(reader.init(_runtime_state));
+
+    std::vector<format::ColumnDefinition> schema;
+    RETURN_IF_ERROR(reader.get_schema(&schema));
+    format::ColumnDefinition* file_path_field = nullptr;
+    format::ColumnDefinition* pos_field = nullptr;
+    for (auto& field : schema) {
+        if (field.name == ICEBERG_FILE_PATH) {
+            file_path_field = &field;
+        } else if (field.name == ICEBERG_ROW_POS) {
+            pos_field = &field;
+        }
+    }
+    if (file_path_field == nullptr || pos_field == nullptr) {
+        return Status::InternalError("Position delete parquet file is missing required columns");
+    }
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {
+            format::LocalColumnIndex::top_level(
+                    format::LocalColumnId(file_path_field->file_local_id())),
+            format::LocalColumnIndex::top_level(format::LocalColumnId(pos_field->file_local_id()))};
+    request->local_positions = {
+            {format::LocalColumnId(file_path_field->file_local_id()),
+             format::LocalIndex(ICEBERG_FILE_PATH_BLOCK_POSITION)},
+            {format::LocalColumnId(pos_field->file_local_id()),
+             format::LocalIndex(ICEBERG_ROW_POS_BLOCK_POSITION)},
+    };
+    RETURN_IF_ERROR(reader.open(request));
+
+    bool eof = false;
+    auto build_position_delete_block = [](const format::ColumnDefinition& file_path_field,
+                                          const format::ColumnDefinition& pos_field) -> Block {
+        Block block;
+        block.insert(
+                {file_path_field.type->create_column(), file_path_field.type, ICEBERG_FILE_PATH});
+        block.insert({pos_field.type->create_column(), pos_field.type, ICEBERG_ROW_POS});
+        return block;
+    };
+    while (!eof) {
+        Block block = build_position_delete_block(*file_path_field, *pos_field);
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof));
+        RETURN_IF_ERROR(collector->collect(block, read_rows));
+    }
+    return reader.close();
+}
+
+Status IcebergTableReader::_init_position_delete_rows(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TFileScanRangeParams delete_scan_params =
+            _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params;
+    format::DeleteRows position_delete_rows;
+    IcebergDeleteFileIOContext delete_io_ctx(_runtime_state);
+    PositionDeleteRowsCollector collector(_data_file_path(), &position_delete_rows);
+    for (const auto& delete_file : delete_files) {
+        RETURN_IF_ERROR(_read_parquet_position_delete_file(delete_file, delete_scan_params,
+                                                           &delete_io_ctx, &collector));
+    }
+    if (position_delete_rows.empty()) {
+        return Status::OK();
+    }
+    // Position delete files and deletion vectors both become row-position deletes for the
+    // common TableReader DeletePredicate path. Keep the merged rows in a member vector because
+    // DeletePredicate stores a reference to the vector used by _delete_rows.
+    _position_delete_rows_storage.insert(_position_delete_rows_storage.end(),
+                                         position_delete_rows.begin(), position_delete_rows.end());
+    std::sort(_position_delete_rows_storage.begin(), _position_delete_rows_storage.end());
+    _position_delete_rows_storage.erase(
+            std::unique(_position_delete_rows_storage.begin(), _position_delete_rows_storage.end()),
+            _position_delete_rows_storage.end());
+    _delete_rows = &_position_delete_rows_storage;
+    return Status::OK();
+}
+
+Status IcebergTableReader::_init_equality_delete_predicates(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TFileScanRangeParams delete_scan_params =
+            _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params;
+    IcebergDeleteFileIOContext delete_io_ctx(_runtime_state);
+    for (const auto& delete_file : delete_files) {
+        RETURN_IF_ERROR(_read_parquet_equality_delete_file(delete_file, delete_scan_params,
+                                                           &delete_io_ctx));
+    }
+    return Status::OK();
+}
+
+Status IcebergTableReader::_read_parquet_equality_delete_file(
+        const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params,
+        IcebergDeleteFileIOContext* delete_io_ctx) {
+    if (!delete_file.__isset.file_format) {
+        return Status::InternalError("Iceberg equality delete file is missing file format");
+    }
+    if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) {
+        return Status::NotSupported("Unsupported Iceberg equality delete file format {}",
+                                    delete_file.file_format);
+    }
+    if (!delete_file.__isset.field_ids || delete_file.field_ids.empty()) {
+        return Status::InternalError("Iceberg equality delete file is missing field ids");
+    }
+
+    auto delete_range = build_iceberg_delete_file_range(delete_file.path);
+    if (_current_task != nullptr && _current_task->data_file != nullptr &&
+        !_current_task->data_file->fs_name.empty()) {
+        delete_range.__set_fs_name(_current_task->data_file->fs_name);
+    }
+    auto system_properties = _delete_file_system_properties(scan_params);
+    auto file_description = _delete_file_description(delete_range);
+    std::shared_ptr<io::IOContext> io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {});
+    format::parquet::ParquetReader reader(system_properties, file_description, io_ctx,
+                                          _scanner_profile);
+    RETURN_IF_ERROR(reader.init(_runtime_state));
+
+    std::vector<format::ColumnDefinition> schema;
+    RETURN_IF_ERROR(reader.get_schema(&schema));
+    std::vector<format::ColumnDefinition> delete_fields;
+    std::vector<int> delete_field_ids;
+    std::vector<DataTypePtr> delete_key_types;
+    for (const auto field_id : delete_file.field_ids) {
+        auto field_it = std::find_if(schema.begin(), schema.end(),
+                                     [field_id](const format::ColumnDefinition& field) {
+                                         return field.has_identifier_field_id() &&
+                                                field_id == field.get_identifier_field_id();
+                                     });
+        if (field_it == schema.end()) {
+            return Status::InternalError("Can not find field id {} in equality delete file {}",
+                                         field_id, delete_file.path);
+        }
+        if (!field_it->children.empty()) {
+            return Status::NotSupported(
+                    "Iceberg equality delete does not support complex column {}", field_it->name);
+        }
+        delete_fields.push_back(*field_it);
+        delete_field_ids.push_back(field_id);
+        delete_key_types.push_back(field_it->type);
+    }
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    for (size_t idx = 0; idx < delete_fields.size(); ++idx) {
+        const auto local_column_id = format::LocalColumnId(delete_fields[idx].file_local_id());
+        request->non_predicate_columns.push_back(
+                format::LocalColumnIndex::top_level(local_column_id));
+        request->local_positions.emplace(local_column_id, format::LocalIndex(idx));
+    }
+    RETURN_IF_ERROR(reader.open(request));
+
+    auto build_equality_delete_block =
+            [](const std::vector<format::ColumnDefinition> fields) -> Block {
+        Block block;
+        for (const auto& field : fields) {
+            block.insert({field.type->create_column(), field.type, field.name});
+        }
+        return block;
+    };
+    Block delete_block = build_equality_delete_block(delete_fields);
+    MutableBlock mutable_delete_block(std::move(delete_block));
+    bool eof = false;
+    while (!eof) {
+        Block block = build_equality_delete_block(delete_fields);
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof));
+        if (read_rows > 0) {
+            RETURN_IF_ERROR(mutable_delete_block.merge(block));
+        }
+    }
+    RETURN_IF_ERROR(reader.close());
+    delete_block = mutable_delete_block.to_block();
+    _equality_delete_filters.push_back(
+            EqualityDeleteFilter {.field_ids = std::move(delete_field_ids),
+                                  .key_types = std::move(delete_key_types),
+                                  .delete_block = std::move(delete_block)});
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_row_lineage_row_id(Block* table_block, size_t column_idx) {
+    if (_row_lineage_columns.first_row_id < 0) {
+        return Status::OK();
+    }
+    DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns());
+    const auto& row_position_column = assert_cast<const ColumnInt64&>(
+            *_data_reader.block_template.get_by_position(_row_position_block_position).column);
+    DORIS_CHECK(row_position_column.size() == table_block->rows());
+    auto column = IColumn::mutate(
+            table_block->get_by_position(column_idx).column->convert_to_full_column_if_const());
+    auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
+    auto& null_map = nullable_column->get_null_map_data();
+    auto& data = assert_cast<ColumnInt64&>(*nullable_column->get_nested_column_ptr()).get_data();
+    DORIS_CHECK(null_map.size() == row_position_column.size());
+    DORIS_CHECK(data.size() == row_position_column.size());
+    for (size_t row = 0; row < row_position_column.size(); ++row) {
+        if (null_map[row]) {
+            null_map[row] = 0;
+            data[row] = _row_lineage_columns.first_row_id + row_position_column.get_element(row);
+        }
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_iceberg_rowid(Block* table_block, size_t column_idx) {
+    DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns());
+    const auto& row_position_column = assert_cast<const ColumnInt64&>(
+            *_data_reader.block_template.get_by_position(_row_position_block_position).column);
+    DORIS_CHECK(row_position_column.size() == table_block->rows());
+
+    const auto& type = table_block->get_by_position(column_idx).type;
+    auto column = type->create_column();
+    auto* nullable_column = check_and_get_column<ColumnNullable>(column.get());
+    auto* struct_column = nullable_column != nullptr
+                                  ? check_and_get_column<ColumnStruct>(
+                                            nullable_column->get_nested_column_ptr().get())
+                                  : check_and_get_column<ColumnStruct>(column.get());
+    DORIS_CHECK(struct_column != nullptr);
+    DORIS_CHECK(struct_column->tuple_size() >= 4);
+
+    const auto rows = row_position_column.size();
+    const auto file_path = _data_file_path();
+    const int32_t partition_spec_id =
+            _iceberg_params.has_value() && _iceberg_params->__isset.partition_spec_id
+                    ? _iceberg_params->partition_spec_id
+                    : 0;
+    const std::string partition_data_json =
+            _iceberg_params.has_value() && _iceberg_params->__isset.partition_data_json
+                    ? _iceberg_params->partition_data_json
+                    : "";
+
+    auto& file_path_column = struct_column->get_column(0);
+    auto& row_pos_column = struct_column->get_column(1);
+    auto& spec_id_column = struct_column->get_column(2);
+    auto& partition_data_column = struct_column->get_column(3);
+    file_path_column.reserve(rows);
+    row_pos_column.reserve(rows);
+    spec_id_column.reserve(rows);
+    partition_data_column.reserve(rows);
+    for (size_t row = 0; row < rows; ++row) {
+        file_path_column.insert_data(file_path.data(), file_path.size());
+        const int64_t row_pos = row_position_column.get_element(row);
+        row_pos_column.insert_data(reinterpret_cast<const char*>(&row_pos), sizeof(row_pos));
+        spec_id_column.insert_data(reinterpret_cast<const char*>(&partition_spec_id),
+                                   sizeof(partition_spec_id));
+        partition_data_column.insert_data(partition_data_json.data(), partition_data_json.size());
+    }
+    if (nullable_column != nullptr) {
+        nullable_column->get_null_map_data().resize_fill(rows, 0);
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_row_lineage_last_updated_sequence_number(
+        Block* table_block, size_t column_idx) {
+    if (_row_lineage_columns.last_updated_sequence_number < 0) {
+        return Status::OK();
+    }
+    auto column = IColumn::mutate(
+            table_block->get_by_position(column_idx).column->convert_to_full_column_if_const());
+    auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
+    auto& null_map = nullable_column->get_null_map_data();
+    auto& data = assert_cast<ColumnInt64&>(*nullable_column->get_nested_column_ptr()).get_data();
+    DORIS_CHECK(null_map.size() == table_block->rows());
+    DORIS_CHECK(data.size() == table_block->rows());
+    for (size_t row = 0; row < table_block->rows(); ++row) {
+        if (null_map[row]) {
+            null_map[row] = 0;
+            data[row] = _row_lineage_columns.last_updated_sequence_number;
+        }
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+bool IcebergTableReader::_need_row_lineage_row_id() const {
+    if (_data_reader.column_mapper != nullptr) {
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            if (mapping.virtual_column_type == format::TableVirtualColumnType::ROW_ID) {
+                return true;
+            }
+        }
+    }
+    return std::ranges::any_of(_projected_columns, is_projected_row_lineage_row_id);
+}
+
+bool IcebergTableReader::_need_iceberg_rowid() const {
+    if (_data_reader.column_mapper != nullptr) {
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            if (mapping.virtual_column_type == format::TableVirtualColumnType::ICEBERG_ROWID) {
+                return true;
+            }
+        }
+    }
+    return std::ranges::any_of(_projected_columns, is_projected_iceberg_rowid);
+}
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/table/iceberg_reader.h b/be/src/format_v2/table/iceberg_reader.h
new file mode 100644
index 00000000000000..1a2811ef968277
--- /dev/null
+++ b/be/src/format_v2/table/iceberg_reader.h
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "format/table/iceberg_delete_file_reader_helper.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+class Block;
+struct DeleteFileDesc;
+namespace io {
+struct FileDescription;
+struct FileSystemProperties;
+} // namespace io
+} // namespace doris
+
+namespace doris::format::iceberg {
+
+// Iceberg table-level reader.
+// It reuses TableReader for split orchestration, dynamic partition pruning and table-block
+// finalization, while composing a FileReader for physical data-file reads instead of inheriting
+// from a concrete file-format reader.
+class IcebergTableReader : public format::TableReader {
+public:
+    ~IcebergTableReader() override = default;
+    Status init(format::TableReadOptions&& options) override {
+        RETURN_IF_ERROR(format::TableReader::init(std::move(options)));
+        _mapper_options.mode = format::TableColumnMappingMode::BY_FIELD_ID;
+        return Status::OK();
+    }
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    std::string debug_string() const override;
+    format::TableColumnMappingMode mapping_mode() const override {
+        return !_data_reader.file_schema.empty() && _has_field_id(_data_reader.file_schema)
+                       ? format::TableColumnMappingMode::BY_FIELD_ID
+                       : format::TableColumnMappingMode::BY_NAME;
+    }
+
+protected:
+    Status materialize_virtual_columns(Block* table_block) override;
+
+    Status customize_file_scan_request(format::FileScanRequest* file_request) override;
+
+    bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const override;
+
+    Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                       bool* has_delete_file) override;
+
+    Status _init_delete_predicates(const TTableFormatFileDesc& t_desc);
+
+private:
+    bool _has_field_id(const std::vector<format::ColumnDefinition>& schema) const {
+        for (const auto& field : schema) {
+            // TopN lazy materialization asks the file reader to synthesize GLOBAL_ROWID in the
+            // first-phase scan. That virtual column is not an Iceberg data field and therefore has
+            // no Iceberg field id. Do not let it downgrade schema-evolution reads to BY_NAME,
+            // otherwise old data files whose physical names predate a rename (for example,
+            // table column `new_new_id` stored as file column `id`) are materialized as defaults.
+            if (field.column_type != format::ColumnType::DATA_COLUMN) {
+                continue;
+            }
+            if (!field.has_identifier_field_id()) {
+                return false;
+            }
+            if (!_has_field_id(field.children)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    static constexpr int MIN_SUPPORT_DELETE_FILES_VERSION = 2;
+    static constexpr int POSITION_DELETE = 1;
+    static constexpr int EQUALITY_DELETE = 2;
+    static constexpr int DELETION_VECTOR = 3;
+
+    struct RowLineageColumns {
+        int64_t first_row_id = -1;
+        int64_t last_updated_sequence_number = -1;
+    };
+
+    static constexpr const char* ICEBERG_FILE_PATH = "file_path";
+    static constexpr const char* ICEBERG_ROW_POS = "pos";
+    static constexpr size_t ICEBERG_FILE_PATH_BLOCK_POSITION = 0;
+    static constexpr size_t ICEBERG_ROW_POS_BLOCK_POSITION = 1;
+
+    class PositionDeleteRowsCollector final {
+    public:
+        PositionDeleteRowsCollector(std::string data_file_path, format::DeleteRows* rows);
+
+        Status collect(const Block& block, size_t read_rows);
+
+    private:
+        std::string _data_file_path;
+        format::DeleteRows* _rows = nullptr;
+    };
+
+    static std::string _iceberg_delete_vector_cache_key(const TIcebergDeleteFileDesc& delete_file);
+
+    static std::shared_ptr<io::FileSystemProperties> _delete_file_system_properties(
+            const TFileScanRangeParams& scan_params);
+
+    static std::unique_ptr<io::FileDescription> _delete_file_description(
+            const TFileRangeDesc& range);
+
+    std::string _data_file_path() const;
+
+    // Append row position column to file scan request for position delete handling.
+    Status _append_row_position_output_column(format::FileScanRequest* request);
+    // Append equality delete predicates to file scan request based on the delete files in iceberg
+    // params. DeleteVector and position delete files use the common DeleteRows path in TableReader.
+    Status _append_equality_delete_predicates(format::FileScanRequest* request);
+
+    Status _init_equality_delete_predicates(
+            const std::vector<TIcebergDeleteFileDesc>& delete_files);
+
+    // Read equality/position delete files.
+    Status _read_parquet_equality_delete_file(const TIcebergDeleteFileDesc& delete_file,
+                                              const TFileScanRangeParams& scan_params,
+                                              IcebergDeleteFileIOContext* delete_io_ctx);
+    Status _read_parquet_position_delete_file(const TIcebergDeleteFileDesc& delete_file,
+                                              const TFileScanRangeParams& scan_params,
+                                              IcebergDeleteFileIOContext* delete_io_ctx,
+                                              PositionDeleteRowsCollector* collector);
+
+    // Read position delete files and collect deleted row positions to update DeletePredicate.
+    Status _init_position_delete_rows(const std::vector<TIcebergDeleteFileDesc>& delete_files);
+
+    // Materialize row lineage virtual columns based on the position delete file.
+    Status _materialize_iceberg_rowid(Block* table_block, size_t column_idx);
+    Status _materialize_row_lineage_row_id(Block* table_block, size_t column_idx);
+    Status _materialize_row_lineage_last_updated_sequence_number(Block* table_block,
+                                                                 size_t column_idx);
+
+    RowLineageColumns _row_lineage_columns;
+    size_t _row_position_block_position = 0;
+    std::optional<TIcebergFileDesc> _iceberg_params;
+    bool _delete_predicates_initialized = false;
+    format::DeleteRows _position_delete_rows_storage;
+    struct EqualityDeleteFilter {
+        std::vector<int> field_ids;
+        std::vector<DataTypePtr> key_types;
+        Block delete_block;
+    };
+    std::vector<EqualityDeleteFilter> _equality_delete_filters;
+
+    bool _need_row_lineage_row_id() const;
+    bool _need_iceberg_rowid() const;
+};
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/table/paimon_reader.cpp b/be/src/format_v2/table/paimon_reader.cpp
new file mode 100644
index 00000000000000..9f1bc797dd5ec7
--- /dev/null
+++ b/be/src/format_v2/table/paimon_reader.cpp
@@ -0,0 +1,188 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/paimon_reader.h"
+
+#include <cstring>
+#include <string>
+#include <utility>
+
+#include "exprs/vexpr_context.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/paimon_jni_reader.h"
+#include "format_v2/table/schema_history_util.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::paimon {
+
+Status PaimonReader::prepare_split(const format::SplitReadOptions& options) {
+    _split_schema_id = -1;
+    const auto& paimon_params = options.current_range.table_format_params.paimon_params;
+    if (paimon_params.__isset.schema_id) {
+        _split_schema_id = paimon_params.schema_id;
+    }
+    return format::TableReader::prepare_split(options);
+}
+
+format::TableColumnMappingMode PaimonReader::mapping_mode() const {
+    return format::can_map_by_history_schema(_scan_params, _split_schema_id)
+                   ? format::TableColumnMappingMode::BY_FIELD_ID
+                   : format::TableColumnMappingMode::BY_NAME;
+}
+
+Status PaimonReader::annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    if (mapping_mode() != format::TableColumnMappingMode::BY_FIELD_ID) {
+        return Status::OK();
+    }
+    return format::annotate_file_schema_from_history(_scan_params, _split_schema_id, file_schema);
+}
+
+Status PaimonReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                                 DeleteFileDesc* desc, bool* has_delete_file) {
+    DORIS_CHECK(desc != nullptr);
+    DORIS_CHECK(has_delete_file != nullptr);
+    *has_delete_file = false;
+    const auto& table_desc = t_desc.paimon_params;
+    if (!table_desc.__isset.deletion_file) {
+        return Status::OK();
+    }
+    const auto& deletion_file = table_desc.deletion_file;
+
+    const std::string key_prefix = "paimon_dv:";
+    desc->key.resize(key_prefix.size() + deletion_file.path.size() + sizeof(deletion_file.offset));
+    char* key_data = desc->key.data();
+    memcpy(key_data, key_prefix.data(), key_prefix.size());
+    key_data += key_prefix.size();
+    memcpy(key_data, deletion_file.path.data(), deletion_file.path.size());
+    key_data += deletion_file.path.size();
+    memcpy(key_data, &deletion_file.offset, sizeof(deletion_file.offset));
+    desc->path = deletion_file.path;
+    desc->start_offset = deletion_file.offset;
+    desc->size = deletion_file.length + 4;
+    desc->file_size = -1;
+    desc->format = DeleteFileDesc::Format::PAIMON;
+    *has_delete_file = true;
+    return Status::OK();
+}
+
+Status PaimonHybridReader::init(format::TableReadOptions&& options) {
+    return format::TableReader::init(std::move(options));
+}
+
+Status PaimonHybridReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(_ensure_current_split_reader(options));
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->prepare_split(options);
+}
+
+Status PaimonHybridReader::get_block(Block* block, bool* eos) {
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->get_block(block, eos);
+}
+
+Status PaimonHybridReader::close() {
+    Status close_status = Status::OK();
+    if (_native_reader != nullptr) {
+        close_status = _native_reader->close();
+    }
+    if (_jni_reader != nullptr) {
+        auto status = _jni_reader->close();
+        if (!status.ok() && close_status.ok()) {
+            close_status = std::move(status);
+        }
+    }
+    _current_split_reader = nullptr;
+    return close_status;
+}
+
+Status PaimonHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) {
+    if (_is_jni_split(options.current_range)) {
+        if (_jni_reader == nullptr) {
+            _jni_reader = std::make_unique<format::paimon::PaimonJniReader>();
+            RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI));
+        }
+        _current_split_reader = _jni_reader.get();
+    } else {
+        format::FileFormat file_format;
+        RETURN_IF_ERROR(_to_file_format(options.current_range, &file_format));
+        if (_native_reader == nullptr) {
+            _native_reader = format::paimon::PaimonReader::create_unique();
+            RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_format));
+        }
+        _current_split_reader = _native_reader.get();
+    }
+    return Status::OK();
+}
+
+Status PaimonHybridReader::_init_child_reader(format::TableReader* reader,
+                                              format::FileFormat file_format) {
+    DORIS_CHECK(reader != nullptr);
+    VExprContextSPtrs conjuncts;
+    RETURN_IF_ERROR(_clone_conjuncts(&conjuncts));
+    return reader->init({
+            .projected_columns = _projected_columns,
+            .column_predicates = _table_column_predicates,
+            .conjuncts = std::move(conjuncts),
+            .format = file_format,
+            .scan_params = _scan_params,
+            .io_ctx = _io_ctx,
+            .runtime_state = _runtime_state,
+            .scanner_profile = _scanner_profile,
+            .push_down_agg_type = _push_down_agg_type,
+            .condition_cache_digest = _condition_cache_digest,
+    });
+}
+
+Status PaimonHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear();
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& conjunct : _conjuncts) {
+        VExprSPtr root;
+        RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root));
+        conjuncts->push_back(VExprContext::create_shared(std::move(root)));
+    }
+    return Status::OK();
+}
+
+bool PaimonHybridReader::_is_jni_split(const TFileRangeDesc& range) {
+    return range.__isset.table_format_params && range.table_format_params.__isset.paimon_params &&
+           range.table_format_params.paimon_params.__isset.reader_type &&
+           range.table_format_params.paimon_params.reader_type == TPaimonReaderType::PAIMON_JNI;
+}
+
+Status PaimonHybridReader::_to_file_format(const TFileRangeDesc& range,
+                                           format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    const auto format_type =
+            range.__isset.format_type ? range.format_type : TFileFormatType::FORMAT_PARQUET;
+    switch (format_type) {
+    case TFileFormatType::FORMAT_PARQUET:
+        *file_format = format::FileFormat::PARQUET;
+        return Status::OK();
+    case TFileFormatType::FORMAT_ORC:
+        *file_format = format::FileFormat::ORC;
+        return Status::OK();
+    default:
+        return Status::NotSupported("Unsupported native Paimon file format {}",
+                                    to_string(format_type));
+    }
+}
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/table/paimon_reader.h b/be/src/format_v2/table/paimon_reader.h
new file mode 100644
index 00000000000000..200c4e885b5055
--- /dev/null
+++ b/be/src/format_v2/table/paimon_reader.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "format_v2/table_reader.h"
+
+namespace doris {
+struct DeleteFileDesc;
+}
+namespace doris::format::paimon {
+
+class PaimonReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(PaimonReader);
+    ~PaimonReader() final = default;
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+#ifdef BE_TEST
+    void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; }
+    format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); }
+    Status TEST_annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+        return annotate_file_schema(file_schema);
+    }
+#endif
+
+protected:
+    format::TableColumnMappingMode mapping_mode() const override;
+    Status annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) override;
+
+    Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                       bool* has_delete_file) override;
+
+private:
+    int64_t _split_schema_id = -1;
+};
+
+// Paimon scans can contain both native data-file splits and serialized JNI splits in the same
+// SplitSource. FileScannerV2 owns one table reader for the scanner lifetime, so this reader keeps
+// native and JNI child readers internally and dispatches each split to the matching child reader.
+class PaimonHybridReader final : public format::TableReader {
+public:
+    ~PaimonHybridReader() override = default;
+
+    Status init(format::TableReadOptions&& options) override;
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override;
+
+#ifdef BE_TEST
+    static bool TEST_is_jni_split(const TFileRangeDesc& range) { return _is_jni_split(range); }
+    static Status TEST_to_file_format(const TFileRangeDesc& range,
+                                      format::FileFormat* file_format) {
+        return _to_file_format(range, file_format);
+    }
+#endif
+
+private:
+    Status _ensure_current_split_reader(const format::SplitReadOptions& options);
+    Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format);
+    Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static bool _is_jni_split(const TFileRangeDesc& range);
+    static Status _to_file_format(const TFileRangeDesc& range, format::FileFormat* file_format);
+
+    std::unique_ptr<format::TableReader> _native_reader; // handle parquet/orc native splits
+    std::unique_ptr<format::TableReader> _jni_reader;    // handle serialized JNI splits
+    format::TableReader* _current_split_reader = nullptr;
+};
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/table/schema_history_util.cpp b/be/src/format_v2/table/schema_history_util.cpp
new file mode 100644
index 00000000000000..10109839e6987d
--- /dev/null
+++ b/be/src/format_v2/table/schema_history_util.cpp
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/schema_history_util.h"
+
+#include <algorithm>
+#include <ranges>
+#include <string>
+
+#include "core/field.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) {
+    if (!field_ptr.__isset.field_ptr || field_ptr.field_ptr == nullptr) {
+        return nullptr;
+    }
+    return field_ptr.field_ptr.get();
+}
+
+const schema::external::TField* find_child_field_by_name(
+        const std::vector<schema::external::TFieldPtr>& fields, const std::string& name) {
+    for (const auto& field_ptr : fields) {
+        const auto* field = get_field_ptr(field_ptr);
+        if (field == nullptr) {
+            continue;
+        }
+        if (field->__isset.name && to_lower(field->name) == to_lower(name)) {
+            return field;
+        }
+        if (field->__isset.name_mapping &&
+            std::ranges::any_of(field->name_mapping, [&](const std::string& alias) {
+                return to_lower(alias) == to_lower(name);
+            })) {
+            return field;
+        }
+    }
+    return nullptr;
+}
+
+void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field);
+
+void annotate_struct_children(ColumnDefinition* column,
+                              const schema::external::TStructField& struct_field) {
+    DORIS_CHECK(column != nullptr);
+    if (!struct_field.__isset.fields) {
+        return;
+    }
+    for (auto& child : column->children) {
+        const auto* child_field = find_child_field_by_name(struct_field.fields, child.name);
+        if (child_field != nullptr) {
+            annotate_column_from_field(&child, *child_field);
+        }
+    }
+}
+
+void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field) {
+    DORIS_CHECK(column != nullptr);
+    if (field.__isset.id) {
+        column->identifier = Field::create_field<TYPE_INT>(field.id);
+    }
+    column->name_mapping =
+            field.__isset.name_mapping ? field.name_mapping : std::vector<std::string> {};
+    if (!field.__isset.nestedField) {
+        return;
+    }
+    if (field.nestedField.__isset.struct_field) {
+        annotate_struct_children(column, field.nestedField.struct_field);
+    } else if (field.nestedField.__isset.array_field) {
+        if (column->children.empty() || !field.nestedField.array_field.__isset.item_field) {
+            return;
+        }
+        const auto* item_field = get_field_ptr(field.nestedField.array_field.item_field);
+        if (item_field != nullptr) {
+            annotate_column_from_field(&column->children.front(), *item_field);
+        }
+    } else if (field.nestedField.__isset.map_field) {
+        if (!column->children.empty() && field.nestedField.map_field.__isset.key_field) {
+            const auto* key_field = get_field_ptr(field.nestedField.map_field.key_field);
+            if (key_field != nullptr) {
+                annotate_column_from_field(&column->children.front(), *key_field);
+            }
+        }
+        if (column->children.size() > 1 && field.nestedField.map_field.__isset.value_field) {
+            const auto* value_field = get_field_ptr(field.nestedField.map_field.value_field);
+            if (value_field != nullptr) {
+                annotate_column_from_field(&column->children[1], *value_field);
+            }
+        }
+    }
+}
+
+} // namespace
+
+const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params,
+                                                     int64_t schema_id) {
+    if (params == nullptr || !params->__isset.history_schema_info) {
+        return nullptr;
+    }
+    for (const auto& schema : params->history_schema_info) {
+        if (schema.__isset.schema_id && schema.schema_id == schema_id) {
+            return &schema;
+        }
+    }
+    return nullptr;
+}
+
+bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id) {
+    if (split_schema_id < 0 || params == nullptr || !params->__isset.current_schema_id ||
+        !params->__isset.history_schema_info) {
+        return false;
+    }
+    return find_history_schema(params, split_schema_id) != nullptr;
+}
+
+Status annotate_file_schema_from_history(const TFileScanRangeParams* params,
+                                         int64_t split_schema_id,
+                                         std::vector<ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    const auto* schema = find_history_schema(params, split_schema_id);
+    DORIS_CHECK(schema != nullptr);
+    if (!schema->__isset.root_field || !schema->root_field.__isset.fields) {
+        return Status::OK();
+    }
+    for (auto& column : *file_schema) {
+        const auto* field = find_child_field_by_name(schema->root_field.fields, column.name);
+        if (field != nullptr) {
+            annotate_column_from_field(&column, *field);
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table/schema_history_util.h b/be/src/format_v2/table/schema_history_util.h
new file mode 100644
index 00000000000000..3c4a80b5d4c975
--- /dev/null
+++ b/be/src/format_v2/table/schema_history_util.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format {
+
+const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params,
+                                                     int64_t schema_id);
+
+bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id);
+
+// Annotate a file-local schema with the field ids and name mappings from the historical table
+// schema that describes the current split. TableReader has already annotated projected table
+// columns from current_schema_id; this function performs the symmetric annotation for the file
+// schema so TableColumnMapper can match evolved Hudi/Paimon files by field id.
+Status annotate_file_schema_from_history(const TFileScanRangeParams* params,
+                                         int64_t split_schema_id,
+                                         std::vector<ColumnDefinition>* file_schema);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table_reader.cpp b/be/src/format_v2/table_reader.cpp
new file mode 100644
index 00000000000000..d90d4f6ea337d1
--- /dev/null
+++ b/be/src/format_v2/table_reader.cpp
@@ -0,0 +1,832 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table_reader.h"
+
+#include <gen_cpp/ExternalTableSchema_types.h>
+#include <gen_cpp/PlanNodes_types.h>
+#include <gen_cpp/Types_types.h>
+
+#include <algorithm>
+#include <cstring>
+#include <ranges>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_struct.h"
+#include "exec/common/endian.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/delimited_text/csv_reader.h"
+#include "format_v2/delimited_text/text_reader.h"
+#include "format_v2/json/json_reader.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "roaring/roaring64map.hh"
+#include "storage/segment/condition_cache.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+template <typename T, typename Formatter>
+std::string join_table_reader_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    std::ostringstream out;
+    out << "[";
+    for (size_t i = 0; i < values.size(); ++i) {
+        if (i > 0) {
+            out << ", ";
+        }
+        out << formatter(values[i]);
+    }
+    out << "]";
+    return out.str();
+}
+
+std::string file_format_to_string(FileFormat format) {
+    switch (format) {
+    case FileFormat::PARQUET:
+        return "PARQUET";
+    case FileFormat::ORC:
+        return "ORC";
+    case FileFormat::CSV:
+        return "CSV";
+    case FileFormat::JSON:
+        return "JSON";
+    case FileFormat::TEXT:
+        return "TEXT";
+    case FileFormat::JNI:
+        return "JNI";
+    }
+    return "UNKNOWN";
+}
+
+std::string push_down_agg_to_string(TPushAggOp::type op) {
+    switch (op) {
+    case TPushAggOp::NONE:
+        return "NONE";
+    case TPushAggOp::COUNT:
+        return "COUNT";
+    case TPushAggOp::MINMAX:
+        return "MINMAX";
+    case TPushAggOp::MIX:
+        return "MIX";
+    case TPushAggOp::COUNT_ON_INDEX:
+        return "COUNT_ON_INDEX";
+    }
+    return "UNKNOWN";
+}
+
+std::string current_file_debug_string(const std::unique_ptr<ScanTask>& task) {
+    if (task == nullptr || task->data_file == nullptr) {
+        return "null";
+    }
+    const auto& file = *task->data_file;
+    std::ostringstream out;
+    out << "FileDescription{path=" << file.path << ", file_size=" << file.file_size
+        << ", range_start_offset=" << file.range_start_offset << ", range_size=" << file.range_size
+        << ", mtime=" << file.mtime << ", fs_name=" << file.fs_name
+        << ", file_cache_admission=" << file.file_cache_admission << "}";
+    return out.str();
+}
+
+std::string partition_values_debug_string(const std::map<std::string, Field>& partition_values) {
+    std::ostringstream out;
+    out << "{";
+    size_t idx = 0;
+    for (const auto& [key, _] : partition_values) {
+        if (idx++ > 0) {
+            out << ", ";
+        }
+        out << key;
+    }
+    out << "}";
+    return out.str();
+}
+
+const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) {
+    if (!field_ptr.__isset.field_ptr || field_ptr.field_ptr == nullptr) {
+        return nullptr;
+    }
+    return field_ptr.field_ptr.get();
+}
+
+bool external_field_matches_name(const schema::external::TField& field, const std::string& name) {
+    if (field.__isset.name && to_lower(field.name) == to_lower(name)) {
+        return true;
+    }
+    return field.__isset.name_mapping &&
+           std::ranges::any_of(field.name_mapping, [&](const std::string& alias) {
+               return to_lower(alias) == to_lower(name);
+           });
+}
+
+DataTypePtr find_struct_child_type_by_name(const DataTypeStruct& struct_type,
+                                           const std::string& field_name) {
+    for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) {
+        if (to_lower(struct_type.get_element_name(field_idx)) == to_lower(field_name)) {
+            return struct_type.get_element(field_idx);
+        }
+    }
+    return nullptr;
+}
+
+ColumnDefinition build_schema_column_from_external_field(const schema::external::TField& field,
+                                                         DataTypePtr type) {
+    ColumnDefinition column {
+            .identifier = field.__isset.id ? Field::create_field<TYPE_INT>(field.id) : Field {},
+            .name = field.__isset.name ? field.name : "",
+            .name_mapping =
+                    field.__isset.name_mapping ? field.name_mapping : std::vector<std::string> {},
+            .type = std::move(type),
+            .children = {},
+            .default_expr = nullptr,
+            .is_partition_key = false,
+    };
+    if (column.type == nullptr || !field.__isset.nestedField) {
+        return column;
+    }
+
+    const auto nested_type = remove_nullable(column.type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_STRUCT: {
+        if (!field.nestedField.__isset.struct_field ||
+            !field.nestedField.struct_field.__isset.fields) {
+            return column;
+        }
+        const auto& struct_type = assert_cast<const DataTypeStruct&>(*nested_type);
+        for (const auto& child_ptr : field.nestedField.struct_field.fields) {
+            const auto* child_field = get_field_ptr(child_ptr);
+            if (child_field == nullptr || !child_field->__isset.name) {
+                continue;
+            }
+            auto child_type = find_struct_child_type_by_name(struct_type, child_field->name);
+            if (child_type == nullptr) {
+                continue;
+            }
+            column.children.push_back(
+                    build_schema_column_from_external_field(*child_field, child_type));
+        }
+        break;
+    }
+    case TYPE_ARRAY: {
+        if (!field.nestedField.__isset.array_field ||
+            !field.nestedField.array_field.__isset.item_field) {
+            return column;
+        }
+        const auto* item_field = get_field_ptr(field.nestedField.array_field.item_field);
+        if (item_field == nullptr) {
+            return column;
+        }
+        const auto& array_type = assert_cast<const DataTypeArray&>(*nested_type);
+        auto child =
+                build_schema_column_from_external_field(*item_field, array_type.get_nested_type());
+        child.name = "element";
+        if (child.has_identifier_name()) {
+            child.identifier = Field::create_field<TYPE_STRING>(child.name);
+        }
+        column.children.push_back(std::move(child));
+        break;
+    }
+    case TYPE_MAP: {
+        if (!field.nestedField.__isset.map_field ||
+            !field.nestedField.map_field.__isset.key_field ||
+            !field.nestedField.map_field.__isset.value_field) {
+            return column;
+        }
+        const auto& map_type = assert_cast<const DataTypeMap&>(*nested_type);
+        const auto* key_field = get_field_ptr(field.nestedField.map_field.key_field);
+        if (key_field != nullptr) {
+            auto child =
+                    build_schema_column_from_external_field(*key_field, map_type.get_key_type());
+            child.name = "key";
+            if (child.has_identifier_name()) {
+                child.identifier = Field::create_field<TYPE_STRING>(child.name);
+            }
+            column.children.push_back(std::move(child));
+        }
+        const auto* value_field = get_field_ptr(field.nestedField.map_field.value_field);
+        if (value_field != nullptr) {
+            auto child = build_schema_column_from_external_field(*value_field,
+                                                                 map_type.get_value_type());
+            child.name = "value";
+            if (child.has_identifier_name()) {
+                child.identifier = Field::create_field<TYPE_STRING>(child.name);
+            }
+            column.children.push_back(std::move(child));
+        }
+        break;
+    }
+    default:
+        break;
+    }
+    return column;
+}
+
+const schema::external::TField* find_external_root_field(const TFileScanRangeParams* params,
+                                                         const ColumnDefinition& column) {
+    if (params == nullptr || !params->__isset.history_schema_info ||
+        params->history_schema_info.empty()) {
+        return nullptr;
+    }
+    const auto* schema = &params->history_schema_info.front();
+    if (params->__isset.current_schema_id) {
+        for (const auto& candidate_schema : params->history_schema_info) {
+            if (candidate_schema.__isset.schema_id &&
+                candidate_schema.schema_id == params->current_schema_id) {
+                schema = &candidate_schema;
+                break;
+            }
+        }
+    }
+    if (!schema->__isset.root_field || !schema->root_field.__isset.fields) {
+        return nullptr;
+    }
+    for (const auto& field_ptr : schema->root_field.fields) {
+        const auto* field = get_field_ptr(field_ptr);
+        if (field == nullptr) {
+            continue;
+        }
+        if (external_field_matches_name(*field, column.name)) {
+            return field;
+        }
+    }
+    return nullptr;
+}
+
+std::string expr_context_debug_string(const VExprContextSPtr& context) {
+    if (context == nullptr) {
+        return "null";
+    }
+    const auto root = context->root();
+    if (root == nullptr) {
+        return "VExprContext{root=null}";
+    }
+    std::ostringstream out;
+    out << "VExprContext{root_name=" << root->expr_name() << ", root_debug=" << root->debug_string()
+        << "}";
+    return out.str();
+}
+
+std::string table_filter_debug_string(const TableFilter& filter) {
+    std::ostringstream out;
+    out << "TableFilter{conjunct=" << expr_context_debug_string(filter.conjunct)
+        << ", global_indices="
+        << join_table_reader_debug_strings(
+                   filter.global_indices,
+                   [](GlobalIndex global_index) { return std::to_string(global_index.value()); })
+        << "}";
+    return out.str();
+}
+
+std::string table_column_predicates_debug_string(const TableColumnPredicates& predicates) {
+    std::ostringstream out;
+    out << "{";
+    size_t idx = 0;
+    for (const auto& [global_index, column_predicates] : predicates) {
+        if (idx++ > 0) {
+            out << ", ";
+        }
+        out << global_index.value() << ":{predicate_count=" << column_predicates.size() << "}";
+    }
+    out << "}";
+    return out.str();
+}
+
+bool contains_runtime_filter(const VExprContextSPtrs& conjuncts) {
+    return std::ranges::any_of(conjuncts, [](const auto& conjunct) {
+        return conjunct != nullptr && conjunct->root() != nullptr &&
+               conjunct->root()->is_rf_wrapper();
+    });
+}
+
+void collect_global_indices(const VExprSPtr& expr, std::set<GlobalIndex>* global_indices) {
+    if (expr == nullptr) {
+        return;
+    }
+    if (expr->is_rf_wrapper()) {
+        // RuntimeFilterExpr wraps a real predicate expression but its own thrift node can still
+        // look like SLOT_REF. Collect indices from the wrapped predicate; do not cast the wrapper
+        // itself to VSlotRef.
+        collect_global_indices(expr->get_impl(), global_indices);
+        return;
+    }
+    if (expr->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr.get());
+        DORIS_CHECK(slot_ref->column_id() >= 0);
+        global_indices->insert(GlobalIndex(cast_set<size_t>(slot_ref->column_id())));
+    }
+    for (const auto& child : expr->children()) {
+        collect_global_indices(child, global_indices);
+    }
+}
+
+Status build_table_filters_from_conjunct(const VExprContextSPtr& conjunct, RuntimeState* state,
+                                         std::vector<TableFilter>* table_filters) {
+    if (conjunct == nullptr) {
+        return Status::OK();
+    }
+    std::set<GlobalIndex> global_indices;
+    collect_global_indices(conjunct->root(), &global_indices);
+    if (!global_indices.empty()) {
+        TableFilter table_filter;
+        VExprSPtr filter_root;
+        RETURN_IF_ERROR(clone_table_expr_tree(conjunct->root(), &filter_root));
+        table_filter.conjunct = VExprContext::create_shared(std::move(filter_root));
+        for (const auto global_index : global_indices) {
+            table_filter.global_indices.push_back(global_index);
+        }
+        table_filters->push_back(std::move(table_filter));
+    }
+    return Status::OK();
+}
+
+Status parse_deletion_vector(const char* buf, size_t buffer_size, DeleteFileDesc::Format format,
+                             DeleteRows* delete_rows) {
+    DORIS_CHECK(buf != nullptr);
+    DORIS_CHECK(delete_rows != nullptr);
+    DORIS_CHECK(format == DeleteFileDesc::Format::PAIMON ||
+                format == DeleteFileDesc::Format::ICEBERG);
+
+    const size_t checksum_size = format == DeleteFileDesc::Format::ICEBERG ? 4 : 0;
+    if (buffer_size < 8 + checksum_size) [[unlikely]] {
+        return Status::DataQualityError("Deletion vector file size too small: {}", buffer_size);
+    }
+
+    auto total_length = BigEndian::Load32(buf);
+    if (total_length + 4 + checksum_size != buffer_size) [[unlikely]] {
+        return Status::DataQualityError("Deletion vector length mismatch, expected: {}, actual: {}",
+                                        total_length + 4 + checksum_size, buffer_size);
+    }
+
+    const char* bitmap_buf = buf + 8;
+    const size_t bitmap_size = buffer_size - 8 - checksum_size;
+    if (format == DeleteFileDesc::Format::PAIMON) {
+        // Paimon BitmapDeletionVector stores:
+        //   [4-byte big-endian length][4-byte magic 0x5E43F2D0][32-bit roaring bitmap]
+        // The length covers magic + bitmap, and does not include the leading length field.
+        constexpr static char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'};
+        if (memcmp(buf + sizeof(total_length), PAIMON_BITMAP_MAGIC, 4) != 0) [[unlikely]] {
+            return Status::DataQualityError(
+                    "Paimon deletion vector magic number mismatch, expected: {}, actual: {}",
+                    BigEndian::Load32(PAIMON_BITMAP_MAGIC),
+                    BigEndian::Load32(buf + sizeof(total_length)));
+        }
+
+        roaring::Roaring bitmap;
+        try {
+            bitmap = roaring::Roaring::readSafe(bitmap_buf, bitmap_size);
+        } catch (const std::runtime_error& e) {
+            return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
+        }
+
+        delete_rows->reserve(bitmap.cardinality());
+        for (auto it = bitmap.begin(); it != bitmap.end(); it++) {
+            delete_rows->push_back(*it);
+        }
+        return Status::OK();
+    }
+
+    constexpr static char ICEBERG_DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'};
+    if (memcmp(buf + sizeof(total_length), ICEBERG_DV_MAGIC, 4) != 0) [[unlikely]] {
+        return Status::DataQualityError(
+                "Iceberg deletion vector magic number mismatch, expected: {}, actual: {}",
+                BigEndian::Load32(ICEBERG_DV_MAGIC), BigEndian::Load32(buf + sizeof(total_length)));
+    }
+
+    roaring::Roaring64Map bitmap;
+    try {
+        bitmap = roaring::Roaring64Map::readSafe(bitmap_buf, bitmap_size);
+    } catch (const std::runtime_error& e) {
+        return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
+    }
+
+    delete_rows->reserve(bitmap.cardinality());
+    for (auto it = bitmap.begin(); it != bitmap.end(); it++) {
+        delete_rows->push_back(cast_set<int64_t>(*it));
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+std::shared_ptr<io::FileSystemProperties> create_system_properties(
+        const TFileScanRangeParams* scan_params) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    if (scan_params == nullptr || !scan_params->__isset.file_type) {
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        return system_properties;
+    }
+    system_properties->system_type = scan_params->file_type;
+    system_properties->properties = scan_params->properties;
+    system_properties->hdfs_params = scan_params->hdfs_params;
+    if (scan_params->__isset.broker_addresses) {
+        system_properties->broker_addresses.assign(scan_params->broker_addresses.begin(),
+                                                   scan_params->broker_addresses.end());
+    }
+    return system_properties;
+}
+
+std::string TableReader::debug_string() const {
+    std::ostringstream out;
+    out << "TableReader{format=" << file_format_to_string(_format)
+        << ", push_down_agg_type=" << push_down_agg_to_string(_push_down_agg_type)
+        << ", aggregate_pushdown_tried=" << _aggregate_pushdown_tried
+        << ", has_current_reader=" << (_data_reader.reader != nullptr)
+        << ", has_current_task=" << (_current_task != nullptr)
+        << ", current_file=" << current_file_debug_string(_current_task)
+        << ", has_delete_rows=" << (_delete_rows != nullptr)
+        << ", delete_row_count=" << (_delete_rows == nullptr ? 0 : _delete_rows->size())
+        << ", has_system_properties=" << (_system_properties != nullptr) << ", system_type="
+        << (_system_properties == nullptr ? static_cast<int>(TFileType::FILE_LOCAL)
+                                          : static_cast<int>(_system_properties->system_type))
+        << ", has_scan_params=" << (_scan_params != nullptr)
+        << ", has_io_ctx=" << (_io_ctx != nullptr)
+        << ", has_runtime_state=" << (_runtime_state != nullptr)
+        << ", has_scanner_profile=" << (_scanner_profile != nullptr)
+        << ", mapper_options=" << _mapper_options.debug_string() << ", projected_columns="
+        << join_table_reader_debug_strings(
+                   _projected_columns,
+                   [](const ColumnDefinition& column) { return column.debug_string(); })
+        << ", partition_values=" << partition_values_debug_string(_partition_values)
+        << ", table_filters="
+        << join_table_reader_debug_strings(
+                   _table_filters,
+                   [](const TableFilter& filter) { return table_filter_debug_string(filter); })
+        << ", table_column_predicates="
+        << table_column_predicates_debug_string(_table_column_predicates)
+        << ", conjunct_count=" << _conjuncts.size() << ", conjuncts="
+        << join_table_reader_debug_strings(_conjuncts,
+                                           [](const VExprContextSPtr& conjunct) {
+                                               return expr_context_debug_string(conjunct);
+                                           })
+        << ", file_schema="
+        << join_table_reader_debug_strings(
+                   _data_reader.file_schema,
+                   [](const ColumnDefinition& field) { return field.debug_string(); })
+        << ", file_block_layout="
+        << join_table_reader_debug_strings(
+                   _data_reader.file_block_layout,
+                   [](const FileBlockColumn& column) {
+                       std::ostringstream column_out;
+                       column_out << "FileBlockColumn{file_column_id=" << column.file_column_id
+                                  << ", name=" << column.name << ", type="
+                                  << (column.type == nullptr ? "null" : column.type->get_name())
+                                  << "}";
+                       return column_out.str();
+                   })
+        << ", block_template_columns=" << _data_reader.block_template.columns()
+        << ", column_mapper="
+        << (_data_reader.column_mapper == nullptr ? "null"
+                                                  : _data_reader.column_mapper->debug_string())
+        << "}";
+    return out.str();
+}
+
+Status TableReader::annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                              ProjectedColumnBuildContext* context,
+                                              ColumnDefinition* column) const {
+    (void)slot_info;
+    DORIS_CHECK(context != nullptr);
+    DORIS_CHECK(column != nullptr);
+    context->schema_column.reset();
+    const auto* schema_field = find_external_root_field(context->scan_params, *column);
+    if (schema_field == nullptr) {
+        return Status::OK();
+    }
+    context->schema_column = build_schema_column_from_external_field(*schema_field, column->type);
+    column->identifier = context->schema_column->identifier;
+    column->name_mapping = context->schema_column->name_mapping;
+    return Status::OK();
+}
+
+Status TableReader::init(TableReadOptions&& options) {
+    _scan_params = options.scan_params;
+    _format = options.format;
+    _io_ctx = options.io_ctx;
+    _runtime_state = options.runtime_state;
+    _scanner_profile = options.scanner_profile;
+    _file_slot_descs = options.file_slot_descs;
+    _push_down_agg_type = options.push_down_agg_type;
+    _condition_cache_digest = options.condition_cache_digest;
+    _projected_columns = std::move(options.projected_columns);
+    _system_properties = create_system_properties(_scan_params);
+    _mapper_options.mode = TableColumnMappingMode::BY_NAME;
+    _conjuncts = std::move(options.conjuncts);
+    _table_column_predicates = std::move(options.column_predicates);
+
+    if (_scanner_profile != nullptr) {
+        static const char* table_profile = "TableReader";
+        ADD_TIMER_WITH_LEVEL(_scanner_profile, table_profile, 1);
+        _profile.num_delete_files = ADD_CHILD_COUNTER_WITH_LEVEL(_scanner_profile, "NumDeleteFiles",
+                                                                 TUnit::UNIT, table_profile, 1);
+        _profile.num_delete_rows = ADD_CHILD_COUNTER_WITH_LEVEL(_scanner_profile, "NumDeleteRows",
+                                                                TUnit::UNIT, table_profile, 1);
+        _profile.parse_delete_file_time = ADD_CHILD_TIMER_WITH_LEVEL(
+                _scanner_profile, "ParseDeleteFileTime", table_profile, 1);
+        _profile.exec_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "GetBlockTime", table_profile, 1);
+        _profile.prepare_split_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "PrepareSplitTime", table_profile, 1);
+        _profile.finalize_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "FinalizeBlockTime", table_profile, 1);
+        _profile.create_reader_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "CreateReaderTime", table_profile, 1);
+        _profile.pushdown_agg_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "PushDownAggTime", table_profile, 1);
+        _profile.open_reader_timer =
+                ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "OpenReaderTime", table_profile, 1);
+    }
+    return Status::OK();
+}
+
+Status TableReader::_build_table_filters_from_conjuncts() {
+    _table_filters.clear();
+    for (const auto& conjunct : _conjuncts) {
+        RETURN_IF_ERROR(
+                build_table_filters_from_conjunct(conjunct, _runtime_state, &_table_filters));
+    }
+    return Status::OK();
+}
+
+Status TableReader::_open_local_filter_exprs(const FileScanRequest& file_request) {
+    RowDescriptor row_desc;
+    for (const auto& conjunct : file_request.conjuncts) {
+        RETURN_IF_ERROR(conjunct->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(conjunct->open(_runtime_state));
+    }
+    for (const auto& delete_conjunct : file_request.delete_conjuncts) {
+        RETURN_IF_ERROR(delete_conjunct->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(delete_conjunct->open(_runtime_state));
+    }
+    return Status::OK();
+}
+
+bool TableReader::_should_enable_condition_cache(const FileScanRequest& file_request) const {
+    if (_condition_cache_digest == 0 || _push_down_agg_type == TPushAggOp::type::COUNT ||
+        _current_file_description == std::nullopt || _data_reader.reader == nullptr) {
+        return false;
+    }
+    // Condition cache is populated by file readers after evaluating file-local row-level
+    // conjuncts. ColumnPredicate-only scans can prune row groups/pages, but they do not produce a
+    // per-row survivor bitmap that can safely populate the cache.
+    if (file_request.conjuncts.empty()) {
+        return false;
+    }
+    // Delete files/deletion vectors are table-format state. They may change independently of the
+    // data file path/mtime/size used by the external cache key, so caching their result can become
+    // stale. Keep delete filtering enabled, but do not read or write condition cache.
+    if (_delete_rows != nullptr || !file_request.delete_conjuncts.empty()) {
+        return false;
+    }
+    // Runtime filters can arrive late and their payload is not guaranteed to be represented by the
+    // scan-local digest. Without a read-only mode, a MISS could insert a bitmap for P AND RF under
+    // the digest for only P. This mirrors the old FileScanner guard.
+    return !contains_runtime_filter(file_request.conjuncts);
+}
+
+Status TableReader::_init_reader_condition_cache(const FileScanRequest& file_request) {
+    _condition_cache = nullptr;
+    _condition_cache_ctx = nullptr;
+    if (!_should_enable_condition_cache(file_request)) {
+        return Status::OK();
+    }
+
+    auto* cache = segment_v2::ConditionCache::instance();
+    if (cache == nullptr) {
+        return Status::OK();
+    }
+    const auto& file = *_current_file_description;
+    _condition_cache_key = segment_v2::ConditionCache::ExternalCacheKey(
+            file.path, file.mtime, file.file_size, _condition_cache_digest, file.range_start_offset,
+            file.range_size);
+
+    segment_v2::ConditionCacheHandle handle;
+    const bool condition_cache_hit = cache->lookup(_condition_cache_key, &handle);
+    if (condition_cache_hit) {
+        _condition_cache = handle.get_filter_result();
+        ++_condition_cache_hit_count;
+    } else {
+        const int64_t total_rows = _data_reader.reader->get_total_rows();
+        if (total_rows <= 0) {
+            return Status::OK();
+        }
+        // Add one guard granule for split ranges that start in the middle of a granule. A guard
+        // false bit beyond the real range never overlaps real rows, but avoids boundary overflow
+        // when a reader marks the last partial granule.
+        const size_t num_granules = (total_rows + ConditionCacheContext::GRANULE_SIZE - 1) /
+                                    ConditionCacheContext::GRANULE_SIZE;
+        _condition_cache = std::make_shared<std::vector<bool>>(num_granules + 1, false);
+    }
+
+    if (_condition_cache != nullptr) {
+        _condition_cache_ctx = std::make_shared<ConditionCacheContext>();
+        _condition_cache_ctx->is_hit = condition_cache_hit;
+        _condition_cache_ctx->filter_result = _condition_cache;
+        _data_reader.reader->set_condition_cache_context(_condition_cache_ctx);
+    }
+    return Status::OK();
+}
+
+void TableReader::_finalize_reader_condition_cache() {
+    if (_condition_cache_ctx == nullptr || _condition_cache_ctx->is_hit) {
+        _condition_cache = nullptr;
+        _condition_cache_ctx = nullptr;
+        return;
+    }
+    // LIMIT or scanner cancellation may close a reader before all selected row ranges are visited.
+    // Unvisited granules remain false in a MISS bitmap, so inserting a partial bitmap would make a
+    // later HIT skip valid rows. Only publish cache entries after the physical reader reaches EOF.
+    if (!_current_reader_reached_eof) {
+        _condition_cache = nullptr;
+        _condition_cache_ctx = nullptr;
+        return;
+    }
+    segment_v2::ConditionCache::instance()->insert(_condition_cache_key,
+                                                   std::move(_condition_cache));
+    _condition_cache = nullptr;
+    _condition_cache_ctx = nullptr;
+}
+
+Status TableReader::create_next_reader(bool* eos) {
+    SCOPED_TIMER(_profile.create_reader_timer);
+    DCHECK(_data_reader.reader == nullptr);
+    if (_current_task == nullptr) {
+        *eos = true;
+        return Status::OK();
+    }
+
+    RETURN_IF_ERROR(create_file_reader(&_data_reader.reader));
+    DORIS_CHECK(_data_reader.reader != nullptr);
+    RETURN_IF_ERROR(_data_reader.reader->init(_runtime_state));
+    RETURN_IF_ERROR(open_reader());
+    if (_data_reader.reader == nullptr) {
+        *eos = _current_task == nullptr;
+        return Status::OK();
+    }
+    *eos = false;
+    return Status::OK();
+}
+
+Status TableReader::create_file_reader(std::unique_ptr<FileReader>* reader) {
+    DORIS_CHECK(reader != nullptr);
+    if (_format == FileFormat::PARQUET) {
+        const bool enable_mapping_timestamp_tz =
+                _scan_params != nullptr && _scan_params->__isset.enable_mapping_timestamp_tz &&
+                _scan_params->enable_mapping_timestamp_tz;
+        *reader = std::make_unique<format::parquet::ParquetReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _global_rowid_context, enable_mapping_timestamp_tz);
+        return Status::OK();
+    }
+    if (_format == FileFormat::CSV) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("CSV reader requires file slot descriptors");
+        }
+        // CSV has no embedded schema. TableReader owns table-level mapping, while CsvReader needs
+        // only the physical file slots plus scan text parameters to build a file-local schema.
+        // Non-file columns such as partitions/defaults/virtual row ids are intentionally excluded
+        // from `_file_slot_descs` and are materialized during finalize_chunk().
+        *reader = std::make_unique<format::csv::CsvReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, *_file_slot_descs, _current_range_compress_type,
+                _current_range_load_id);
+        return Status::OK();
+    }
+    if (_format == FileFormat::TEXT) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("Text reader requires file slot descriptors");
+        }
+        // Text files have no embedded schema. As with CSV, TableReader handles table-level mapping
+        // and only passes physical file slots to the v2 TextReader.
+        *reader = std::make_unique<format::text::TextReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, *_file_slot_descs, _current_range_compress_type,
+                _current_range_load_id);
+        return Status::OK();
+    }
+    if (_format == FileFormat::JSON) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("JSON reader requires file slot descriptors");
+        }
+        *reader = std::make_unique<format::json::JsonReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, _current_file_range_desc, *_file_slot_descs,
+                _current_range_compress_type, _current_range_load_id);
+        return Status::OK();
+    }
+    return Status::NotSupported("TableReader does not support file format {}",
+                                file_format_to_string(_format));
+}
+
+std::unique_ptr<io::FileDescription> create_file_description(const TFileRangeDesc& range) {
+    auto file_description = std::make_unique<io::FileDescription>();
+    file_description->path = range.path;
+    file_description->file_size = range.__isset.file_size ? range.file_size : -1;
+    file_description->mtime = range.__isset.modification_time ? range.modification_time : 0;
+    file_description->range_start_offset = range.__isset.start_offset ? range.start_offset : 0;
+    file_description->range_size = range.__isset.size ? range.size : -1;
+    if (range.__isset.fs_name) {
+        file_description->fs_name = range.fs_name;
+    }
+    if (range.__isset.file_cache_admission) {
+        file_description->file_cache_admission = range.file_cache_admission;
+    }
+    return file_description;
+}
+
+Status TableReader::prepare_split(const SplitReadOptions& options) {
+    SCOPED_TIMER(_profile.prepare_split_timer);
+    _partition_values = std::move(options.partition_values);
+    _current_task = std::make_unique<ScanTask>();
+    _current_task->data_file = create_file_description(options.current_range);
+    _current_file_description = *_current_task->data_file;
+    _current_file_range_desc = options.current_range;
+    _current_range_compress_type = options.current_range.__isset.compress_type
+                                           ? options.current_range.compress_type
+                                           : TFileCompressType::UNKNOWN;
+    _current_range_load_id = options.current_range.__isset.load_id
+                                     ? std::make_optional(options.current_range.load_id)
+                                     : std::nullopt;
+    _global_rowid_context = options.global_rowid_context;
+    _delete_rows = nullptr;
+    _aggregate_pushdown_tried = false;
+    _remaining_table_level_count = -1;
+    _current_reader_reached_eof = false;
+    if (_push_down_agg_type == TPushAggOp::type::COUNT &&
+        options.current_range.__isset.table_format_params &&
+        options.current_range.table_format_params.__isset.table_level_row_count) {
+        DORIS_CHECK(options.current_range.table_format_params.table_level_row_count >= -1);
+        _remaining_table_level_count =
+                options.current_range.table_format_params.table_level_row_count;
+    }
+    if (_is_table_level_count_active()) {
+        return Status::OK();
+    }
+    return _parse_delete_predicates(options);
+}
+
+Status TableReader::_parse_delete_predicates(const SplitReadOptions& options) {
+    DeleteFileDesc desc {.fs_name = options.current_range.fs_name};
+    bool has_delete_file = false;
+    RETURN_IF_ERROR(_parse_deletion_vector_file(options.current_range.table_format_params, &desc,
+                                                &has_delete_file));
+    if (has_delete_file) {
+        DORIS_CHECK(options.cache != nullptr);
+        Status create_status = Status::OK();
+
+        _delete_rows = options.cache->get<DeleteRows>(desc.key, [&]() -> DeleteRows* {
+            auto* delete_rows = new DeleteRows;
+
+            DeletionVectorReader dv_reader(_runtime_state, _scanner_profile, *_scan_params, desc,
+                                           _io_ctx.get());
+            create_status = dv_reader.open();
+            if (!create_status.ok()) [[unlikely]] {
+                return nullptr;
+            }
+
+            size_t bytes_read = desc.size;
+            std::vector<char> buffer(bytes_read);
+            create_status = dv_reader.read_at(desc.start_offset, {buffer.data(), bytes_read});
+            if (!create_status.ok()) [[unlikely]] {
+                return nullptr;
+            }
+
+            const char* buf = buffer.data();
+            SCOPED_TIMER(_profile.parse_delete_file_time);
+            create_status = parse_deletion_vector(buf, bytes_read, desc.format, delete_rows);
+            if (!create_status.ok()) [[unlikely]] {
+                return nullptr;
+            }
+            COUNTER_UPDATE(_profile.num_delete_rows, delete_rows->size());
+            return delete_rows;
+        });
+        RETURN_IF_ERROR(create_status);
+    }
+
+    return Status::OK();
+}
+} // namespace doris::format
diff --git a/be/src/format_v2/table_reader.h b/be/src/format_v2/table_reader.h
new file mode 100644
index 00000000000000..a94cae621c9546
--- /dev/null
+++ b/be/src/format_v2/table_reader.h
@@ -0,0 +1,1535 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <bvar/status.h>
+
+#include <algorithm>
+#include <exception>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "common/exception.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+#include "exec/common/stringop_substring.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vexpr_fwd.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_data.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/expr/delete_predicate.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/schema_projection.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/descriptors.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris {
+class Block;
+class ColumnPredicate;
+struct DeleteFileDesc;
+class RuntimeState;
+} // namespace doris
+
+namespace doris::format {
+
+using DeleteRows = std::vector<int64_t>;
+
+// Row-level predicates on table/global schema. They are rewritten to file-local expressions when
+// possible, and remain the source of row-level filtering after localization.
+struct TableFilter {
+    VExprContextSPtr conjunct;
+    std::vector<GlobalIndex> global_indices;
+};
+
+struct ScanTask {
+    virtual ~ScanTask() = default;
+
+    std::unique_ptr<io::FileDescription> data_file;
+};
+
+struct ProjectedColumnBuildContext {
+    const TFileScanRangeParams* scan_params = nullptr;
+    const TFileRangeDesc* range = nullptr;
+    RuntimeState* runtime_state = nullptr;
+    std::optional<ColumnDefinition> schema_column = std::nullopt;
+    size_t next_file_column_idx = 0;
+};
+
+struct ReadProfile {
+    RuntimeProfile::Counter* num_delete_files = nullptr;
+    RuntimeProfile::Counter* num_delete_rows = nullptr;
+    RuntimeProfile::Counter* parse_delete_file_time = nullptr;
+    RuntimeProfile::Counter* exec_timer = nullptr;
+    RuntimeProfile::Counter* prepare_split_timer = nullptr;
+    RuntimeProfile::Counter* finalize_timer = nullptr;
+    RuntimeProfile::Counter* create_reader_timer = nullptr;
+    RuntimeProfile::Counter* pushdown_agg_timer = nullptr;
+    RuntimeProfile::Counter* open_reader_timer = nullptr;
+};
+
+struct TableReadOptions {
+    // Columns need to be read from file and output by table reader. They are all in table/global
+    // schema semantics.
+    const std::vector<ColumnDefinition> projected_columns;
+    // Simple predicates for a single column, which is parsed on scan operator.
+    const TableColumnPredicates column_predicates;
+    // All complex conjuncts from scan operator
+    const VExprContextSPtrs conjuncts;
+    // File format of the underlying data files, needed for reader initialization and reader-level
+    // filter pushdown.
+    const FileFormat format;
+    TFileScanRangeParams* scan_params;
+    std::shared_ptr<io::IOContext> io_ctx;
+    RuntimeState* runtime_state;
+    RuntimeProfile* scanner_profile;
+    // File formats without self-describing metadata, such as CSV, need the original FE slot
+    // descriptors to build their file-local schema and deserialize values. Self-describing formats
+    // ignore this field and use metadata parsed from the file footer/header.
+    const std::vector<SlotDescriptor*>* file_slot_descs = nullptr;
+    // Push-down aggregate type.
+    const TPushAggOp::type push_down_agg_type = TPushAggOp::type::NONE;
+    // Digest of stable pushed-down predicates. A zero digest disables condition cache.
+    uint64_t condition_cache_digest = 0;
+};
+
+struct SplitReadOptions {
+    // Split-level information for reader initialization, which may include file path, partition values, delete file info, etc. The content is table format specific and opaque to table reader base class; it's the responsibility of the concrete table reader implementation to parse necessary information for reader initialization and filter pushdown.
+    std::map<std::string, Field> partition_values;
+    ShardedKVCache* cache;
+    TFileRangeDesc current_range;
+    std::optional<GlobalRowIdContext> global_rowid_context;
+};
+
+// Base class for table-level readers.
+// This layer owns common table-level orchestration, such as split iteration, dynamic partition
+// pruning, delete handling and conversion from file-local blocks to table-schema blocks. Concrete
+// table-format readers only need to provide format-specific hooks for opening readers and parsing
+// split metadata.
+class TableReader {
+public:
+    virtual ~TableReader() = default;
+
+    // Initialize common runtime options for the table reader. Subclasses may call this from their
+    // own init(options); table-format schema and split metadata are provided later per split.
+    virtual Status init(TableReadOptions&& options);
+
+    // Prepare for reading a new split/task.
+    // 1. Pass a new split/task to reader, which will be used in subsequent open_reader() to initialize the underlying file reader.
+    // 2. Parse delete predicates from split/task information, which will be used for later dynamic filtering and delete handling.
+    virtual Status prepare_split(const SplitReadOptions& options);
+
+    // Public entry point for reading a table-schema block. The base class opens the current reader,
+    // advances across EOF, and closes exhausted readers. Subclasses provide protected hooks for
+    // table-format-specific behavior.
+    virtual Status get_block(Block* block, bool* eos) {
+        SCOPED_TIMER(_profile.exec_timer);
+        DORIS_CHECK(block->columns() == _projected_columns.size());
+        block->clear_column_data(_projected_columns.size());
+
+        while (true) {
+            if (*eos) {
+                return Status::OK();
+            }
+            if (!_data_reader.reader) {
+                if (_is_table_level_count_active()) {
+                    RETURN_IF_ERROR(_read_table_level_count(block, eos));
+                    return Status::OK();
+                }
+                RETURN_IF_ERROR(create_next_reader(eos));
+                if (!_data_reader.reader) {
+                    DCHECK(*eos);
+                    return Status::OK();
+                }
+            }
+
+            // Materialize a reduced row set for upper aggregate operators when aggregate
+            // pushdown can be applied. This is not the final aggregate result: COUNT emits
+            // `count` default rows for the upper COUNT(*), and MIN/MAX emits two rows containing
+            // file-level min/max values for the upper MIN/MAX.
+            if (!_aggregate_pushdown_tried) {
+                SCOPED_TIMER(_profile.pushdown_agg_timer);
+                bool pushed_down = false;
+                RETURN_IF_ERROR(_try_materialize_aggregate_pushdown_rows(block, &pushed_down));
+                if (pushed_down) {
+                    return Status::OK();
+                }
+            }
+
+            bool current_eof = false;
+            _data_reader.block_template.clear_column_data(
+                    cast_set<int64_t>(_data_reader.file_block_layout.size()));
+            size_t current_rows = 0;
+            RETURN_IF_ERROR(_data_reader.reader->get_block(&_data_reader.block_template,
+                                                           &current_rows, &current_eof));
+            if (current_rows == 0) {
+                if (current_eof) {
+                    _current_reader_reached_eof = true;
+                    RETURN_IF_ERROR(close_current_reader());
+                }
+                continue;
+            }
+            DCHECK_EQ(_data_reader.block_template.columns(), _data_reader.file_block_layout.size())
+                    << _data_reader.block_template.dump_structure();
+#ifndef NDEBUG
+            RETURN_IF_ERROR(_check_file_block_columns("after file reader get_block", current_rows));
+#endif
+            DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+            RETURN_IF_ERROR(finalize_chunk(block, current_rows));
+#ifndef NDEBUG
+            RETURN_IF_ERROR(
+                    _check_table_block_columns("after finalize_chunk", block, current_rows));
+#endif
+            if (current_eof) {
+                _current_reader_reached_eof = true;
+                RETURN_IF_ERROR(close_current_reader());
+            }
+            return Status::OK();
+        }
+    }
+
+    // Close the table reader and the currently active file reader. Subclasses that hold additional
+    // table-format resources should override this and call TableReader::close() first.
+    virtual Status close() {
+        if (_data_reader.reader) {
+            RETURN_IF_ERROR(close_current_reader());
+        }
+        _current_task.reset();
+        _current_file_description.reset();
+        _remaining_table_level_count = -1;
+        return Status::OK();
+    }
+
+    int64_t condition_cache_hit_count() const { return _condition_cache_hit_count; }
+
+    virtual std::string debug_string() const;
+
+    virtual Status annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                             ProjectedColumnBuildContext* context,
+                                             ColumnDefinition* column) const;
+
+    virtual Status validate_projected_columns(const ProjectedColumnBuildContext& context) const {
+        (void)context;
+        return Status::OK();
+    }
+
+protected:
+    // Parse deletion vector information from table format specific file description.
+    virtual Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                               DeleteFileDesc* desc, bool* has_delete_file) {
+        *has_delete_file = false;
+        return Status::OK();
+    }
+
+    // Advance to the next reader. This closes the current reader first and then opens the next
+    // concrete reader. Subclasses should not duplicate this loop.
+    Status create_next_reader(bool* eos);
+    virtual Status create_file_reader(std::unique_ptr<FileReader>* reader);
+    virtual TableColumnMappingMode mapping_mode() const { return TableColumnMappingMode::BY_NAME; }
+    virtual Status annotate_file_schema(std::vector<ColumnDefinition>* file_schema) {
+        DORIS_CHECK(file_schema != nullptr);
+        return Status::OK();
+    }
+
+    // Open the concrete reader for the current split/task and build the file-local scan request.
+    virtual Status open_reader() {
+        SCOPED_TIMER(_profile.open_reader_timer);
+        // 1. Get file schema and create column mapping.
+        std::vector<ColumnDefinition> file_schema;
+        RETURN_IF_ERROR(_data_reader.reader->get_schema(&file_schema));
+        // For Paimon/Hudi, FE can provide field ids through `history_schema_info`. Annotate the
+        // file schema before column mapping when the table format maps columns by field id.
+        RETURN_IF_ERROR(annotate_file_schema(&file_schema));
+        _data_reader.file_schema = file_schema;
+        _mapper_options.mode = mapping_mode();
+
+        _data_reader.column_mapper = _data_reader.reader->create_column_mapper(_mapper_options);
+        DORIS_CHECK(_data_reader.column_mapper != nullptr);
+        RETURN_IF_ERROR(_data_reader.column_mapper->create_mapping(_projected_columns,
+                                                                   _partition_values, file_schema));
+        DORIS_CHECK(_data_reader.column_mapper->mappings().size() == _projected_columns.size());
+
+        // 2. Build table filters based on conjuncts and column predicates.
+        RETURN_IF_ERROR(_build_table_filters_from_conjuncts());
+
+        // 3. Create file scan request based on column mapping and table filters, then open file
+        // reader with the request. File scan request carries row-level expression filters and
+        // file-level pruning hints. Only expression filters decide returned rows; column predicates
+        // are pruning hints.
+        auto file_request = std::make_shared<FileScanRequest>();
+        RETURN_IF_ERROR(_data_reader.column_mapper->create_scan_request(
+                _table_filters, _table_column_predicates, _projected_columns, file_request.get(),
+                _runtime_state));
+        bool constant_filter_pruned_split = false;
+        RETURN_IF_ERROR(_evaluate_constant_filters(&constant_filter_pruned_split));
+        if (constant_filter_pruned_split) {
+            RETURN_IF_ERROR(close_current_reader());
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(customize_file_scan_request(file_request.get()));
+        RETURN_IF_ERROR(_open_local_filter_exprs(*file_request));
+        _data_reader.file_block_layout.clear();
+        _data_reader.block_template.clear();
+        _data_reader.file_block_layout.resize(file_request->local_positions.size());
+
+        // 4. Build file block layout from file schema and column mapping. The layout describes
+        // the block returned by file reader before table-column materialization.
+        for (const auto& [file_column_id, block_position] : file_request->local_positions) {
+            DORIS_CHECK(block_position.value() < _data_reader.file_block_layout.size());
+            const auto* field = _find_column_definition(_data_reader.file_schema, file_column_id);
+            DORIS_CHECK(field != nullptr);
+
+            ColumnDefinition projected_field;
+            {
+                auto it = std::find_if(
+                        file_request->non_predicate_columns.begin(),
+                        file_request->non_predicate_columns.end(),
+                        [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; });
+                if (it != file_request->non_predicate_columns.end()) {
+                    RETURN_IF_ERROR(project_column_definition(*field, *it, &projected_field));
+                }
+            }
+            {
+                auto it = std::find_if(
+                        file_request->predicate_columns.begin(),
+                        file_request->predicate_columns.end(),
+                        [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; });
+                if (it != file_request->predicate_columns.end()) {
+                    RETURN_IF_ERROR(project_column_definition(*field, *it, &projected_field));
+                }
+            }
+            _data_reader.file_block_layout[block_position.value()] = {
+                    .file_column_id = file_column_id,
+                    .name = projected_field.name,
+                    .type = projected_field.type,
+            };
+            DORIS_CHECK(_data_reader.file_block_layout[block_position.value()].type != nullptr);
+        }
+
+        // 5. Prepare block template from file block layout. The block template stores the block
+        // returned by file reader before table-column materialization.
+        _data_reader.block_template.reserve(_data_reader.file_block_layout.size());
+        for (const auto& column : _data_reader.file_block_layout) {
+            _data_reader.block_template.insert(
+                    {column.type->create_column(), column.type, column.name});
+        }
+        if (VLOG_DEBUG_IS_ON) {
+            VLOG_DEBUG << "TableReader debug: " << debug_string();
+        }
+        RETURN_IF_ERROR(_open_mapping_exprs());
+        RETURN_IF_ERROR(_data_reader.reader->open(file_request));
+        RETURN_IF_ERROR(_init_reader_condition_cache(*file_request));
+        return Status::OK();
+    }
+
+    Status _build_table_filters_from_conjuncts();
+    Status _open_local_filter_exprs(const FileScanRequest& file_request);
+    Status _init_reader_condition_cache(const FileScanRequest& file_request);
+    void _finalize_reader_condition_cache();
+    bool _should_enable_condition_cache(const FileScanRequest& file_request) const;
+
+    Status _evaluate_constant_filters(bool* can_filter_all) {
+        DORIS_CHECK(can_filter_all != nullptr);
+        *can_filter_all = false;
+        for (const auto& table_filter : _table_filters) {
+            if (table_filter.conjunct == nullptr ||
+                // RuntimeFilterExpr does not implement execute_column_impl(); it is evaluated by
+                // the row-level filter path through execute_filter(). Constant split pruning uses
+                // VExprContext::execute() on a one-row synthetic block, so runtime filters must not
+                // be pre-executed here even when their referenced slot maps to a constant value.
+                table_filter.conjunct->root()->is_rf_wrapper() ||
+                !_table_filter_has_only_constant_entries(table_filter)) {
+                continue;
+            }
+            Block eval_block;
+            RETURN_IF_ERROR(_build_constant_filter_block(table_filter, &eval_block));
+            RowDescriptor row_desc;
+            RETURN_IF_ERROR(table_filter.conjunct->prepare(_runtime_state, row_desc));
+            RETURN_IF_ERROR(table_filter.conjunct->open(_runtime_state));
+            int result_column_id = -1;
+            RETURN_IF_ERROR(table_filter.conjunct->execute(&eval_block, &result_column_id));
+            DORIS_CHECK(result_column_id >= 0);
+            if (_filter_result_filters_all(eval_block.get_by_position(result_column_id).column)) {
+                *can_filter_all = true;
+                return Status::OK();
+            }
+        }
+        return Status::OK();
+    }
+
+    bool _table_filter_has_only_constant_entries(const TableFilter& table_filter) const {
+        const auto& filter_entries = _data_reader.column_mapper->filter_entries();
+        for (const auto global_index : table_filter.global_indices) {
+            const auto entry_it = filter_entries.find(global_index);
+            if (entry_it == filter_entries.end() || !entry_it->second.is_constant()) {
+                return false;
+            }
+        }
+        return !table_filter.global_indices.empty();
+    }
+
+    Status _build_constant_filter_block(const TableFilter& table_filter, Block* eval_block) {
+        DORIS_CHECK(eval_block != nullptr);
+        eval_block->clear();
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        const auto& filter_entries = _data_reader.column_mapper->filter_entries();
+        DORIS_CHECK(mappings.size() == _projected_columns.size());
+        for (size_t column_idx = 0; column_idx < mappings.size(); ++column_idx) {
+            const auto global_index = GlobalIndex(column_idx);
+            const auto& mapping = mappings[column_idx];
+            const auto entry_it = filter_entries.find(global_index);
+            const bool referenced_by_filter =
+                    std::find(table_filter.global_indices.begin(),
+                              table_filter.global_indices.end(),
+                              global_index) != table_filter.global_indices.end();
+            if (referenced_by_filter && entry_it != filter_entries.end() &&
+                entry_it->second.is_constant()) {
+                ColumnPtr constant_column;
+                RETURN_IF_ERROR(_materialize_constant_filter_column(
+                        entry_it->second.constant_index(), &constant_column));
+                eval_block->insert({std::move(constant_column), mapping.table_type,
+                                    mapping.table_column_name});
+            } else {
+                eval_block->insert({mapping.table_type->create_column_const_with_default_value(1),
+                                    mapping.table_type, mapping.table_column_name});
+            }
+        }
+        return Status::OK();
+    }
+
+    Status _materialize_constant_filter_column(ConstantIndex constant_index, ColumnPtr* column) {
+        DORIS_CHECK(column != nullptr);
+        const auto& constant_entry = _data_reader.column_mapper->constant_map().get(constant_index);
+        DORIS_CHECK(constant_entry.expr != nullptr);
+        DORIS_CHECK(constant_entry.type != nullptr);
+        RowDescriptor row_desc;
+        RETURN_IF_ERROR(constant_entry.expr->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(constant_entry.expr->open(_runtime_state));
+        Block eval_block;
+        eval_block.insert({constant_entry.type->create_column_const_with_default_value(1),
+                           constant_entry.type, "__table_reader_constant_filter"});
+        int result_column_id = -1;
+        RETURN_IF_ERROR(constant_entry.expr->execute(&eval_block, &result_column_id));
+        DORIS_CHECK(result_column_id >= 0);
+        *column = eval_block.get_by_position(result_column_id).column;
+        DORIS_CHECK((*column)->size() == 1);
+        return Status::OK();
+    }
+
+    static bool _filter_result_filters_all(const ColumnPtr& filter_column) {
+        DORIS_CHECK(filter_column.get() != nullptr);
+        DORIS_CHECK(filter_column->size() == 1);
+        return !filter_column->get_bool(0);
+    }
+
+    virtual Status customize_file_scan_request(FileScanRequest* file_request) {
+        return _append_delete_predicate(file_request);
+    }
+
+    bool _is_table_level_count_active() const { return _remaining_table_level_count >= 0; }
+
+    Status _materialize_count_rows(size_t rows, Block* block) const {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(block->columns() > 0 || rows == 0);
+        for (size_t column_idx = 0; column_idx < block->columns(); ++column_idx) {
+            auto column = block->get_by_position(column_idx).type->create_column();
+            column->resize(rows);
+            block->replace_by_position(column_idx, std::move(column));
+        }
+        return Status::OK();
+    }
+
+    Status _read_table_level_count(Block* block, bool* eos) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(eos != nullptr);
+        DORIS_CHECK(_push_down_agg_type == TPushAggOp::type::COUNT);
+        DORIS_CHECK(_remaining_table_level_count >= 0);
+        if (_remaining_table_level_count == 0) {
+            _remaining_table_level_count = -1;
+            _current_task.reset();
+            *eos = true;
+            return Status::OK();
+        }
+
+        const int64_t batch_size = _runtime_state == nullptr
+                                           ? _remaining_table_level_count
+                                           : static_cast<int64_t>(_runtime_state->batch_size());
+        const auto rows = std::min(_remaining_table_level_count, batch_size);
+        RETURN_IF_ERROR(_materialize_count_rows(cast_set<size_t>(rows), block));
+        _remaining_table_level_count -= rows;
+        *eos = false;
+        return Status::OK();
+    }
+
+    void _append_file_scan_column(FileScanRequest* request, LocalColumnId column_id,
+                                  std::vector<LocalColumnIndex>* scan_columns) {
+        DORIS_CHECK(request != nullptr);
+        DORIS_CHECK(scan_columns != nullptr);
+        FileScanRequestBuilder builder(request);
+        Status status;
+        if (scan_columns == &request->predicate_columns) {
+            status = builder.add_predicate_column(column_id);
+        } else {
+            DORIS_CHECK(scan_columns == &request->non_predicate_columns);
+            status = builder.add_non_predicate_column(column_id);
+        }
+        DORIS_CHECK(status.ok()) << status.to_string();
+        if (column_id == LocalColumnId(ROW_POSITION_COLUMN_ID) &&
+            _find_column_definition(_data_reader.file_schema, column_id) == nullptr) {
+            _data_reader.file_schema.push_back(row_position_column_definition());
+        }
+    }
+
+    // Append DeletePredicate to file scan request if there are deletes. The predicate will be evaluated in file reader level and filter out deleted rows before returning data to table reader.
+    Status _append_delete_predicate(FileScanRequest* request) {
+        DORIS_CHECK(request != nullptr);
+        if (_delete_rows == nullptr || _delete_rows->empty()) {
+            return Status::OK();
+        }
+        const auto row_position_column_id = LocalColumnId(ROW_POSITION_COLUMN_ID);
+        _append_file_scan_column(request, row_position_column_id, &request->predicate_columns);
+
+        auto delete_predicate = std::make_shared<DeletePredicate>(*_delete_rows);
+        const auto block_position = request->local_positions.at(row_position_column_id);
+        delete_predicate->add_child(VSlotRef::create_shared(
+                cast_set<int>(block_position.value()), cast_set<int>(block_position.value()), -1,
+                std::make_shared<DataTypeInt64>(), ROW_POSITION_COLUMN_NAME));
+
+        request->delete_conjuncts.push_back(
+                VExprContext::create_shared(std::move(delete_predicate)));
+        return Status::OK();
+    }
+
+    // Close the current concrete reader. This hook is called by both create_next_reader() and
+    // close(), so it should remain idempotent.
+    virtual Status close_current_reader() {
+        _finalize_reader_condition_cache();
+        RETURN_IF_ERROR(_data_reader.reader->close());
+        _data_reader.reader.reset();
+        if (_data_reader.column_mapper != nullptr) {
+            _data_reader.column_mapper->clear();
+            _data_reader.column_mapper.reset();
+        }
+        _table_filters.clear();
+        _data_reader.file_schema.clear();
+        _data_reader.file_block_layout.clear();
+        _data_reader.block_template.clear();
+        _current_task.reset();
+        _current_file_description.reset();
+        _current_reader_reached_eof = false;
+        return Status::OK();
+    }
+
+    // Finalize file-local block to table/global schema block.
+    Status finalize_chunk(Block* block, const size_t rows) {
+        SCOPED_TIMER(_profile.finalize_timer);
+        size_t idx = 0;
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            ColumnPtr column;
+            RETURN_IF_ERROR(_materialize_mapping_column(mapping, &_data_reader.block_template, rows,
+                                                        &column));
+            block->replace_by_position(idx, IColumn::mutate(std::move(column)));
+            idx++;
+        }
+        RETURN_IF_ERROR(materialize_virtual_columns(block));
+        // Enforce CHAR/VARCHAR length declared by the table schema after all file-to-table
+        // materialization has finished.
+        RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
+        return Status::OK();
+    }
+
+    // Materialize virtual columns in the table block, such as Iceberg _row_id and
+    // _last_updated_sequence_number. This runs after normal column materialization so finalize
+    // expressions can reference those virtual columns.
+    virtual Status materialize_virtual_columns(Block* table_block) { return Status::OK(); }
+
+#ifndef NDEBUG
+    Status _check_file_block_columns(std::string_view stage, size_t rows) {
+        DORIS_CHECK(_data_reader.block_template.columns() == _data_reader.file_block_layout.size());
+        for (size_t idx = 0; idx < _data_reader.block_template.columns(); ++idx) {
+            const auto& file_block_column = _data_reader.file_block_layout[idx];
+            const auto& column_with_type = _data_reader.block_template.get_by_position(idx);
+            const auto* column = column_with_type.column.get();
+            try {
+                if (column == nullptr) {
+                    auto st = Status::InternalError(
+                            "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                            "type={}, column=null, expected_rows={}, reader={}",
+                            idx, stage, file_block_column.file_column_id.value(),
+                            file_block_column.name,
+                            file_block_column.type == nullptr ? "null"
+                                                              : file_block_column.type->get_name(),
+                            rows, debug_string());
+                    LOG(WARNING) << st;
+                    return st;
+                }
+                column->sanity_check();
+                auto st = column_with_type.check_type_and_column_match();
+                if (!st.ok()) {
+                    auto contextual_status = Status::InternalError(
+                            "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                            "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                            "reader={}",
+                            idx, stage, file_block_column.file_column_id.value(),
+                            file_block_column.name,
+                            file_block_column.type == nullptr ? "null"
+                                                              : file_block_column.type->get_name(),
+                            column->get_name(), column->size(), rows, st.to_string(),
+                            debug_string());
+                    LOG(WARNING) << contextual_status;
+                    return contextual_status;
+                }
+            } catch (const Exception& e) {
+                auto st = Status::InternalError(
+                        "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "reader={}",
+                        idx, stage, file_block_column.file_column_id.value(),
+                        file_block_column.name,
+                        file_block_column.type == nullptr ? "null"
+                                                          : file_block_column.type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, e.to_string(),
+                        debug_string());
+                LOG(WARNING) << st;
+                return st;
+            } catch (const std::exception& e) {
+                auto st = Status::InternalError(
+                        "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "reader={}",
+                        idx, stage, file_block_column.file_column_id.value(),
+                        file_block_column.name,
+                        file_block_column.type == nullptr ? "null"
+                                                          : file_block_column.type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, e.what(), debug_string());
+                LOG(WARNING) << st;
+                return st;
+            }
+        }
+        return Status::OK();
+    }
+
+    Status _check_table_block_columns(std::string_view stage, const Block* block, size_t rows) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+        for (size_t idx = 0; idx < block->columns(); ++idx) {
+            const auto& mapping = _data_reader.column_mapper->mappings()[idx];
+            const auto& column_with_type = block->get_by_position(idx);
+            const auto* column = column_with_type.column.get();
+            try {
+                if (column == nullptr) {
+                    auto st = Status::InternalError(
+                            "Invalid table block column {} at {}: table_column='{}', "
+                            "global_index={}, type={}, column=null, expected_rows={}, mapping={}",
+                            idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                            mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                            rows, mapping.debug_string());
+                    LOG(WARNING) << st;
+                    return st;
+                }
+                column->sanity_check();
+                auto st = column_with_type.check_type_and_column_match();
+                if (!st.ok()) {
+                    auto contextual_status = Status::InternalError(
+                            "Invalid table block column {} at {}: table_column='{}', "
+                            "global_index={}, type={}, column={}, column_size={}, "
+                            "expected_rows={}, error={}, mapping={}",
+                            idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                            mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                            column->get_name(), column->size(), rows, st.to_string(),
+                            mapping.debug_string());
+                    LOG(WARNING) << contextual_status;
+                    return contextual_status;
+                }
+            } catch (const Exception& e) {
+                auto st = Status::InternalError(
+                        "Invalid table block column {} at {}: table_column='{}', global_index={}, "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "mapping={}",
+                        idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                        mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, e.to_string(),
+                        mapping.debug_string());
+                LOG(WARNING) << st;
+                return st;
+            } catch (const std::exception& e) {
+                auto st = Status::InternalError(
+                        "Invalid table block column {} at {}: table_column='{}', global_index={}, "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "mapping={}",
+                        idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                        mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, e.what(),
+                        mapping.debug_string());
+                LOG(WARNING) << st;
+                return st;
+            }
+        }
+        return Status::OK();
+    }
+#endif
+
+    Status _truncate_char_or_varchar_columns(Block* block) {
+        DORIS_CHECK(block != nullptr);
+        if (_runtime_state == nullptr ||
+            !_runtime_state->query_options().truncate_char_or_varchar_columns) {
+            return Status::OK();
+        }
+        DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+        for (size_t idx = 0; idx < _data_reader.column_mapper->mappings().size(); ++idx) {
+            const auto& mapping = _data_reader.column_mapper->mappings()[idx];
+            if (!_should_truncate_char_or_varchar_column(mapping)) {
+                continue;
+            }
+            const auto target_len =
+                    assert_cast<const DataTypeString*>(remove_nullable(mapping.table_type).get())
+                            ->len();
+            _truncate_char_or_varchar_column(block, idx, target_len);
+        }
+        return Status::OK();
+    }
+
+    // Return true when the table schema has a bounded CHAR/VARCHAR length that is stricter than
+    // the file-side type. Examples:
+    // - table VARCHAR(10), file VARCHAR(20): truncate to 10;
+    // - table VARCHAR(10), file STRING: truncate to 10 because STRING has no declared bound;
+    // - table STRING, any file type: no truncation because the target has no bound.
+    static bool _should_truncate_char_or_varchar_column(const ColumnMapping& mapping) {
+        if (mapping.table_type == nullptr) {
+            return false;
+        }
+        const auto table_type = remove_nullable(mapping.table_type);
+        const auto primitive_type = table_type->get_primitive_type();
+        if (primitive_type != TYPE_VARCHAR && primitive_type != TYPE_CHAR) {
+            return false;
+        }
+        const auto target_len = assert_cast<const DataTypeString*>(table_type.get())->len();
+        if (target_len <= 0) {
+            return false;
+        }
+        if (mapping.file_type == nullptr) {
+            return true;
+        }
+        const auto file_type = remove_nullable(mapping.file_type);
+        DORIS_CHECK(file_type != nullptr);
+        int file_len = -1;
+        if (file_type->get_primitive_type() == TYPE_VARCHAR ||
+            file_type->get_primitive_type() == TYPE_CHAR ||
+            file_type->get_primitive_type() == TYPE_STRING) {
+            file_len = assert_cast<const DataTypeString*>(file_type.get())->len();
+        }
+
+        return file_len < 0 || target_len < file_len;
+    }
+
+    // Truncate a materialized CHAR/VARCHAR column in place by reusing the vectorized substring
+    // implementation: substring(column, 1, len). Nullable columns are unwrapped before substring
+    // execution and wrapped back with the original null map afterward, because substring operates
+    // on the nested string payload only.
+    static void _truncate_char_or_varchar_column(Block* block, size_t idx, int len) {
+        DORIS_CHECK(block != nullptr);
+        auto int_type = std::make_shared<DataTypeInt32>();
+        const auto num_columns_without_result = cast_set<uint32_t>(block->columns());
+        auto& target = block->get_by_position(idx);
+        const bool is_nullable = target.type->is_nullable();
+        ColumnPtr input_column = target.column;
+        ColumnPtr null_map_column;
+        if (is_nullable) {
+            const auto* nullable_column = assert_cast<const ColumnNullable*>(target.column.get());
+            input_column = nullable_column->get_nested_column_ptr();
+            null_map_column = nullable_column->get_null_map_column_ptr();
+        }
+        block->replace_by_position(idx, std::move(input_column));
+        block->insert({int_type->create_column_const(block->rows(), to_field<TYPE_INT>(1)),
+                       int_type, "const 1"});
+        block->insert({int_type->create_column_const(block->rows(), to_field<TYPE_INT>(len)),
+                       int_type, "const len"});
+        block->insert({nullptr, std::make_shared<DataTypeString>(), "result"});
+
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = cast_set<uint32_t>(idx);
+        temp_arguments[1] = num_columns_without_result;
+        temp_arguments[2] = num_columns_without_result + 1;
+        const uint32_t result_column_id = num_columns_without_result + 2;
+        SubstringUtil::substring_execute(*block, temp_arguments, result_column_id, block->rows());
+
+        ColumnPtr result_column = block->get_by_position(result_column_id).column;
+        if (is_nullable) {
+            result_column = ColumnNullable::create(std::move(result_column), null_map_column);
+        }
+        block->replace_by_position(idx, std::move(result_column));
+        block->erase_tail(num_columns_without_result);
+    }
+
+    Status _try_materialize_aggregate_pushdown_rows(Block* block, bool* pushed_down) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(pushed_down != nullptr);
+        *pushed_down = false;
+        block->clear_column_data(_projected_columns.size());
+        _aggregate_pushdown_tried = true;
+        if (!_supports_aggregate_pushdown(_push_down_agg_type)) {
+            return Status::OK();
+        }
+
+        FileAggregateRequest file_request;
+        RETURN_IF_ERROR(_build_file_aggregate_request(_push_down_agg_type, &file_request));
+        FileAggregateResult file_result;
+        const auto status = _data_reader.reader->get_aggregate_result(file_request, &file_result);
+        if (status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>()) {
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(status);
+        RETURN_IF_ERROR(
+                _materialize_aggregate_pushdown_rows(_push_down_agg_type, file_result, block));
+        *pushed_down = true;
+        RETURN_IF_ERROR(close_current_reader());
+        return Status::OK();
+    }
+
+    virtual bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const {
+        // Only COUNT and MIN/MAX can be push down.
+        if (agg_type != TPushAggOp::type::COUNT && agg_type != TPushAggOp::type::MINMAX) {
+            return false;
+        }
+        // Only support aggregate pushdown when there is no delete, filter and column predicate, so
+        // the reduced rows consumed by the upper aggregate remain semantically equivalent to a
+        // normal scan.
+        if (_delete_rows != nullptr && !_delete_rows->empty()) {
+            return false;
+        }
+        if (!_table_filters.empty() || !_table_column_predicates.empty()) {
+            return false;
+        }
+        if (agg_type == TPushAggOp::type::COUNT) {
+            return true;
+        }
+        // For MIN/MAX, only support direct file-to-table column mappings. The two emitted rows
+        // must be enough for the upper MIN/MAX aggregate without evaluating default expressions or
+        // virtual columns.
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            if (!mapping.file_local_id.has_value() ||
+                mapping.virtual_column_type != TableVirtualColumnType::INVALID ||
+                mapping.default_expr != nullptr || mapping.file_type == nullptr ||
+                mapping.table_type == nullptr) {
+                return false;
+            }
+            if (!_can_push_down_minmax_for_mapping(mapping)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    static ColumnPtr _detach_column(ColumnPtr column) {
+        DORIS_CHECK(column.get() != nullptr);
+        return IColumn::mutate(std::move(column));
+    }
+
+    static Status _align_column_nullability(ColumnPtr* column, const DataTypePtr& table_type) {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(column->get() != nullptr);
+        DORIS_CHECK(table_type != nullptr);
+        // Must return non-const column
+        *column = (*column)->convert_to_full_column_if_const();
+        if (table_type->is_nullable()) {
+            const auto& nested_type =
+                    assert_cast<const DataTypeNullable&>(*table_type).get_nested_type();
+            if (!(*column)->is_nullable()) {
+                RETURN_IF_ERROR(_align_column_nullability(column, nested_type));
+                *column = make_nullable(*column);
+                return Status::OK();
+            }
+            const auto& nullable_column = assert_cast<const ColumnNullable&>(**column);
+            ColumnPtr nested_column = nullable_column.get_nested_column_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&nested_column, nested_type));
+            *column = ColumnNullable::create(nested_column,
+                                             nullable_column.get_null_map_column_ptr());
+            return Status::OK();
+        }
+        if ((*column)->is_nullable()) {
+            const auto& nullable_column = assert_cast<const ColumnNullable&>(**column);
+            if (nullable_column.has_null()) {
+                return Status::InternalError(
+                        "Default expression produced NULL for non-nullable table column");
+            }
+            ColumnPtr nested_column = nullable_column.get_nested_column_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&nested_column, table_type));
+            *column = nested_column;
+            return Status::OK();
+        }
+        if (const auto* array_type = typeid_cast<const DataTypeArray*>(table_type.get())) {
+            const auto& array_column = assert_cast<const ColumnArray&>(**column);
+            ColumnPtr nested_column = array_column.get_data_ptr();
+            RETURN_IF_ERROR(
+                    _align_column_nullability(&nested_column, array_type->get_nested_type()));
+            *column = ColumnArray::create(nested_column, array_column.get_offsets_ptr());
+            return Status::OK();
+        }
+        if (const auto* map_type = typeid_cast<const DataTypeMap*>(table_type.get())) {
+            const auto& map_column = assert_cast<const ColumnMap&>(**column);
+            ColumnPtr key_column = map_column.get_keys_ptr();
+            ColumnPtr value_column = map_column.get_values_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&key_column, map_type->get_key_type()));
+            RETURN_IF_ERROR(_align_column_nullability(&value_column, map_type->get_value_type()));
+            *column = ColumnMap::create(key_column, value_column, map_column.get_offsets_ptr());
+            return Status::OK();
+        }
+        if (const auto* struct_type = typeid_cast<const DataTypeStruct*>(table_type.get())) {
+            const auto& struct_column = assert_cast<const ColumnStruct&>(**column);
+            Columns columns = struct_column.get_columns_copy();
+            DORIS_CHECK(columns.size() == struct_type->get_elements().size());
+            for (size_t i = 0; i < columns.size(); ++i) {
+                RETURN_IF_ERROR(
+                        _align_column_nullability(&columns[i], struct_type->get_element(i)));
+            }
+            *column = ColumnStruct::create(columns);
+            return Status::OK();
+        }
+        return Status::OK();
+    }
+
+    static Status _execute_default_expr_without_root_type_check(
+            const VExprContextSPtr& default_expr, const Block* block,
+            ColumnWithTypeAndName* result_data) {
+        DORIS_CHECK(default_expr != nullptr);
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(result_data != nullptr);
+        ColumnPtr result_column;
+        Status st;
+        RETURN_IF_CATCH_EXCEPTION({
+            st = default_expr->root()->execute_column_impl(default_expr.get(), block, nullptr,
+                                                           block->rows(), result_column);
+        });
+        RETURN_IF_ERROR(st);
+        DORIS_CHECK(result_column.get() != nullptr);
+        if (result_column->size() != block->rows()) {
+            return Status::InternalError(
+                    "Default expr {} return column size {} not equal to expected size {}",
+                    default_expr->expr_name(), result_column->size(), block->rows());
+        }
+        result_data->column = result_column;
+        result_data->type = default_expr->execute_type(block);
+        result_data->name = default_expr->expr_name();
+        return Status::OK();
+    }
+
+    Status _cast_column_to_type(ColumnPtr* column, const DataTypePtr& file_type,
+                                const DataTypePtr& table_type,
+                                const std::string& column_name) const {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(column->get() != nullptr);
+        DORIS_CHECK(file_type != nullptr);
+        DORIS_CHECK(table_type != nullptr);
+        if (file_type->equals(*table_type)) {
+            return Status::OK();
+        }
+
+        DataTypePtr input_type = file_type;
+        if ((*column)->is_nullable() && !input_type->is_nullable()) {
+            input_type = make_nullable(input_type);
+        }
+        Block cast_block;
+        cast_block.insert({*column, input_type, column_name});
+        auto slot_ref = VSlotRef::create_shared(0, 0, -1, input_type, column_name);
+        auto cast_expr = Cast::create_shared(table_type);
+        cast_expr->add_child(std::move(slot_ref));
+        auto cast_ctx = VExprContext::create_shared(std::move(cast_expr));
+        RowDescriptor row_desc;
+        RETURN_IF_ERROR(cast_ctx->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(cast_ctx->open(_runtime_state));
+        ColumnPtr cast_column;
+        RETURN_IF_ERROR(cast_ctx->execute(&cast_block, cast_column));
+        *column = std::move(cast_column);
+        return Status::OK();
+    }
+
+    Status _materialize_present_child_mapping_column(const ColumnMapping& mapping,
+                                                     const ColumnPtr& file_column,
+                                                     const size_t rows, ColumnPtr* column) {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(mapping.file_type != nullptr);
+        DORIS_CHECK(mapping.table_type != nullptr);
+        *column = file_column;
+        if (!mapping.is_trivial) {
+            if (!mapping.child_mappings.empty()) {
+                RETURN_IF_ERROR(
+                        _materialize_complex_mapping_column(mapping, *column, rows, column));
+            } else {
+                RETURN_IF_ERROR(_cast_column_to_type(column, mapping.file_type, mapping.table_type,
+                                                     mapping.file_column_name));
+            }
+        }
+        RETURN_IF_ERROR(_align_column_nullability(column, mapping.table_type));
+        return Status::OK();
+    }
+
+    Status _materialize_mapping_column(const ColumnMapping& mapping, Block* current_block,
+                                       const size_t rows, ColumnPtr* column) {
+        if (!mapping.is_trivial && mapping.file_local_id.has_value() &&
+            !mapping.child_mappings.empty()) {
+            DCHECK(mapping.projection != nullptr);
+            int res_id;
+            auto st = mapping.projection->execute(current_block, &res_id);
+            if (!st.ok()) {
+                return Status::InternalError(
+                        "Failed to execute complex mapping projection for table column '{}' "
+                        "(global_index={}, file_local_id={}, rows={}): {}, mapping={}",
+                        mapping.table_column_name, mapping.global_index.value(),
+                        *mapping.file_local_id, rows, st.to_string(), mapping.debug_string());
+            }
+            ColumnPtr result_column = current_block->get_by_position(res_id).column;
+            RETURN_IF_ERROR(
+                    _materialize_complex_mapping_column(mapping, result_column, rows, column));
+            return Status::OK();
+        }
+        if (mapping.projection != nullptr) {
+            int res_id;
+            auto st = mapping.projection->execute(current_block, &res_id);
+            if (!st.ok()) {
+                std::string file_local_id = "null";
+                if (mapping.file_local_id.has_value()) {
+                    file_local_id = std::to_string(*mapping.file_local_id);
+                }
+                return Status::InternalError(
+                        "Failed to execute mapping projection for table column '{}' "
+                        "(global_index={}, file_local_id={}, rows={}): {}, mapping={}",
+                        mapping.table_column_name, mapping.global_index.value(), file_local_id,
+                        rows, st.to_string(), mapping.debug_string());
+            }
+            ColumnPtr result_column = current_block->get_by_position(res_id).column;
+            *column = _detach_column(std::move(result_column));
+            return Status::OK();
+        }
+        if (mapping.default_expr != nullptr) {
+            if (current_block->rows() == rows) {
+                ColumnWithTypeAndName result;
+                RETURN_IF_ERROR(_execute_default_expr_without_root_type_check(
+                        mapping.default_expr, current_block, &result));
+                ColumnPtr result_column = result.column;
+                RETURN_IF_ERROR(_align_column_nullability(&result_column, mapping.table_type));
+                *column = _detach_column(std::move(result_column));
+            } else {
+                DORIS_CHECK(mapping.constant_index.has_value());
+                Block eval_block;
+                eval_block.insert({mapping.table_type->create_column_const_with_default_value(rows),
+                                   mapping.table_type, "__table_reader_const_rows"});
+                ColumnWithTypeAndName result;
+                RETURN_IF_ERROR(_execute_default_expr_without_root_type_check(
+                        mapping.default_expr, &eval_block, &result));
+                ColumnPtr result_column = result.column;
+                RETURN_IF_ERROR(_align_column_nullability(&result_column, mapping.table_type));
+                *column = _detach_column(std::move(result_column));
+            }
+            return Status::OK();
+        }
+        ColumnPtr result_column = mapping.table_type->create_column_const_with_default_value(rows);
+        *column = _detach_column(std::move(result_column));
+        return Status::OK();
+    }
+
+    Status _materialize_complex_mapping_column(const ColumnMapping& mapping,
+                                               const ColumnPtr& file_column, const size_t rows,
+                                               ColumnPtr* column) {
+        DORIS_CHECK(mapping.table_type != nullptr);
+        DORIS_CHECK(file_column.get() != nullptr);
+        const auto table_type = remove_nullable(mapping.table_type);
+        switch (table_type->get_primitive_type()) {
+        case TYPE_STRUCT:
+            RETURN_IF_ERROR(_materialize_struct_mapping_column(mapping, file_column, rows, column));
+            break;
+        case TYPE_ARRAY:
+            RETURN_IF_ERROR(_materialize_array_mapping_column(mapping, file_column, rows, column));
+            break;
+        case TYPE_MAP:
+            RETURN_IF_ERROR(_materialize_map_mapping_column(mapping, file_column, rows, column));
+            break;
+        default:
+            *column = _detach_column(file_column);
+            break;
+        }
+        return Status::OK();
+    }
+
+    static std::vector<const ColumnMapping*> _present_child_mappings_in_file_order(
+            const std::vector<ColumnMapping>& child_mappings) {
+        std::vector<const ColumnMapping*> result;
+        result.reserve(child_mappings.size());
+        for (const auto& child_mapping : child_mappings) {
+            if (child_mapping.file_local_id.has_value()) {
+                result.push_back(&child_mapping);
+            }
+        }
+        std::ranges::sort(result, [](const ColumnMapping* lhs, const ColumnMapping* rhs) {
+            DORIS_CHECK(lhs->file_local_id.has_value());
+            DORIS_CHECK(rhs->file_local_id.has_value());
+            return *lhs->file_local_id < *rhs->file_local_id;
+        });
+        return result;
+    }
+
+    static size_t _file_child_ordinal_for_mapping(
+            const ColumnMapping& mapping, const ColumnMapping& child_mapping,
+            const std::vector<const ColumnMapping*>& file_ordered_children) {
+        DORIS_CHECK(child_mapping.file_local_id.has_value());
+        if (!mapping.projected_file_children.empty()) {
+            const auto child_it = std::ranges::find_if(
+                    mapping.projected_file_children, [&](const ColumnDefinition& file_child) {
+                        return file_child.file_local_id() == *child_mapping.file_local_id;
+                    });
+            DORIS_CHECK(child_it != mapping.projected_file_children.end());
+            return static_cast<size_t>(
+                    std::distance(mapping.projected_file_children.begin(), child_it));
+        }
+        const auto child_it = std::ranges::find(file_ordered_children, &child_mapping);
+        DORIS_CHECK(child_it != file_ordered_children.end());
+        return static_cast<size_t>(std::distance(file_ordered_children.begin(), child_it));
+    }
+
+    static std::vector<const ColumnMapping*> _child_mappings_in_table_type_order(
+            const ColumnMapping& mapping, const DataTypeStruct& table_type) {
+        std::vector<const ColumnMapping*> result;
+        result.reserve(mapping.child_mappings.size());
+        for (size_t child_idx = 0; child_idx < table_type.get_elements().size(); ++child_idx) {
+            const auto& child_name = table_type.get_element_name(child_idx);
+            const auto child_it = std::ranges::find_if(
+                    mapping.child_mappings, [&](const ColumnMapping& child_mapping) {
+                        return child_mapping.table_column_name == child_name;
+                    });
+            DORIS_CHECK(child_it != mapping.child_mappings.end())
+                    << mapping.debug_string() << ", table_child_name=" << child_name;
+            result.push_back(&*child_it);
+        }
+        return result;
+    }
+
+    static const IColumn* _nested_column_if_nullable(const ColumnPtr& column,
+                                                     const NullMap** null_map) {
+        DORIS_CHECK(column.get() != nullptr);
+        if (const auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+            if (null_map != nullptr) {
+                *null_map = &nullable_column->get_null_map_data();
+            }
+            return &nullable_column->get_nested_column();
+        }
+        return column.get();
+    }
+
+    Status _materialize_struct_mapping_column(const ColumnMapping& mapping,
+                                              const ColumnPtr& file_column, const size_t rows,
+                                              ColumnPtr* column) {
+        DORIS_CHECK(mapping.table_type != nullptr);
+        const auto* table_type =
+                assert_cast<const DataTypeStruct*>(remove_nullable(mapping.table_type).get());
+        const auto full_file_column = file_column->convert_to_full_column_if_const();
+        const NullMap* parent_null_map = nullptr;
+        const auto* nested_file_column =
+                _nested_column_if_nullable(full_file_column, &parent_null_map);
+        const auto* file_struct = assert_cast<const ColumnStruct*>(nested_file_column);
+        DORIS_CHECK(table_type->get_elements().size() == mapping.child_mappings.size());
+
+        Columns child_columns;
+        child_columns.reserve(mapping.child_mappings.size());
+        const auto file_ordered_children =
+                _present_child_mappings_in_file_order(mapping.child_mappings);
+        const auto table_ordered_children =
+                _child_mappings_in_table_type_order(mapping, *table_type);
+        for (const auto* child_mapping : table_ordered_children) {
+            DORIS_CHECK(child_mapping != nullptr);
+            if (!child_mapping->file_local_id.has_value()) {
+                child_columns.push_back(
+                        child_mapping->table_type->create_column_const_with_default_value(rows)
+                                ->convert_to_full_column_if_const());
+                continue;
+            }
+            const auto file_child_idx =
+                    _file_child_ordinal_for_mapping(mapping, *child_mapping, file_ordered_children);
+            DORIS_CHECK(file_child_idx < file_struct->get_columns().size());
+            ColumnPtr child_column = file_struct->get_column_ptr(file_child_idx);
+            RETURN_IF_ERROR(_materialize_present_child_mapping_column(*child_mapping, child_column,
+                                                                      rows, &child_column));
+            child_columns.push_back(std::move(child_column));
+        }
+        MutableColumns mutable_child_columns;
+        mutable_child_columns.reserve(child_columns.size());
+        for (auto& child_column : child_columns) {
+            mutable_child_columns.push_back(IColumn::mutate(std::move(child_column)));
+        }
+        auto result = ColumnStruct::create(std::move(mutable_child_columns));
+        if (mapping.table_type->is_nullable()) {
+            auto null_map = ColumnUInt8::create();
+            auto& null_map_data = null_map->get_data();
+            null_map_data.resize(rows);
+            if (parent_null_map != nullptr) {
+                DORIS_CHECK(parent_null_map->size() == rows);
+                null_map_data.assign(parent_null_map->begin(), parent_null_map->end());
+            } else {
+                std::fill(null_map_data.begin(), null_map_data.end(), 0);
+            }
+            *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        } else {
+            *column = std::move(result);
+        }
+        return Status::OK();
+    }
+
+    Status _materialize_array_mapping_column(const ColumnMapping& mapping,
+                                             const ColumnPtr& file_column, const size_t rows,
+                                             ColumnPtr* column) {
+        DORIS_CHECK(mapping.child_mappings.size() == 1);
+        const auto full_file_column = file_column->convert_to_full_column_if_const();
+        const NullMap* parent_null_map = nullptr;
+        const auto* nested_file_column =
+                _nested_column_if_nullable(full_file_column, &parent_null_map);
+        const auto* file_array = assert_cast<const ColumnArray*>(nested_file_column);
+        ColumnPtr nested_column = file_array->get_data_ptr();
+        const auto& element_mapping = mapping.child_mappings[0];
+        RETURN_IF_ERROR(_materialize_present_child_mapping_column(
+                element_mapping, nested_column, nested_column->size(), &nested_column));
+        auto offsets_column = file_array->get_offsets_ptr()->convert_to_full_column_if_const();
+        auto result = ColumnArray::create(IColumn::mutate(std::move(nested_column)),
+                                          IColumn::mutate(std::move(offsets_column)));
+        if (mapping.table_type->is_nullable()) {
+            auto null_map = ColumnUInt8::create();
+            auto& null_map_data = null_map->get_data();
+            null_map_data.resize(rows);
+            if (parent_null_map != nullptr) {
+                DORIS_CHECK(parent_null_map->size() == rows);
+                null_map_data.assign(parent_null_map->begin(), parent_null_map->end());
+            } else {
+                std::fill(null_map_data.begin(), null_map_data.end(), 0);
+            }
+            *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        } else {
+            *column = std::move(result);
+        }
+        return Status::OK();
+    }
+
+    Status _materialize_map_mapping_column(const ColumnMapping& mapping,
+                                           const ColumnPtr& file_column, const size_t rows,
+                                           ColumnPtr* column) {
+        const auto full_file_column = file_column->convert_to_full_column_if_const();
+        const NullMap* parent_null_map = nullptr;
+        const auto* nested_file_column =
+                _nested_column_if_nullable(full_file_column, &parent_null_map);
+        const auto* file_map = assert_cast<const ColumnMap*>(nested_file_column);
+        ColumnPtr key_column = file_map->get_keys_ptr();
+        ColumnPtr value_column = file_map->get_values_ptr();
+
+        const ColumnMapping* key_mapping = nullptr;
+        const ColumnMapping* value_mapping = nullptr;
+        for (const auto& child_mapping : mapping.child_mappings) {
+            if (!child_mapping.file_local_id.has_value()) {
+                continue;
+            }
+            if (*child_mapping.file_local_id == 0) {
+                key_mapping = &child_mapping;
+            } else if (*child_mapping.file_local_id == 1) {
+                value_mapping = &child_mapping;
+            }
+        }
+
+        if (key_mapping != nullptr) {
+            RETURN_IF_ERROR(_materialize_present_child_mapping_column(
+                    *key_mapping, key_column, key_column->size(), &key_column));
+        }
+        if (value_mapping != nullptr) {
+            RETURN_IF_ERROR(_materialize_present_child_mapping_column(
+                    *value_mapping, value_column, value_column->size(), &value_column));
+        }
+        auto offsets_column = file_map->get_offsets_ptr()->convert_to_full_column_if_const();
+        auto result = ColumnMap::create(IColumn::mutate(std::move(key_column)),
+                                        IColumn::mutate(std::move(value_column)),
+                                        IColumn::mutate(std::move(offsets_column)));
+        if (mapping.table_type->is_nullable()) {
+            auto null_map = ColumnUInt8::create();
+            auto& null_map_data = null_map->get_data();
+            null_map_data.resize(rows);
+            if (parent_null_map != nullptr) {
+                DORIS_CHECK(parent_null_map->size() == rows);
+                null_map_data.assign(parent_null_map->begin(), parent_null_map->end());
+            } else {
+                std::fill(null_map_data.begin(), null_map_data.end(), 0);
+            }
+            *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        } else {
+            *column = std::move(result);
+        }
+        return Status::OK();
+    }
+
+    Status _open_mapping_exprs() {
+        RowDescriptor row_desc;
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            if (mapping.projection != nullptr) {
+                RETURN_IF_ERROR(mapping.projection->prepare(_runtime_state, row_desc));
+                RETURN_IF_ERROR(mapping.projection->open(_runtime_state));
+            }
+            if (mapping.default_expr != nullptr) {
+                RETURN_IF_ERROR(mapping.default_expr->prepare(_runtime_state, row_desc));
+                RETURN_IF_ERROR(mapping.default_expr->open(_runtime_state));
+            }
+        }
+        return Status::OK();
+    }
+
+    Status _build_file_aggregate_request(TPushAggOp::type agg_type,
+                                         FileAggregateRequest* request) const {
+        DORIS_CHECK(request != nullptr);
+        DORIS_CHECK(_supports_aggregate_pushdown(agg_type));
+        request->agg_type = agg_type;
+        request->columns.clear();
+        if (agg_type == TPushAggOp::type::COUNT) {
+            return Status::OK();
+        }
+        request->columns.reserve(_data_reader.column_mapper->mappings().size());
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            DORIS_CHECK(mapping.file_local_id.has_value());
+            FileAggregateRequest::Column column;
+            column.projection = LocalColumnIndex::top_level(LocalColumnId(*mapping.file_local_id));
+            if (!mapping.child_mappings.empty()) {
+                RETURN_IF_ERROR(build_aggregate_projection(mapping, &column.projection));
+            }
+            request->columns.push_back(std::move(column));
+        }
+        return Status::OK();
+    }
+
+    Status _materialize_aggregate_pushdown_rows(TPushAggOp::type agg_type,
+                                                const FileAggregateResult& file_result,
+                                                Block* block) {
+        if (agg_type == TPushAggOp::type::COUNT) {
+            // COUNT pushdown is not a final count value. It emits `count` default rows so the
+            // upper COUNT(*) aggregate can count them and produce the final result, including
+            // zero rows when count is 0.
+            DORIS_CHECK(file_result.count >= 0);
+            return _materialize_count_rows(cast_set<size_t>(file_result.count), block);
+        }
+        // MIN/MAX pushdown emits two rows, min first and max second, for each projected column.
+        // The upper MIN/MAX aggregate consumes those two rows to produce the final aggregate value.
+        DORIS_CHECK(file_result.columns.size() == _data_reader.column_mapper->mappings().size());
+        DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+        Block file_block;
+        file_block.reserve(_data_reader.file_block_layout.size());
+        for (const auto& column : _data_reader.file_block_layout) {
+            file_block.insert({column.type->create_column(), column.type, column.name});
+        }
+        for (size_t column_idx = 0; column_idx < file_result.columns.size(); ++column_idx) {
+            const auto& result_column = file_result.columns[column_idx];
+            if (!result_column.has_min || !result_column.has_max) {
+                return Status::NotSupported("Missing min/max aggregate result for column {}",
+                                            _projected_columns[column_idx].name);
+            }
+            bool found_file_column = false;
+            for (size_t block_position = 0; block_position < _data_reader.file_block_layout.size();
+                 ++block_position) {
+                if (_data_reader.file_block_layout[block_position].file_column_id ==
+                    file_result.columns[column_idx].projection.column_id()) {
+                    found_file_column = true;
+                    auto column = file_block.get_by_position(block_position)
+                                          .type->create_column()
+                                          ->assert_mutable();
+                    RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                            file_result.columns[column_idx].projection, result_column.min_value,
+                            column.get()));
+                    RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                            file_result.columns[column_idx].projection, result_column.max_value,
+                            column.get()));
+                    file_block.replace_by_position(block_position, std::move(column));
+                    break;
+                }
+            }
+            DORIS_CHECK(found_file_column);
+        }
+        for (size_t column_idx = 0; column_idx < _data_reader.column_mapper->mappings().size();
+             ++column_idx) {
+            ColumnPtr table_column;
+            RETURN_IF_ERROR(
+                    _materialize_mapping_column(_data_reader.column_mapper->mappings()[column_idx],
+                                                &file_block, 2, &table_column));
+            block->replace_by_position(column_idx, std::move(table_column));
+        }
+        return Status::OK();
+    }
+
+    struct FileBlockColumn {
+        LocalColumnId file_column_id = LocalColumnId::invalid();
+        std::string name;
+        DataTypePtr type;
+    };
+
+    struct DataReader {
+        std::unique_ptr<FileReader> reader;
+        std::unique_ptr<TableColumnMapper> column_mapper;
+        // Schema of the data file, also including virtual column (row position).
+        std::vector<ColumnDefinition> file_schema;
+        // Layout of the block returned by file reader, determined by column mapping and file
+        // schema. It is used for file reader to materialize columns into correct type and position.
+        std::vector<FileBlockColumn> file_block_layout;
+        Block block_template;
+    };
+    DataReader _data_reader;
+    std::vector<ColumnDefinition> _projected_columns;
+    std::unique_ptr<ScanTask> _current_task;
+    std::optional<io::FileDescription> _current_file_description;
+    // Range-level compression has higher priority than scan-param compression. TVF/load can keep
+    // the logical format as CSV/TEXT while carrying the concrete compression such as GZ or LZO on
+    // each TFileRangeDesc, matching the old FileScanner reader contract.
+    TFileCompressType::type _current_range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _current_range_load_id;
+    TFileRangeDesc _current_file_range_desc;
+    std::shared_ptr<io::FileSystemProperties> _system_properties;
+    // partition key -> value
+    std::map<std::string, Field> _partition_values;
+    // Predicates built from scan conjuncts before file-level localization.
+    std::vector<TableFilter> _table_filters;
+    TableColumnPredicates _table_column_predicates;
+    VExprContextSPtrs _conjuncts;
+    ReadProfile _profile;
+    // Parsed from row-position based delete files, including position delete and deletion vector.
+    DeleteRows* _delete_rows = nullptr;
+    TFileScanRangeParams* _scan_params;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    RuntimeState* _runtime_state;
+    RuntimeProfile* _scanner_profile;
+    const std::vector<SlotDescriptor*>* _file_slot_descs = nullptr;
+    FileFormat _format;
+    TPushAggOp::type _push_down_agg_type = TPushAggOp::type::NONE;
+    uint64_t _condition_cache_digest = 0;
+    segment_v2::ConditionCache::ExternalCacheKey _condition_cache_key;
+    std::shared_ptr<std::vector<bool>> _condition_cache;
+    std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
+    int64_t _condition_cache_hit_count = 0;
+    bool _current_reader_reached_eof = false;
+    int64_t _remaining_table_level_count = -1;
+    std::optional<GlobalRowIdContext> _global_rowid_context;
+    bool _aggregate_pushdown_tried = false;
+    TableColumnMapperOptions _mapper_options;
+
+private:
+    static const ColumnDefinition* _find_column_definition(
+            const std::vector<ColumnDefinition>& schema, LocalColumnId column_id) {
+        for (const auto& field : schema) {
+            if (field.file_local_id() == column_id.value()) {
+                return &field;
+            }
+        }
+        return nullptr;
+    }
+
+    static bool _can_push_down_minmax_for_mapping(const ColumnMapping& mapping) {
+        if (mapping.child_mappings.empty()) {
+            return true;
+        }
+        const auto primitive_type = remove_nullable(mapping.file_type)->get_primitive_type();
+        if (primitive_type != TYPE_STRUCT) {
+            return false;
+        }
+        size_t mapped_children = 0;
+        const ColumnMapping* mapped_child = nullptr;
+        for (const auto& child_mapping : mapping.child_mappings) {
+            if (!child_mapping.file_local_id.has_value()) {
+                continue;
+            }
+            ++mapped_children;
+            mapped_child = &child_mapping;
+        }
+        return mapped_children == 1 && mapped_child != nullptr &&
+               _can_push_down_minmax_for_mapping(*mapped_child);
+    }
+
+    static Status build_aggregate_projection(const ColumnMapping& mapping,
+                                             LocalColumnIndex* projection) {
+        DORIS_CHECK(projection != nullptr);
+        DORIS_CHECK(mapping.file_local_id.has_value());
+        *projection = LocalColumnIndex::local(*mapping.file_local_id);
+        projection->children.clear();
+        projection->project_all_children = true;
+        if (mapping.child_mappings.empty()) {
+            return Status::OK();
+        }
+        projection->project_all_children = false;
+        for (const auto& child_mapping : mapping.child_mappings) {
+            if (!child_mapping.file_local_id.has_value()) {
+                continue;
+            }
+            LocalColumnIndex child_projection;
+            RETURN_IF_ERROR(build_aggregate_projection(child_mapping, &child_projection));
+            projection->children.push_back(std::move(child_projection));
+        }
+        DORIS_CHECK(projection->children.size() == 1);
+        return Status::OK();
+    }
+
+    static Status _insert_aggregate_projection_value(const LocalColumnIndex& projection,
+                                                     const Field& value, IColumn* column) {
+        DORIS_CHECK(column != nullptr);
+        if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+            RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                    projection, value, &nullable_column->get_nested_column()));
+            nullable_column->get_null_map_data().push_back(0);
+            return Status::OK();
+        }
+        if (projection.project_all_children || projection.children.empty()) {
+            column->insert(value);
+            return Status::OK();
+        }
+        auto* struct_column = assert_cast<ColumnStruct*>(column);
+        DORIS_CHECK(projection.children.size() == 1);
+        const auto& child_projection = projection.children[0];
+        DORIS_CHECK(struct_column->get_columns().size() == 1);
+        RETURN_IF_ERROR(_insert_aggregate_projection_value(child_projection, value,
+                                                           &struct_column->get_column(0)));
+        return Status::OK();
+    }
+
+    // Parse row-position deletes from table format specific parameters, and fill in _delete_rows.
+    Status _parse_delete_predicates(const SplitReadOptions& options);
+};
+
+} // namespace doris::format
diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp
index 553cdc4460e15c..9610bc028595ec 100644
--- a/be/src/io/file_factory.cpp
+++ b/be/src/io/file_factory.cpp
@@ -57,21 +57,20 @@ namespace doris {
 
 constexpr std::string_view RANDOM_CACHE_BASE_PATH = "random";
 
-io::FileReaderOptions FileFactory::get_reader_options(RuntimeState* state,
+io::FileReaderOptions FileFactory::get_reader_options(const TQueryOptions& option,
                                                       const io::FileDescription& fd) {
     io::FileReaderOptions opts {
             .cache_base_path {},
             .file_size = fd.file_size,
             .mtime = fd.mtime,
     };
-    if (config::enable_file_cache && state != nullptr &&
-        state->query_options().__isset.enable_file_cache &&
-        state->query_options().enable_file_cache && fd.file_cache_admission) {
+    if (config::enable_file_cache && option.__isset.enable_file_cache && option.enable_file_cache &&
+        fd.file_cache_admission) {
         opts.cache_type = io::FileCachePolicy::FILE_BLOCK_CACHE;
     }
-    if (state != nullptr && state->query_options().__isset.file_cache_base_path &&
-        state->query_options().file_cache_base_path != RANDOM_CACHE_BASE_PATH) {
-        opts.cache_base_path = state->query_options().file_cache_base_path;
+    if (option.__isset.file_cache_base_path &&
+        option.file_cache_base_path != RANDOM_CACHE_BASE_PATH) {
+        opts.cache_base_path = option.file_cache_base_path;
     }
     return opts;
 }
diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h
index 7d662e4fdde469..33595313b921b1 100644
--- a/be/src/io/file_factory.h
+++ b/be/src/io/file_factory.h
@@ -16,6 +16,7 @@
 // under the License.
 #pragma once
 
+#include <gen_cpp/PaloInternalService_types.h>
 #include <gen_cpp/PlanNodes_types.h>
 #include <gen_cpp/Types_types.h>
 #include <glog/logging.h>
@@ -64,6 +65,8 @@ struct FileDescription {
     // -1 means unset.
     // If the file length is not set, the file length will be fetched from the file system.
     int64_t file_size = -1;
+    int64_t range_start_offset = 0;
+    int64_t range_size = -1;
     // modification time of this file.
     // 0 means unset.
     int64_t mtime = 0;
@@ -83,7 +86,7 @@ class FileFactory {
     ENABLE_FACTORY_CREATOR(FileFactory);
 
 public:
-    static io::FileReaderOptions get_reader_options(RuntimeState* state,
+    static io::FileReaderOptions get_reader_options(const TQueryOptions& option,
                                                     const io::FileDescription& fd);
 
     /// Create a temporary FileSystem for accessing file corresponding to `file_description`
diff --git a/be/src/io/io_common.h b/be/src/io/io_common.h
index 36b20517afb87c..566e376219efab 100644
--- a/be/src/io/io_common.h
+++ b/be/src/io/io_common.h
@@ -97,6 +97,10 @@ struct IOContext {
     // if `is_warmup` == true, this I/O request is from a warm up task
     bool is_warmup {false};
     int64_t condition_cache_filtered_rows = 0;
+    // Rows removed by file-local predicate conjuncts inside FileReader/TableReader. Scanner-level
+    // output filtering already records its own unselected rows; this counter carries the rows that
+    // were filtered before the block returned to Scanner.
+    int64_t predicate_filtered_rows = 0;
 };
 
 } // namespace io
diff --git a/be/src/storage/segment/condition_cache.h b/be/src/storage/segment/condition_cache.h
index 511b9c56abac5e..a189312ee1427a 100644
--- a/be/src/storage/segment/condition_cache.h
+++ b/be/src/storage/segment/condition_cache.h
@@ -26,6 +26,7 @@
 #include <memory>
 #include <roaring/roaring.hh>
 #include <string>
+#include <vector>
 
 #include "common/config.h"
 #include "common/status.h"
@@ -38,7 +39,19 @@
 #include "util/slice.h"
 #include "util/time.h"
 
-namespace doris::segment_v2 {
+namespace doris {
+
+// Context passed from scan/table-reader layers to physical readers for condition cache
+// integration. On MISS, readers set filter_result[granule] to true when row-level predicates keep
+// at least one row in that granule. On HIT, readers skip granules whose cached bit is false.
+struct ConditionCacheContext {
+    bool is_hit = false;
+    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
+    int64_t base_granule = 0;                         // global granule index of filter_result[0]
+    static constexpr int GRANULE_SIZE = 2048;
+};
+
+namespace segment_v2 {
 
 class ConditionCacheHandle;
 
@@ -167,4 +180,5 @@ class ConditionCacheHandle {
     DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle);
 };
 
-} // namespace doris::segment_v2
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h
index b230ac67f4778e..de9030b5b3a7c7 100644
--- a/be/src/util/jni-util.h
+++ b/be/src/util/jni-util.h
@@ -606,6 +606,14 @@ class Object {
 
     bool uninitialized() const { return _obj == nullptr; }
 
+    void reset(JNIEnv* env) {
+        if (_obj == nullptr) {
+            return;
+        }
+        RefHelper<Ref>::destroy(env, _obj);
+        _obj = nullptr;
+    }
+
     template <RefType T>
     bool equal(JNIEnv* env, const Object<T>& other) {
         DCHECK(!uninitialized());
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index 2edcff5eef87c9..95d2a435d8d00e 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -30,6 +30,7 @@ file(GLOB_RECURSE UT_FILES CONFIGURE_DEPENDS
     exec/*.cpp
     exprs/*.cpp
     format/*.cpp
+    format_v2/*.cpp
     gutil/*.cpp
     io/*.cpp
     load/*.cpp
diff --git a/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp
new file mode 100644
index 00000000000000..69cf458e2fdc5f
--- /dev/null
+++ b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp
@@ -0,0 +1,1852 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_date_or_datetime_v2.h"
+#include "core/data_type/data_type_decimal.h"
+#include "core/data_type/data_type_nothing.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_time.h"
+#include "core/data_type/data_type_timestamptz.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "core/field.h"
+#include "core/string_ref.h"
+#include "core/value/timestamptz_value.h"
+#include "util/timezone_utils.h"
+
+namespace doris {
+namespace {
+
+struct ReadColumnResult {
+    Status status;
+    MutableColumnPtr column;
+};
+
+template <typename T>
+DecodedColumnView make_fixed_view(DecodedValueKind kind, const std::vector<T>& values,
+                                  const std::vector<uint8_t>* null_map = nullptr) {
+    DecodedColumnView view;
+    view.value_kind = kind;
+    view.row_count = null_map != nullptr ? static_cast<int64_t>(null_map->size())
+                                         : static_cast<int64_t>(values.size());
+    view.values = values.empty() ? nullptr : reinterpret_cast<const uint8_t*>(values.data());
+    view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data();
+    return view;
+}
+
+DecodedColumnView make_binary_view(DecodedValueKind kind, const std::vector<StringRef>& values,
+                                   int fixed_length = -1,
+                                   const std::vector<uint8_t>* null_map = nullptr) {
+    DecodedColumnView view;
+    view.value_kind = kind;
+    view.row_count = null_map != nullptr ? static_cast<int64_t>(null_map->size())
+                                         : static_cast<int64_t>(values.size());
+    view.binary_values = values.empty() ? nullptr : &values;
+    view.fixed_length = fixed_length;
+    view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data();
+    return view;
+}
+
+DecodedColumnView make_bool_view(const std::vector<uint8_t>& values,
+                                 const std::vector<uint8_t>* null_map = nullptr) {
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::BOOL;
+    view.row_count = null_map != nullptr ? static_cast<int64_t>(null_map->size())
+                                         : static_cast<int64_t>(values.size());
+    view.values = values.empty() ? nullptr : reinterpret_cast<const uint8_t*>(values.data());
+    view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data();
+    return view;
+}
+
+DecodedColumnView with_logical_integer(DecodedColumnView view, int bit_width, bool is_signed) {
+    view.logical_integer_bit_width = bit_width;
+    view.logical_integer_is_signed = is_signed;
+    return view;
+}
+
+ReadColumnResult read_column(const DataTypePtr& type, const DecodedColumnView& view) {
+    auto column = type->create_column();
+    auto status = type->get_serde()->read_column_from_decoded_values(*column, view);
+    return {std::move(status), std::move(column)};
+}
+
+void expect_not_supported(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::NOT_IMPLEMENTED_ERROR, status.code()) << status;
+}
+
+void expect_corruption(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::CORRUPTION, status.code()) << status;
+}
+
+void expect_data_quality_error(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::DATA_QUALITY_ERROR, status.code()) << status;
+}
+
+void expect_column_strings(const IDataType& type, const IColumn& column,
+                           const std::vector<std::string>& expected) {
+    ASSERT_EQ(expected.size(), column.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        EXPECT_EQ(expected[row], type.to_string(column, row)) << "row=" << row;
+    }
+}
+
+void expect_binary_column(const IColumn& column, const std::vector<std::string>& expected) {
+    const auto& string_column = assert_cast<const ColumnString&>(column);
+    ASSERT_EQ(expected.size(), string_column.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        const auto value = string_column.get_data_at(row);
+        EXPECT_EQ(expected[row], std::string(value.data, value.size)) << "row=" << row;
+    }
+}
+
+void expect_nullable_all_null(const IColumn& column, size_t expected_size) {
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+    ASSERT_EQ(expected_size, nullable_column.size());
+    ASSERT_EQ(expected_size, nullable_column.get_nested_column().size());
+    for (size_t row = 0; row < expected_size; ++row) {
+        EXPECT_TRUE(nullable_column.is_null_at(row)) << "row=" << row;
+    }
+}
+
+Field read_field(const DataTypePtr& type, const DecodedColumnView& view) {
+    Field field;
+    auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, view);
+    EXPECT_TRUE(status.ok()) << status;
+    return field;
+}
+
+Status read_field_status(const DataTypePtr& type, const DecodedColumnView& view) {
+    Field field;
+    return type->get_serde()->read_field_from_decoded_value(*type, &field, view);
+}
+
+std::vector<StringRef> string_refs(const std::vector<std::string>& values) {
+    std::vector<StringRef> refs;
+    refs.reserve(values.size());
+    for (const auto& value : values) {
+        refs.emplace_back(value.data(), value.size());
+    }
+    return refs;
+}
+
+#pragma pack(1)
+struct TestInt96Timestamp {
+    int64_t nanos_of_day;
+    int32_t julian_day;
+};
+#pragma pack()
+
+static_assert(sizeof(TestInt96Timestamp) == 12);
+
+Decimal128V3 decimal128_v3(Int128 value) {
+    return Decimal128V3(value);
+}
+
+Decimal256 decimal256_from_int64(int64_t value) {
+    return Decimal256(wide::Int256(value));
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Base SerDe behavior
+// ----------------------------------------------------------------------
+// These cases define the default contract for types that have not implemented decoded-value
+// materialization. Batch reads must report NotSupported, and the single-field path must surface
+// the same error because it is implemented by delegating to the batch reader.
+
+TEST(DataTypeSerDeDecodedValuesTest, BaseSerdeRejectsDecodedValues) {
+    auto type = std::make_shared<DataTypeNothing>();
+    std::vector<int32_t> values = {1};
+    auto view = make_fixed_view(DecodedValueKind::INT32, values);
+
+    auto result = read_column(type, view);
+
+    expect_not_supported(result.status);
+    EXPECT_EQ(0, result.column->size());
+    EXPECT_NE(std::string::npos, result.status.to_string().find("Nothing"));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, BaseFieldUsesBatchReaderAndPropagatesError) {
+    auto type = std::make_shared<DataTypeNothing>();
+    std::vector<int32_t> values = {1};
+    auto view = make_fixed_view(DecodedValueKind::INT32, values);
+    Field field = Field::create_field<TYPE_INT>(123);
+
+    auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, view);
+
+    expect_not_supported(status);
+    EXPECT_EQ(TYPE_INT, field.get_type());
+    EXPECT_EQ(123, field.get<TYPE_INT>());
+}
+
+// ----------------------------------------------------------------------
+// Number SerDe happy path
+// ----------------------------------------------------------------------
+// The numeric matrix verifies physical kind dispatch and the exact static_cast behavior used by
+// the reader. Narrow integer overflow is intentionally locked to current C++ conversion behavior;
+// if product semantics change to reject overflow, these expectations should be updated with the
+// implementation change.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadBooleanFromBool) {
+    auto type = std::make_shared<DataTypeBool>();
+    std::vector<uint8_t> values = {true, false, true};
+    auto view = make_bool_view(values);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnBool&>(*result.column);
+    ASSERT_EQ(3, column.size());
+    EXPECT_EQ(1, column.get_element(0));
+    EXPECT_EQ(0, column.get_element(1));
+    EXPECT_EQ(1, column.get_element(2));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt32) {
+    std::vector<int32_t> values = {0, 1, -1, 127, -128};
+    auto view = make_fixed_view(DecodedValueKind::INT32, values);
+
+    {
+        auto result = read_column(std::make_shared<DataTypeInt8>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt8&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        EXPECT_EQ(0, column.get_element(0));
+        EXPECT_EQ(1, column.get_element(1));
+        EXPECT_EQ(-1, column.get_element(2));
+        EXPECT_EQ(127, column.get_element(3));
+        EXPECT_EQ(-128, column.get_element(4));
+    }
+    {
+        auto result = read_column(std::make_shared<DataTypeInt16>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt16&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        for (size_t row = 0; row < values.size(); ++row) {
+            EXPECT_EQ(static_cast<int16_t>(values[row]), column.get_element(row));
+        }
+    }
+    {
+        auto result = read_column(std::make_shared<DataTypeInt32>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt32&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        for (size_t row = 0; row < values.size(); ++row) {
+            EXPECT_EQ(values[row], column.get_element(row));
+        }
+    }
+    {
+        auto result = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt64&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        for (size_t row = 0; row < values.size(); ++row) {
+            EXPECT_EQ(static_cast<int64_t>(values[row]), column.get_element(row));
+        }
+    }
+    {
+        auto result = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt128&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        for (size_t row = 0; row < values.size(); ++row) {
+            EXPECT_EQ(static_cast<__int128_t>(values[row]), column.get_element(row));
+        }
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt64) {
+    std::vector<int64_t> values = {0, 1, -1, 127, -128};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+
+    auto tiny = read_column(std::make_shared<DataTypeInt8>(), view);
+    ASSERT_TRUE(tiny.status.ok()) << tiny.status;
+    const auto& tiny_column = assert_cast<const ColumnInt8&>(*tiny.column);
+    EXPECT_EQ(127, tiny_column.get_element(3));
+    EXPECT_EQ(-128, tiny_column.get_element(4));
+
+    auto small = read_column(std::make_shared<DataTypeInt16>(), view);
+    ASSERT_TRUE(small.status.ok()) << small.status;
+    const auto& small_column = assert_cast<const ColumnInt16&>(*small.column);
+    EXPECT_EQ(127, small_column.get_element(3));
+    EXPECT_EQ(-128, small_column.get_element(4));
+
+    auto integer = read_column(std::make_shared<DataTypeInt32>(), view);
+    ASSERT_TRUE(integer.status.ok()) << integer.status;
+    const auto& int_column = assert_cast<const ColumnInt32&>(*integer.column);
+    EXPECT_EQ(127, int_column.get_element(3));
+    EXPECT_EQ(-128, int_column.get_element(4));
+
+    auto bigint = read_column(std::make_shared<DataTypeInt64>(), view);
+    ASSERT_TRUE(bigint.status.ok()) << bigint.status;
+    const auto& bigint_column = assert_cast<const ColumnInt64&>(*bigint.column);
+    ASSERT_EQ(values.size(), bigint_column.size());
+    for (size_t row = 0; row < values.size(); ++row) {
+        EXPECT_EQ(values[row], bigint_column.get_element(row));
+    }
+
+    auto largeint = read_column(std::make_shared<DataTypeInt128>(), view);
+    ASSERT_TRUE(largeint.status.ok()) << largeint.status;
+    const auto& largeint_column = assert_cast<const ColumnInt128&>(*largeint.column);
+    ASSERT_EQ(values.size(), largeint_column.size());
+    for (size_t row = 0; row < values.size(); ++row) {
+        EXPECT_EQ(static_cast<__int128_t>(values[row]), largeint_column.get_element(row));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadIntegersFromUnsignedSources) {
+    {
+        std::vector<uint32_t> values = {0, 1, std::numeric_limits<uint32_t>::max()};
+        auto view = make_fixed_view(DecodedValueKind::UINT32, values);
+        auto result = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt64&>(*result.column);
+        EXPECT_EQ(0, column.get_element(0));
+        EXPECT_EQ(1, column.get_element(1));
+        EXPECT_EQ(static_cast<int64_t>(std::numeric_limits<uint32_t>::max()),
+                  column.get_element(2));
+    }
+    {
+        std::vector<uint64_t> values = {0, 1, std::numeric_limits<uint64_t>::max()};
+        auto view = make_fixed_view(DecodedValueKind::UINT64, values);
+        auto result = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt128&>(*result.column);
+        EXPECT_EQ(0, column.get_element(0));
+        EXPECT_EQ(1, column.get_element(1));
+        EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits<uint64_t>::max()),
+                  column.get_element(2));
+    }
+    {
+        std::vector<uint64_t> values = {static_cast<uint64_t>(std::numeric_limits<int64_t>::max())};
+        auto view = make_fixed_view(DecodedValueKind::UINT64, values);
+        auto result = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt64&>(*result.column);
+        EXPECT_EQ(std::numeric_limits<int64_t>::max(), column.get_element(0));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadUnsignedLogicalIntegersCastsPhysicalValues) {
+    {
+        std::vector<int32_t> values = {0, 127, 255, 32767, 65535, -1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, false);
+        auto result = read_column(std::make_shared<DataTypeInt16>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt16&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        EXPECT_EQ(0, column.get_element(0));
+        EXPECT_EQ(127, column.get_element(1));
+        EXPECT_EQ(255, column.get_element(2));
+        EXPECT_EQ(255, column.get_element(3));
+        EXPECT_EQ(255, column.get_element(4));
+        EXPECT_EQ(255, column.get_element(5));
+    }
+    {
+        std::vector<int32_t> values = {32767, 65535, -1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 16, false);
+        auto result = read_column(std::make_shared<DataTypeInt32>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt32&>(*result.column);
+        ASSERT_EQ(values.size(), column.size());
+        EXPECT_EQ(32767, column.get_element(0));
+        EXPECT_EQ(65535, column.get_element(1));
+        EXPECT_EQ(65535, column.get_element(2));
+    }
+    {
+        std::vector<int32_t> values = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, values), 32, false);
+        auto result = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt64&>(*result.column);
+        ASSERT_EQ(1, column.size());
+        EXPECT_EQ(4294967295LL, column.get_element(0));
+    }
+    {
+        std::vector<int64_t> values = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT64, values), 64, false);
+        auto result = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnInt128&>(*result.column);
+        ASSERT_EQ(1, column.size());
+        EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits<uint64_t>::max()),
+                  column.get_element(0));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedLogicalIntegersCastsPhysicalValues) {
+    std::vector<int32_t> values = {127, 128, 255, -1};
+    auto view = with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, true);
+    auto result = read_column(std::make_shared<DataTypeInt8>(), view);
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnInt8&>(*result.column);
+    ASSERT_EQ(values.size(), column.size());
+    EXPECT_EQ(static_cast<Int8>(127), column.get_element(0));
+    EXPECT_EQ(static_cast<Int8>(-128), column.get_element(1));
+    EXPECT_EQ(static_cast<Int8>(-1), column.get_element(2));
+    EXPECT_EQ(static_cast<Int8>(-1), column.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFloatAndDouble) {
+    {
+        auto type = std::make_shared<DataTypeFloat32>();
+        std::vector<float> values = {0.0F, -0.0F, 1.5F, -2.25F};
+        auto result = read_column(type, make_fixed_view(DecodedValueKind::FLOAT, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnFloat32&>(*result.column);
+        EXPECT_FLOAT_EQ(0.0F, column.get_element(0));
+        EXPECT_TRUE(std::signbit(column.get_element(1)));
+        EXPECT_FLOAT_EQ(1.5F, column.get_element(2));
+        EXPECT_FLOAT_EQ(-2.25F, column.get_element(3));
+    }
+    {
+        auto type = std::make_shared<DataTypeFloat64>();
+        std::vector<double> values = {0.0, -0.0, 1.5, -2.25};
+        auto result = read_column(type, make_fixed_view(DecodedValueKind::DOUBLE, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnFloat64&>(*result.column);
+        EXPECT_DOUBLE_EQ(0.0, column.get_element(0));
+        EXPECT_TRUE(std::signbit(column.get_element(1)));
+        EXPECT_DOUBLE_EQ(1.5, column.get_element(2));
+        EXPECT_DOUBLE_EQ(-2.25, column.get_element(3));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFloatSpecialValues) {
+    {
+        std::vector<float> values = {std::numeric_limits<float>::quiet_NaN(),
+                                     std::numeric_limits<float>::infinity(),
+                                     -std::numeric_limits<float>::infinity()};
+        auto result = read_column(std::make_shared<DataTypeFloat32>(),
+                                  make_fixed_view(DecodedValueKind::FLOAT, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnFloat32&>(*result.column);
+        EXPECT_TRUE(std::isnan(column.get_element(0)));
+        EXPECT_TRUE(std::isinf(column.get_element(1)));
+        EXPECT_FALSE(std::signbit(column.get_element(1)));
+        EXPECT_TRUE(std::isinf(column.get_element(2)));
+        EXPECT_TRUE(std::signbit(column.get_element(2)));
+    }
+    {
+        std::vector<double> values = {std::numeric_limits<double>::quiet_NaN(),
+                                      std::numeric_limits<double>::infinity(),
+                                      -std::numeric_limits<double>::infinity()};
+        auto result = read_column(std::make_shared<DataTypeFloat64>(),
+                                  make_fixed_view(DecodedValueKind::DOUBLE, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnFloat64&>(*result.column);
+        EXPECT_TRUE(std::isnan(column.get_element(0)));
+        EXPECT_TRUE(std::isinf(column.get_element(1)));
+        EXPECT_FALSE(std::signbit(column.get_element(1)));
+        EXPECT_TRUE(std::isinf(column.get_element(2)));
+        EXPECT_TRUE(std::signbit(column.get_element(2)));
+    }
+}
+
+// ----------------------------------------------------------------------
+// Number SerDe error paths
+// ----------------------------------------------------------------------
+// These cases separate unsupported physical kinds from corrupt decoded buffers. Unsupported kinds
+// must not append to the destination column; missing value buffers are allowed only for empty or
+// all-null batches where no non-null row can dereference the buffer.
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMismatchedKind) {
+    struct Case {
+        DataTypePtr type;
+        DecodedValueKind kind;
+    };
+    std::vector<Case> cases = {
+            {std::make_shared<DataTypeBool>(), DecodedValueKind::INT32},
+            {std::make_shared<DataTypeInt32>(), DecodedValueKind::BOOL},
+            {std::make_shared<DataTypeFloat32>(), DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeFloat64>(), DecodedValueKind::FLOAT},
+            {std::make_shared<DataTypeInt32>(), DecodedValueKind::BINARY},
+    };
+
+    for (const auto& test_case : cases) {
+        std::vector<int32_t> values = {1};
+        auto result = read_column(test_case.type, make_fixed_view(test_case.kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMissingValuesWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeInt32>();
+    {
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::INT32;
+        view.row_count = 3;
+        auto result = read_column(type, view);
+        expect_corruption(result.status);
+    }
+    {
+        std::vector<uint8_t> null_map = {1, 0, 1};
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::INT32;
+        view.row_count = 3;
+        view.null_map = null_map.data();
+        auto result = read_column(type, view);
+        expect_corruption(result.status);
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberAllowsMissingValuesForAllNullOrEmpty) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    {
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::INT32;
+        view.row_count = 0;
+        auto result = read_column(type, view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        EXPECT_EQ(0, result.column->size());
+    }
+    {
+        std::vector<uint8_t> null_map = {1, 1, 1};
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::INT32;
+        view.row_count = 3;
+        view.null_map = null_map.data();
+        auto result = read_column(type, view);
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+        const auto& nested_column =
+                assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+        ASSERT_EQ(3, nullable_column.size());
+        for (size_t row = 0; row < nullable_column.size(); ++row) {
+            EXPECT_TRUE(nullable_column.is_null_at(row));
+            EXPECT_EQ(0, nested_column.get_element(row));
+        }
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsOutOfRangeValueInStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
+    std::vector<int64_t> values = {127, 128};
+    std::vector<uint8_t> null_map = {0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_data_quality_error(result.status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberNullsOutOfRangeValueInNonStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
+    std::vector<int64_t> values = {127, 128, -129, -128};
+    std::vector<uint8_t> null_map = {0, 0, 0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    const auto& nested_column = assert_cast<const ColumnInt8&>(nullable_column.get_nested_column());
+    ASSERT_EQ(4, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_TRUE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_EQ(127, nested_column.get_element(0));
+    EXPECT_EQ(0, nested_column.get_element(1));
+    EXPECT_EQ(0, nested_column.get_element(2));
+    EXPECT_EQ(-128, nested_column.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsUnsignedOverflowInStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>());
+    std::vector<uint64_t> values = {static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
+                                    std::numeric_limits<uint64_t>::max()};
+    std::vector<uint8_t> null_map = {0, 0};
+    auto view = make_fixed_view(DecodedValueKind::UINT64, values, &null_map);
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_data_quality_error(result.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberNullsUnsignedOverflowInNonStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>());
+    std::vector<uint64_t> values = {static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
+                                    std::numeric_limits<uint64_t>::max()};
+    std::vector<uint8_t> null_map = {0, 0};
+    auto view = make_fixed_view(DecodedValueKind::UINT64, values, &null_map);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    const auto& nested_column =
+            assert_cast<const ColumnInt64&>(nullable_column.get_nested_column());
+    ASSERT_EQ(2, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_EQ(std::numeric_limits<int64_t>::max(), nested_column.get_element(0));
+    EXPECT_EQ(0, nested_column.get_element(1));
+}
+
+// ----------------------------------------------------------------------
+// String / Binary SerDe
+// ----------------------------------------------------------------------
+// String-like decoded reads must preserve exact byte sequences. The embedded-NUL case prevents
+// accidental C-string truncation. Nullable string tests ensure null rows materialize default nested
+// values while the outer null map remains authoritative.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromBinary) {
+    auto type = std::make_shared<DataTypeString>();
+    std::vector<std::string> storage = {"alpha", "", std::string("a\0b", 3), "utf8-\xe4\xb8\xad"};
+    auto refs = string_refs(storage);
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_binary_column(*result.column, storage);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromFixedBinary) {
+    auto type = std::make_shared<DataTypeString>();
+    std::vector<std::string> storage = {std::string("\x00\x01\x02\x03", 4),
+                                        std::string("\x7f\x80\xfe\xff", 4)};
+    auto refs = string_refs(storage);
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 4));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_binary_column(*result.column, storage);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringNullMapMaterialization) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    std::vector<std::string> storage = {"alpha", "", "omega"};
+    auto refs = string_refs(storage);
+    std::vector<uint8_t> null_map = {0, 1, 0};
+
+    auto result =
+            read_column(type, make_binary_view(DecodedValueKind::BINARY, refs, -1, &null_map));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(3, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    expect_binary_column(nullable_column.get_nested_column(), {"alpha", "", "omega"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMismatchedKind) {
+    auto type = std::make_shared<DataTypeString>();
+    for (auto kind : {DecodedValueKind::INT32, DecodedValueKind::INT64, DecodedValueKind::DOUBLE}) {
+        std::vector<int64_t> values = {1};
+        auto result = read_column(type, make_fixed_view(kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMissingBinaryValuesWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeString>();
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::BINARY;
+    view.row_count = 1;
+
+    auto result = read_column(type, view);
+
+    expect_corruption(result.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringAllowsAllNullWithoutBinaryValues) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    std::vector<uint8_t> null_map = {1, 1};
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::BINARY;
+    view.row_count = 2;
+    view.null_map = null_map.data();
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(2, nullable_column.size());
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    expect_binary_column(nullable_column.get_nested_column(), {"", ""});
+}
+
+// ----------------------------------------------------------------------
+// DateV2 SerDe
+// ----------------------------------------------------------------------
+// DateV2 accepts Parquet DATE-style epoch days as INT32. Null rows insert default nested dates and
+// missing buffers are rejected only when a non-null row requires a value.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateV2FromEpochDays) {
+    auto type = std::make_shared<DataTypeDateV2>();
+    std::vector<int32_t> values = {-1, 0, 1, 18628, 18321};
+
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"1969-12-31", "1970-01-01", "1970-01-02", "2021-01-01", "2020-02-29"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2HandlesNulls) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>());
+    std::vector<int32_t> values = {0, 1, 2};
+    std::vector<uint8_t> null_map = {0, 1, 0};
+
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values, &null_map));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(3, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    expect_column_strings(*type, *result.column, {"1970-01-01", "NULL", "1970-01-03"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsInvalidKind) {
+    auto type = std::make_shared<DataTypeDateV2>();
+    for (auto kind :
+         {DecodedValueKind::INT64, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        std::vector<int64_t> values = {0};
+        auto result = read_column(type, make_fixed_view(kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsMissingValuesWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeDateV2>();
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::INT32;
+    view.row_count = 1;
+
+    auto result = read_column(type, view);
+
+    expect_corruption(result.status);
+}
+
+// ----------------------------------------------------------------------
+// DateTimeV2 SerDe
+// ----------------------------------------------------------------------
+// Timestamp decoding covers INT64 micros/millis, UNKNOWN-as-micros compatibility, UTC-adjusted
+// conversion with explicit/default timezones, INT96 Julian-day timestamps, and invalid buffer/kind
+// errors. Negative epoch values are included to lock correct floor-division behavior.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Micros) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {-1, 0, 1, 1234567, 86400000000LL - 1};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MICROS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567",
+                           "1970-01-01 23:59:59.999999"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Millis) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {-1, 0, 1, 1234};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MILLIS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"1969-12-31 23:59:59.999000", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.001000", "1970-01-01 00:00:01.234000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Nanos) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {-1000, 0, 1000, 1234567890};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::NANOS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UnknownUnitAsMicros) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {1000000};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::UNKNOWN;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column, {"1970-01-01 00:00:01.000000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedDefaultUtc) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MICROS;
+    view.timestamp_is_adjusted_to_utc = true;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column, {"1970-01-01 00:00:00.000000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedWithTimezones) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {0, -1, 1234567};
+    cctz::time_zone shanghai;
+    cctz::time_zone new_york;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai));
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("-05:00", new_york));
+
+    auto shanghai_view = make_fixed_view(DecodedValueKind::INT64, values);
+    shanghai_view.time_unit = DecodedTimeUnit::MICROS;
+    shanghai_view.timestamp_is_adjusted_to_utc = true;
+    shanghai_view.timezone = &shanghai;
+    auto shanghai_result = read_column(type, shanghai_view);
+    ASSERT_TRUE(shanghai_result.status.ok()) << shanghai_result.status;
+    expect_column_strings(*type, *shanghai_result.column,
+                          {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999",
+                           "1970-01-01 08:00:01.234567"});
+
+    auto new_york_view = make_fixed_view(DecodedValueKind::INT64, values);
+    new_york_view.time_unit = DecodedTimeUnit::MICROS;
+    new_york_view.timestamp_is_adjusted_to_utc = true;
+    new_york_view.timezone = &new_york;
+    auto new_york_result = read_column(type, new_york_view);
+    ASSERT_TRUE(new_york_result.status.ok()) << new_york_result.status;
+    expect_column_strings(*type, *new_york_result.column,
+                          {"1969-12-31 19:00:00.000000", "1969-12-31 18:59:59.999999",
+                           "1969-12-31 19:00:01.234567"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Int96) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    std::vector<TestInt96Timestamp> values = {
+            {0, 2440588},
+            {86399999999000LL, 2440587},
+            {0, 2440589},
+    };
+    std::vector<uint8_t> null_map = {0, 0, 1};
+    auto view = make_fixed_view(DecodedValueKind::INT96, values, &null_map);
+    cctz::time_zone shanghai;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai));
+    view.timezone = &shanghai;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999", "NULL"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimestampTzInt64AsUtcInstant) {
+    auto type = std::make_shared<DataTypeTimeStampTz>(6);
+    // 2024-12-31 16:00:00 UTC is displayed as 2025-01-01 00:00:00+08:00.
+    cctz::time_zone shanghai;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai));
+
+    std::vector<int64_t> micros_values = {1735660800000000LL, 1735660800123456LL};
+    auto micros_view = make_fixed_view(DecodedValueKind::INT64, micros_values);
+    micros_view.time_unit = DecodedTimeUnit::MICROS;
+    auto micros_result = read_column(type, micros_view);
+    ASSERT_TRUE(micros_result.status.ok()) << micros_result.status;
+    const auto& micros_column = assert_cast<const ColumnTimeStampTz&>(*micros_result.column);
+    EXPECT_EQ(micros_column.get_element(0).to_string(shanghai, 6),
+              "2025-01-01 00:00:00.000000+08:00");
+    EXPECT_EQ(micros_column.get_element(1).to_string(shanghai, 6),
+              "2025-01-01 00:00:00.123456+08:00");
+
+    std::vector<int64_t> millis_values = {1735660800000LL};
+    auto millis_view = make_fixed_view(DecodedValueKind::INT64, millis_values);
+    millis_view.time_unit = DecodedTimeUnit::MILLIS;
+    auto millis_result = read_column(type, millis_view);
+    ASSERT_TRUE(millis_result.status.ok()) << millis_result.status;
+    const auto& millis_column = assert_cast<const ColumnTimeStampTz&>(*millis_result.column);
+    EXPECT_EQ(millis_column.get_element(0).to_string(shanghai, 6),
+              "2025-01-01 00:00:00.000000+08:00");
+
+    std::vector<int64_t> nanos_values = {1735660800123456000LL};
+    auto nanos_view = make_fixed_view(DecodedValueKind::INT64, nanos_values);
+    nanos_view.time_unit = DecodedTimeUnit::NANOS;
+    auto nanos_result = read_column(type, nanos_view);
+    ASSERT_TRUE(nanos_result.status.ok()) << nanos_result.status;
+    const auto& nanos_column = assert_cast<const ColumnTimeStampTz&>(*nanos_result.column);
+    EXPECT_EQ(nanos_column.get_element(0).to_string(shanghai, 6),
+              "2025-01-01 00:00:00.123456+08:00");
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimestampTzReadsInt96AsUtcInstant) {
+    auto type = std::make_shared<DataTypeTimeStampTz>(6);
+    std::vector<TestInt96Timestamp> values = {{0, 2440588}, {123456789000LL, 2440588}};
+    auto view = make_fixed_view(DecodedValueKind::INT96, values);
+    cctz::time_zone shanghai;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai));
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnTimeStampTz&>(*result.column);
+    EXPECT_EQ(column.get_element(0).to_string(shanghai, 6), "1970-01-01 08:00:00.000000+08:00");
+    EXPECT_EQ(column.get_element(1).to_string(shanghai, 6), "1970-01-01 08:02:03.456789+08:00");
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsInvalidKind) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    for (auto kind :
+         {DecodedValueKind::INT32, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        std::vector<int64_t> values = {0};
+        auto result = read_column(type, make_fixed_view(kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsMissingValuesWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::INT64;
+    view.row_count = 1;
+
+    auto result = read_column(type, view);
+
+    expect_corruption(result.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsOutOfRangeEpochWithoutAbort) {
+    auto type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> values = {0, -377673580800000001LL};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MICROS;
+
+    auto result = read_column(type, view);
+
+    expect_data_quality_error(result.status);
+    EXPECT_EQ(0, result.column->size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2RejectsOutOfRangeEpochInStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    std::vector<int64_t> values = {0, -377673580800000001LL};
+    std::vector<uint8_t> null_map = {0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+    view.time_unit = DecodedTimeUnit::MICROS;
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_data_quality_error(result.status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2NullsOutOfRangeEpochInNonStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    std::vector<int64_t> values = {0, -377673580800000001LL, 1};
+    std::vector<uint8_t> null_map = {0, 0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+    view.time_unit = DecodedTimeUnit::MICROS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(3, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    expect_column_strings(*type, *result.column,
+                          {"1970-01-01 00:00:00.000000", "NULL", "1970-01-01 00:00:00.000001"});
+}
+
+// ----------------------------------------------------------------------
+// TimeV2 SerDe
+// ----------------------------------------------------------------------
+// TimeV2 decodes INT32 as milliseconds and INT64 according to the supplied time unit. Negative
+// durations are verified because they use a sign bit in TimeValue::TimeType rather than DateTimeV2
+// epoch arithmetic.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt32Millis) {
+    auto type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int32_t> values = {0, 1, -1, 3661001};
+
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(
+            *type, *result.column,
+            {"00:00:00.000000", "00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Micros) {
+    auto type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int64_t> values = {0, 1, -1, 3661000001LL};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MICROS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(
+            *type, *result.column,
+            {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+
+    view.time_unit = DecodedTimeUnit::UNKNOWN;
+    auto unknown_result = read_column(type, view);
+    ASSERT_TRUE(unknown_result.status.ok()) << unknown_result.status;
+    expect_column_strings(
+            *type, *unknown_result.column,
+            {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Millis) {
+    auto type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int64_t> values = {1, -1, 3661001};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::MILLIS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Nanos) {
+    auto type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int64_t> values = {1000, -1000, 3661000001000LL};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+    view.time_unit = DecodedTimeUnit::NANOS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column,
+                          {"00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimeV2HandlesNulls) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeTimeV2>(6));
+    std::vector<int64_t> values = {0, 1, 2};
+    std::vector<uint8_t> null_map = {0, 1, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+    view.time_unit = DecodedTimeUnit::MICROS;
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(3, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    expect_column_strings(*type, *result.column, {"00:00:00.000000", "NULL", "00:00:00.000002"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimeV2RejectsInvalidKind) {
+    auto type = std::make_shared<DataTypeTimeV2>(6);
+    for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        std::vector<int64_t> values = {0};
+        auto result = read_column(type, make_fixed_view(kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+// ----------------------------------------------------------------------
+// Decimal SerDe
+// ----------------------------------------------------------------------
+// Decimal cases cover integer-backed values and Parquet big-endian two's-complement binary values.
+// String assertions validate the user-visible scale, while direct column checks lock the native
+// unscaled value for every decimal width.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal32FromInt32) {
+    auto type = std::make_shared<DataTypeDecimal32>(9, 2);
+    std::vector<int32_t> values = {12345, -67, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT32, values);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnDecimal32&>(*result.column);
+    EXPECT_EQ(Decimal32(12345), column.get_element(0));
+    EXPECT_EQ(Decimal32(-67), column.get_element(1));
+    EXPECT_EQ(Decimal32(0), column.get_element(2));
+    expect_column_strings(*type, *result.column, {"123.45", "-0.67", "0.00"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal64FromInt64) {
+    auto type = std::make_shared<DataTypeDecimal64>(18, 4);
+    std::vector<int64_t> values = {123456789, -1};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnDecimal64&>(*result.column);
+    EXPECT_EQ(Decimal64(123456789), column.get_element(0));
+    EXPECT_EQ(Decimal64(-1), column.get_element(1));
+    expect_column_strings(*type, *result.column, {"12345.6789", "-0.0001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal128FromInt32AndInt64) {
+    auto type = std::make_shared<DataTypeDecimal128>(38, 6);
+    {
+        std::vector<int32_t> values = {123456, -1};
+        auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnDecimal128V3&>(*result.column);
+        EXPECT_EQ(decimal128_v3(123456), column.get_element(0));
+        EXPECT_EQ(decimal128_v3(-1), column.get_element(1));
+        expect_column_strings(*type, *result.column, {"0.123456", "-0.000001"});
+    }
+    {
+        std::vector<int64_t> values = {1234567890123LL, -1234567LL};
+        auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& column = assert_cast<const ColumnDecimal128V3&>(*result.column);
+        EXPECT_EQ(decimal128_v3(1234567890123LL), column.get_element(0));
+        EXPECT_EQ(decimal128_v3(-1234567LL), column.get_element(1));
+        expect_column_strings(*type, *result.column, {"1234567.890123", "-1.234567"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal256FromInt64) {
+    auto type = std::make_shared<DataTypeDecimal256>(76, 8);
+    std::vector<int64_t> values = {std::numeric_limits<int64_t>::max(),
+                                   std::numeric_limits<int64_t>::min()};
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnDecimal256&>(*result.column);
+    EXPECT_EQ(decimal256_from_int64(std::numeric_limits<int64_t>::max()), column.get_element(0));
+    EXPECT_EQ(decimal256_from_int64(std::numeric_limits<int64_t>::min()), column.get_element(1));
+    expect_column_strings(*type, *result.column, {"92233720368.54775807", "-92233720368.54775808"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromBinaryBigEndian) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    std::vector<std::string> storage = {
+            std::string("\x00", 1), std::string("\x7f", 1),     std::string("\x80", 1),
+            std::string("\xff", 1), std::string("\xff\xbd", 2), std::string("\x30\x39", 2),
+    };
+    auto refs = string_refs(storage);
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& column = assert_cast<const ColumnDecimal128V3&>(*result.column);
+    std::vector<Decimal128V3> expected = {decimal128_v3(0),    decimal128_v3(127),
+                                          decimal128_v3(-128), decimal128_v3(-1),
+                                          decimal128_v3(-67),  decimal128_v3(12345)};
+    ASSERT_EQ(expected.size(), column.size());
+    for (size_t row = 0; row < expected.size(); ++row) {
+        EXPECT_EQ(expected[row], column.get_element(row)) << "row=" << row;
+    }
+    expect_column_strings(*type, *result.column,
+                          {"0.00", "1.27", "-1.28", "-0.01", "-0.67", "123.45"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromFixedBinaryLengths) {
+    {
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> storage = {std::string("\x00", 1), std::string("\x80", 1)};
+        auto refs = string_refs(storage);
+        auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 1));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        expect_column_strings(*type, *result.column, {"0.00", "-1.28"});
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> storage = {std::string("\xff\xbd", 2), std::string("\x30\x39", 2)};
+        auto refs = string_refs(storage);
+        auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 2));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        expect_column_strings(*type, *result.column, {"-0.67", "123.45"});
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> storage = {std::string("\0\0\0\0\0\0\x30\x39", 8)};
+        auto refs = string_refs(storage);
+        auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 8));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        expect_column_strings(*type, *result.column, {"123.45"});
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> storage = {
+                std::string("\xff\xff\xff\xff\xff\xff\xff\xff"
+                            "\xff\xff\xff\xff\xff\xff\xff\xbd",
+                            16)};
+        auto refs = string_refs(storage);
+        auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 16));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        expect_column_strings(*type, *result.column, {"-0.67"});
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal256>(76, 2);
+        std::vector<std::string> storage = {std::string(31, '\xff') + std::string("\xbd", 1)};
+        auto refs = string_refs(storage);
+        auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32));
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        expect_column_strings(*type, *result.column, {"-0.67"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalHandlesNulls) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2));
+    std::vector<int64_t> values = {12345, -1, -67};
+    std::vector<uint8_t> null_map = {0, 1, 0};
+
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values, &null_map));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    const auto& decimal_column =
+            assert_cast<const ColumnDecimal128V3&>(nullable_column.get_nested_column());
+    ASSERT_EQ(3, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_EQ(decimal128_v3(12345), decimal_column.get_element(0));
+    EXPECT_EQ(decimal128_v3(0), decimal_column.get_element(1));
+    EXPECT_EQ(decimal128_v3(-67), decimal_column.get_element(2));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsOutOfRangeValueInStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal32>(9, 2));
+    std::vector<int64_t> values = {999999999, 1000000000};
+    std::vector<uint8_t> null_map = {0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_data_quality_error(result.status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalNullsOutOfRangeValueInNonStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal32>(9, 2));
+    std::vector<int64_t> values = {999999999, 1000000000, -1000000000, -999999999};
+    std::vector<uint8_t> null_map = {0, 0, 0, 0};
+    auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map);
+
+    auto result = read_column(type, view);
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    const auto& decimal_column =
+            assert_cast<const ColumnDecimal32&>(nullable_column.get_nested_column());
+    ASSERT_EQ(4, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_TRUE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_EQ(Decimal32(999999999), decimal_column.get_element(0));
+    EXPECT_EQ(Decimal32(0), decimal_column.get_element(1));
+    EXPECT_EQ(Decimal32(0), decimal_column.get_element(2));
+    EXPECT_EQ(Decimal32(-999999999), decimal_column.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsNullBinaryDataWithPositiveLength) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    std::vector<StringRef> refs = {StringRef(static_cast<const char*>(nullptr), 2)};
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    expect_corruption(result.status);
+    EXPECT_NE(std::string::npos, result.status.to_string().find("row 0"));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalAllowsZeroLengthBinaryAsZero) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    std::vector<StringRef> refs = {StringRef(static_cast<const char*>(nullptr), 0),
+                                   StringRef("", 0)};
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    expect_column_strings(*type, *result.column, {"0.00", "0.00"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsInvalidKind) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE,
+                      DecodedValueKind::UINT64}) {
+        std::vector<int64_t> values = {0};
+        auto result = read_column(type, make_fixed_view(kind, values));
+        expect_not_supported(result.status);
+        EXPECT_EQ(0, result.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsMissingBufferWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    {
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::INT64;
+        view.row_count = 1;
+        auto result = read_column(type, view);
+        expect_corruption(result.status);
+    }
+    {
+        DecodedColumnView view;
+        view.value_kind = DecodedValueKind::BINARY;
+        view.row_count = 1;
+        auto result = read_column(type, view);
+        expect_corruption(result.status);
+    }
+}
+
+// ----------------------------------------------------------------------
+// Nullable SerDe wrapper
+// ----------------------------------------------------------------------
+// Nullable tests focus on wrapper responsibilities: copying the outer null map, inserting default
+// nested values for null rows, treating a missing null_map as all non-null, appending to existing
+// columns, and rolling back outer state when the nested reader rejects the input.
+
+TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNullMapAndReadsNested) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    std::vector<int32_t> values = {10, 20, 30, 40};
+    std::vector<uint8_t> null_map = {0, 1, 0, 1};
+
+    auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values, &null_map));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    const auto& nested_column =
+            assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+    ASSERT_EQ(4, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_TRUE(nullable_column.is_null_at(3));
+    EXPECT_EQ(10, nested_column.get_element(0));
+    EXPECT_EQ(0, nested_column.get_element(1));
+    EXPECT_EQ(30, nested_column.get_element(2));
+    EXPECT_EQ(0, nested_column.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableWithoutNullMapReadsAllNonNull) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    std::vector<std::string> storage = {"alpha", "beta"};
+    auto refs = string_refs(storage);
+
+    auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(result.status.ok()) << result.status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(2, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    expect_binary_column(nullable_column.get_nested_column(), storage);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableAllNullDoesNotRequireNestedBuffer) {
+    std::vector<uint8_t> null_map = {1, 1};
+    std::vector<DataTypePtr> types = {
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>()),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2)),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>()),
+    };
+
+    for (const auto& type : types) {
+        DecodedColumnView view;
+        view.value_kind = type->get_name().find("String") != std::string::npos
+                                  ? DecodedValueKind::BINARY
+                                  : DecodedValueKind::INT32;
+        view.row_count = 2;
+        view.null_map = null_map.data();
+        auto result = read_column(type, view);
+        ASSERT_TRUE(result.status.ok()) << result.status << ", type=" << type->get_name();
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+        ASSERT_EQ(2, nullable_column.size());
+        EXPECT_TRUE(nullable_column.is_null_at(0));
+        EXPECT_TRUE(nullable_column.is_null_at(1));
+        EXPECT_EQ(2, nullable_column.get_nested_column().size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableAppendToExistingColumn) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto column = type->create_column();
+
+    std::vector<int32_t> first_values = {1, 2};
+    auto first_status = type->get_serde()->read_column_from_decoded_values(
+            *column, make_fixed_view(DecodedValueKind::INT32, first_values));
+    ASSERT_TRUE(first_status.ok()) << first_status;
+
+    std::vector<int32_t> second_values = {10, 20, 30};
+    std::vector<uint8_t> second_null_map = {0, 1, 0};
+    auto second_status = type->get_serde()->read_column_from_decoded_values(
+            *column, make_fixed_view(DecodedValueKind::INT32, second_values, &second_null_map));
+    ASSERT_TRUE(second_status.ok()) << second_status;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    const auto& nested_column =
+            assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+    ASSERT_EQ(5, nullable_column.size());
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_TRUE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+    EXPECT_EQ(1, nested_column.get_element(0));
+    EXPECT_EQ(2, nested_column.get_element(1));
+    EXPECT_EQ(10, nested_column.get_element(2));
+    EXPECT_EQ(0, nested_column.get_element(3));
+    EXPECT_EQ(30, nested_column.get_element(4));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNestedError) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto column = type->create_column();
+    std::vector<double> values = {1.0};
+    std::vector<uint8_t> null_map = {0};
+    auto view = make_fixed_view(DecodedValueKind::DOUBLE, values, &null_map);
+    view.enable_strict_mode = true;
+
+    auto status = type->get_serde()->read_column_from_decoded_values(*column, view);
+
+    expect_not_supported(status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsUnsupportedDecodedKindForAllTypes) {
+    struct Case {
+        DataTypePtr type;
+        DecodedValueKind kind;
+    };
+    std::vector<Case> cases = {
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeBool>()),
+             DecodedValueKind::INT32},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>()),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>()),
+             DecodedValueKind::FLOAT},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+             DecodedValueKind::INT64},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>()),
+             DecodedValueKind::INT64},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6)),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeTimeV2>(6)),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2)),
+             DecodedValueKind::DOUBLE},
+    };
+
+    std::vector<int64_t> values = {1, 2};
+    for (const auto& test_case : cases) {
+        auto view = make_fixed_view(test_case.kind, values);
+
+        auto result = read_column(test_case.type, view);
+
+        ASSERT_TRUE(result.status.ok()) << result.status << ", type=" << test_case.type->get_name();
+        expect_nullable_all_null(*result.column, values.size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsUnsupportedDecodedKind) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    std::vector<double> values = {1.0};
+    std::vector<uint8_t> null_map = {0};
+    auto view = make_fixed_view(DecodedValueKind::DOUBLE, values, &null_map);
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_not_supported(result.status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsRowLevelDecodedConversionFailure) {
+    {
+        auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+        std::vector<StringRef> refs = {StringRef("ok", 2),
+                                       StringRef(static_cast<const char*>(nullptr), 2),
+                                       StringRef("", 0)};
+        auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+
+        auto result = read_column(type, view);
+
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+        ASSERT_EQ(3, nullable_column.size());
+        EXPECT_FALSE(nullable_column.is_null_at(0));
+        EXPECT_TRUE(nullable_column.is_null_at(1));
+        EXPECT_FALSE(nullable_column.is_null_at(2));
+        expect_binary_column(nullable_column.get_nested_column(), {"ok", "", ""});
+    }
+    {
+        auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2));
+        std::vector<StringRef> refs = {StringRef("\x30\x39", 2),
+                                       StringRef(static_cast<const char*>(nullptr), 2)};
+        auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+
+        auto result = read_column(type, view);
+
+        ASSERT_TRUE(result.status.ok()) << result.status;
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+        ASSERT_EQ(2, nullable_column.size());
+        EXPECT_FALSE(nullable_column.is_null_at(0));
+        EXPECT_TRUE(nullable_column.is_null_at(1));
+        expect_column_strings(*type, *result.column, {"123.45", "NULL"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsRowLevelDecodedConversionFailure) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    std::vector<StringRef> refs = {StringRef("ok", 2),
+                                   StringRef(static_cast<const char*>(nullptr), 2)};
+    auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+    view.enable_strict_mode = true;
+
+    auto result = read_column(type, view);
+
+    expect_corruption(result.status);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    EXPECT_EQ(0, nullable_column.size());
+    EXPECT_EQ(0, nullable_column.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_column.get_nested_column().size());
+}
+
+// ----------------------------------------------------------------------
+// read_field_from_decoded_value
+// ----------------------------------------------------------------------
+// The field path is used by Parquet min/max and pruning code. It must be covered independently
+// because it creates a one-row column, delegates to the batch reader, and extracts a Field value.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPrimitiveValues) {
+    {
+        std::vector<uint8_t> values = {true};
+        auto field = read_field(std::make_shared<DataTypeBool>(), make_bool_view(values));
+        EXPECT_EQ(TYPE_BOOLEAN, field.get_type());
+        EXPECT_TRUE(field.get<TYPE_BOOLEAN>());
+    }
+    {
+        std::vector<int32_t> values = {-42};
+        auto field = read_field(std::make_shared<DataTypeInt32>(),
+                                make_fixed_view(DecodedValueKind::INT32, values));
+        EXPECT_EQ(TYPE_INT, field.get_type());
+        EXPECT_EQ(-42, field.get<TYPE_INT>());
+    }
+    {
+        std::vector<int64_t> values = {1234567890123LL};
+        auto field = read_field(std::make_shared<DataTypeInt64>(),
+                                make_fixed_view(DecodedValueKind::INT64, values));
+        EXPECT_EQ(TYPE_BIGINT, field.get_type());
+        EXPECT_EQ(1234567890123LL, field.get<TYPE_BIGINT>());
+    }
+    {
+        std::vector<int64_t> values = {-9};
+        auto field = read_field(std::make_shared<DataTypeInt128>(),
+                                make_fixed_view(DecodedValueKind::INT64, values));
+        EXPECT_EQ(TYPE_LARGEINT, field.get_type());
+        EXPECT_EQ(static_cast<__int128_t>(-9), field.get<TYPE_LARGEINT>());
+    }
+    {
+        std::vector<float> values = {std::numeric_limits<float>::quiet_NaN()};
+        auto field = read_field(std::make_shared<DataTypeFloat32>(),
+                                make_fixed_view(DecodedValueKind::FLOAT, values));
+        EXPECT_EQ(TYPE_FLOAT, field.get_type());
+        EXPECT_TRUE(std::isnan(field.get<TYPE_FLOAT>()));
+    }
+    {
+        std::vector<double> values = {std::numeric_limits<double>::infinity()};
+        auto field = read_field(std::make_shared<DataTypeFloat64>(),
+                                make_fixed_view(DecodedValueKind::DOUBLE, values));
+        EXPECT_EQ(TYPE_DOUBLE, field.get_type());
+        EXPECT_TRUE(std::isinf(field.get<TYPE_DOUBLE>()));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldLogicalIntegerCastsPhysicalValue) {
+    {
+        std::vector<int32_t> values = {32767};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, false);
+        auto field = read_field(std::make_shared<DataTypeInt16>(), view);
+        EXPECT_EQ(TYPE_SMALLINT, field.get_type());
+        EXPECT_EQ(255, field.get<TYPE_SMALLINT>());
+    }
+    {
+        std::vector<int32_t> values = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, values), 32, false);
+        auto field = read_field(std::make_shared<DataTypeInt64>(), view);
+        EXPECT_EQ(TYPE_BIGINT, field.get_type());
+        EXPECT_EQ(4294967295LL, field.get<TYPE_BIGINT>());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldStringValues) {
+    auto type = std::make_shared<DataTypeString>();
+    std::vector<std::string> storage = {std::string("a\0b", 3)};
+    auto refs = string_refs(storage);
+    auto field = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs));
+    EXPECT_EQ(TYPE_STRING, field.get_type());
+    EXPECT_EQ(std::string("a\0b", 3), field.get<TYPE_STRING>());
+
+    std::vector<std::string> fixed_storage = {std::string("\x00\x01\x02\x03", 4)};
+    auto fixed_refs = string_refs(fixed_storage);
+    auto fixed_field =
+            read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, fixed_refs, 4));
+    EXPECT_EQ(TYPE_STRING, fixed_field.get_type());
+    EXPECT_EQ(std::string("\x00\x01\x02\x03", 4), fixed_field.get<TYPE_STRING>());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDateTimeAndTimeValues) {
+    {
+        auto type = std::make_shared<DataTypeDateV2>();
+        std::vector<int32_t> values = {18628};
+        auto field = read_field(type, make_fixed_view(DecodedValueKind::INT32, values));
+        EXPECT_EQ(TYPE_DATEV2, field.get_type());
+        EXPECT_EQ("2021-01-01", field.to_debug_string(0));
+    }
+    {
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<int64_t> values = {1234567};
+        auto view = make_fixed_view(DecodedValueKind::INT64, values);
+        view.time_unit = DecodedTimeUnit::MICROS;
+        auto field = read_field(type, view);
+        EXPECT_EQ(TYPE_DATETIMEV2, field.get_type());
+        EXPECT_EQ("1970-01-01 00:00:01.234567", field.to_debug_string(6));
+    }
+    {
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<int64_t> values = {1234};
+        auto view = make_fixed_view(DecodedValueKind::INT64, values);
+        view.time_unit = DecodedTimeUnit::MILLIS;
+        auto field = read_field(type, view);
+        EXPECT_EQ(TYPE_DATETIMEV2, field.get_type());
+        EXPECT_EQ("1970-01-01 00:00:01.234000", field.to_debug_string(6));
+    }
+    {
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<TestInt96Timestamp> values = {{0, 2440588}};
+        auto field = read_field(type, make_fixed_view(DecodedValueKind::INT96, values));
+        EXPECT_EQ(TYPE_DATETIMEV2, field.get_type());
+        EXPECT_EQ("1970-01-01 00:00:00.000000", field.to_debug_string(6));
+    }
+    {
+        auto type = std::make_shared<DataTypeTimeV2>(6);
+        std::vector<int64_t> values = {3661000001LL};
+        auto view = make_fixed_view(DecodedValueKind::INT64, values);
+        view.time_unit = DecodedTimeUnit::MICROS;
+        auto field = read_field(type, view);
+        EXPECT_EQ(TYPE_TIMEV2, field.get_type());
+        auto column = type->create_column();
+        column->insert(field);
+        expect_column_strings(*type, *column, {"01:01:01.000001"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDecimalValues) {
+    {
+        auto type = std::make_shared<DataTypeDecimal32>(9, 2);
+        std::vector<int32_t> values = {12345};
+        auto field = read_field(type, make_fixed_view(DecodedValueKind::INT32, values));
+        EXPECT_EQ(TYPE_DECIMAL32, field.get_type());
+        EXPECT_EQ("123.45", field.to_debug_string(2));
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal64>(18, 4);
+        std::vector<int64_t> values = {-1};
+        auto field = read_field(type, make_fixed_view(DecodedValueKind::INT64, values));
+        EXPECT_EQ(TYPE_DECIMAL64, field.get_type());
+        EXPECT_EQ("-0.0001", field.to_debug_string(4));
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> storage = {std::string("\x30\x39", 2)};
+        auto refs = string_refs(storage);
+        auto field = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs));
+        EXPECT_EQ(TYPE_DECIMAL128I, field.get_type());
+        EXPECT_EQ("123.45", field.to_debug_string(2));
+    }
+    {
+        auto type = std::make_shared<DataTypeDecimal256>(76, 2);
+        std::vector<std::string> storage = {std::string(31, '\xff') + std::string("\xbd", 1)};
+        auto refs = string_refs(storage);
+        auto field = read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32));
+        EXPECT_EQ(TYPE_DECIMAL256, field.get_type());
+        EXPECT_EQ("-0.67", field.to_debug_string(2));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPropagatesUnsupportedKind) {
+    {
+        auto type = std::make_shared<DataTypeString>();
+        std::vector<int32_t> values = {1};
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::INT32, values)));
+    }
+    {
+        auto type = std::make_shared<DataTypeInt32>();
+        std::vector<double> values = {1.0};
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::DOUBLE, values)));
+    }
+    {
+        auto type = std::make_shared<DataTypeDateV2>();
+        std::vector<int64_t> values = {0};
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::INT64, values)));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsInvalidRowCountDeathTest) {
+    auto type = std::make_shared<DataTypeInt32>();
+    std::vector<int32_t> values = {1, 2};
+    Field field;
+
+    auto zero_row_view = make_fixed_view(DecodedValueKind::INT32, values);
+    zero_row_view.row_count = 0;
+    EXPECT_DEATH(
+            {
+                auto status = type->get_serde()->read_field_from_decoded_value(*type, &field,
+                                                                               zero_row_view);
+                (void)status;
+            },
+            "view.row_count == 1");
+
+    auto two_row_view = make_fixed_view(DecodedValueKind::INT32, values);
+    two_row_view.row_count = 2;
+    EXPECT_DEATH(
+            {
+                auto status = type->get_serde()->read_field_from_decoded_value(*type, &field,
+                                                                               two_row_view);
+                (void)status;
+            },
+            "view.row_count == 1");
+}
+
+TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsNullFieldPointerDeathTest) {
+    auto type = std::make_shared<DataTypeInt32>();
+    std::vector<int32_t> values = {1};
+    auto view = make_fixed_view(DecodedValueKind::INT32, values);
+
+    EXPECT_DEATH(
+            {
+                auto status =
+                        type->get_serde()->read_field_from_decoded_value(*type, nullptr, view);
+                (void)status;
+            },
+            "field != nullptr");
+}
+
+// ----------------------------------------------------------------------
+// Illegal kind matrix
+// ----------------------------------------------------------------------
+// This compact matrix complements the focused error tests above by ensuring each decoded-aware
+// family rejects representative illegal physical kinds without mutating an empty destination.
+
+TEST(DataTypeSerDeDecodedValuesTest, IllegalKindMatrixRejectsUnsupportedCombinations) {
+    struct Case {
+        DataTypePtr type;
+        std::vector<DecodedValueKind> illegal_kinds;
+    };
+    std::vector<Case> cases = {
+            {std::make_shared<DataTypeBool>(), {DecodedValueKind::INT32, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeInt32>(),
+             {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE,
+              DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeFloat32>(),
+             {DecodedValueKind::DOUBLE, DecodedValueKind::INT32}},
+            {std::make_shared<DataTypeFloat64>(),
+             {DecodedValueKind::FLOAT, DecodedValueKind::INT64}},
+            {std::make_shared<DataTypeString>(),
+             {DecodedValueKind::INT32, DecodedValueKind::DOUBLE}},
+            {std::make_shared<DataTypeDateV2>(),
+             {DecodedValueKind::INT64, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeDateTimeV2>(6),
+             {DecodedValueKind::INT32, DecodedValueKind::DOUBLE, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeTimeV2>(6),
+             {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}},
+            {std::make_shared<DataTypeDecimal128>(18, 2),
+             {DecodedValueKind::BOOL, DecodedValueKind::UINT64, DecodedValueKind::FLOAT,
+              DecodedValueKind::DOUBLE}},
+    };
+
+    for (const auto& test_case : cases) {
+        for (auto kind : test_case.illegal_kinds) {
+            std::vector<int64_t> values = {0};
+            auto result = read_column(test_case.type, make_fixed_view(kind, values));
+            expect_not_supported(result.status);
+            EXPECT_EQ(0, result.column->size()) << test_case.type->get_name();
+        }
+    }
+}
+
+} // namespace doris
diff --git a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
index 986583982eb2bd..c1663bf7a9dd49 100644
--- a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
@@ -54,6 +54,7 @@
 #include "core/data_type/data_type_quantilestate.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/data_type_struct.h"
+#include "core/data_type/data_type_timestamptz.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/types.h"
 #include "core/value/bitmap_value.h"
@@ -646,6 +647,17 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestDateTime) {
     }
 }
 
+TEST(DataTypeSerDePbTest, DataTypeTimeStampTzToProtobufKeepsScale) {
+    DataTypePtr data_type(std::make_shared<DataTypeTimeStampTz>(6));
+    PTypeDesc type_desc;
+    data_type->to_protobuf(&type_desc);
+
+    ASSERT_EQ(type_desc.types_size(), 1);
+    const auto& scalar_type = type_desc.types(0).scalar_type();
+    EXPECT_EQ(scalar_type.type(), TPrimitiveType::TIMESTAMPTZ);
+    EXPECT_EQ(scalar_type.scale(), 6);
+}
+
 TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) {
     std::cout << "==== LargeInt === " << std::endl;
     // LargeInt
@@ -662,4 +674,4 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) {
         check_pb_col(data_type, *vec.get());
     }
 }
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
index 403ef8713e4e67..b3e512734c6e73 100644
--- a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
+++ b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
@@ -18,10 +18,13 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "core/data_type/data_type_number.h"
 #include "exec/runtime_filter/runtime_filter_selectivity.h"
 #include "exec/runtime_filter/runtime_filter_test_utils.h"
 #include "exprs/runtime_filter_expr.h"
+#include "exprs/vdirect_in_predicate.h"
 #include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
 
 namespace doris {
 
@@ -178,4 +181,47 @@ TEST_F(RuntimeFilterExprSamplingTest, sampling_frequency_survives_context_recrea
     EXPECT_TRUE(selectivity.maybe_always_true_can_ignore());
 }
 
+// RuntimeFilterExpr exposes _impl->children(), but the wrapper itself does not own those
+// children in its own _children vector. Deep clone must therefore clone _impl explicitly.
+TEST_F(RuntimeFilterExprSamplingTest, deep_clone_clones_impl_tree) {
+    auto bool_type = TTypeDescBuilder()
+                             .set_types(TTypeNodeBuilder()
+                                                .set_type(TTypeNodeType::SCALAR)
+                                                .set_scalar_type(TPrimitiveType::BOOLEAN)
+                                                .build())
+                             .build();
+    TExprNode node = TExprNodeBuilder(TExprNodeType::IN_PRED, bool_type, 0).build();
+    node.in_predicate.__set_is_not_in(false);
+    node.__set_opcode(TExprOpcode::FILTER_IN);
+    node.__set_is_nullable(false);
+
+    auto slot = VSlotRef::create_shared(/*slot_id=*/0, /*column_id=*/0, /*column_uniq_id=*/10,
+                                        std::make_shared<DataTypeInt32>(), "c0");
+    auto impl = VDirectInPredicate::create_shared(node, nullptr);
+    impl->add_child(slot);
+
+    auto wrapper = RuntimeFilterExpr::create_shared(node, impl, 0.4, false, /*filter_id=*/7,
+                                                    /*sampling_frequency=*/32);
+
+    VExprSPtr cloned_expr;
+    ASSERT_TRUE(wrapper->deep_clone(&cloned_expr).ok());
+
+    auto* cloned_wrapper = dynamic_cast<RuntimeFilterExpr*>(cloned_expr.get());
+    ASSERT_NE(cloned_wrapper, nullptr);
+    EXPECT_NE(cloned_wrapper, wrapper.get());
+    EXPECT_EQ(cloned_wrapper->filter_id(), 7);
+
+    auto cloned_impl = cloned_wrapper->get_impl();
+    ASSERT_NE(cloned_impl, nullptr);
+    EXPECT_NE(cloned_impl.get(), impl.get());
+    ASSERT_EQ(cloned_impl->get_num_children(), 1);
+    EXPECT_NE(cloned_impl->children()[0].get(), slot.get());
+
+    auto* cloned_slot = dynamic_cast<VSlotRef*>(cloned_impl->children()[0].get());
+    ASSERT_NE(cloned_slot, nullptr);
+    EXPECT_EQ(cloned_slot->column_id(), 0);
+    EXPECT_EQ(cloned_slot->column_uniq_id(), 10);
+    EXPECT_EQ(cloned_slot->column_name(), "c0");
+}
+
 } // namespace doris
diff --git a/be/test/exec/scan/access_path_parser_test.cpp b/be/test/exec/scan/access_path_parser_test.cpp
new file mode 100644
index 00000000000000..d4bd6ab6c06360
--- /dev/null
+++ b/be/test/exec/scan/access_path_parser_test.cpp
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/access_path_parser.h"
+
+#include <gen_cpp/Descriptors_types.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+
+namespace doris {
+namespace {
+
+TColumnAccessPath data_access_path(std::vector<std::string> path) {
+    TColumnAccessPath access_path;
+    access_path.__set_type(TAccessPathType::DATA);
+    TDataAccessPath data_path;
+    data_path.__set_path(std::move(path));
+    access_path.__set_data_access_path(std::move(data_path));
+    return access_path;
+}
+
+TColumnAccessPath data_access_path_without_payload() {
+    TColumnAccessPath access_path;
+    access_path.__set_type(TAccessPathType::DATA);
+    return access_path;
+}
+
+TColumnAccessPath meta_access_path() {
+    TColumnAccessPath access_path;
+    access_path.__set_type(TAccessPathType::META);
+    return access_path;
+}
+
+format::ColumnDefinition field(int32_t id, std::string name, DataTypePtr type,
+                               std::vector<format::ColumnDefinition> children = {},
+                               std::vector<std::string> aliases = {}) {
+    return {
+            .identifier = Field::create_field<TYPE_INT>(id),
+            .name = std::move(name),
+            .name_mapping = std::move(aliases),
+            .type = std::move(type),
+            .children = std::move(children),
+    };
+}
+
+format::ColumnDefinition root_column(int32_t id, std::string name, DataTypePtr type) {
+    return {
+            .identifier = Field::create_field<TYPE_INT>(id),
+            .name = std::move(name),
+            .type = std::move(type),
+    };
+}
+
+void expect_child(const format::ColumnDefinition& child, int32_t id, const std::string& name) {
+    ASSERT_TRUE(child.has_identifier_field_id());
+    EXPECT_EQ(child.get_identifier_field_id(), id);
+    EXPECT_EQ(child.name, name);
+}
+
+const format::ColumnDefinition* find_child_by_name(const format::ColumnDefinition& parent,
+                                                   const std::string& name) {
+    for (const auto& child : parent.children) {
+        if (child.name == name) {
+            return &child;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace
+
+// Scenario: primitive columns and scanner-materialized virtual columns should not build nested
+// children, even when their descriptor carries access paths that are not meaningful to the parser.
+TEST(AccessPathParserTest, IgnoresPrimitiveColumnsAndScannerVirtualColumns) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+
+    // Primitive columns have no nested children, so parser should not inspect even invalid paths.
+    auto primitive = root_column(1, "id", int_type);
+    auto status = AccessPathParser::build_nested_children(
+            &primitive, std::vector<TColumnAccessPath> {meta_access_path()}, nullptr);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_TRUE(primitive.children.empty());
+
+    // Iceberg rowid is materialized by scanner/table-reader logic and may carry a negative access
+    // path. Parser must leave it untouched.
+    auto rowid_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, std::make_shared<DataTypeInt64>(),
+                       std::make_shared<DataTypeInt32>(), string_type},
+            Strings {"file_path", "row_pos", "partition_spec_id", "partition_data_json"});
+    format::ColumnDefinition rowid {
+            .identifier = Field::create_field<TYPE_STRING>(BeConsts::ICEBERG_ROWID_COL),
+            .name = BeConsts::ICEBERG_ROWID_COL,
+            .type = rowid_type,
+    };
+    status = AccessPathParser::build_nested_children(
+            &rowid, std::vector<TColumnAccessPath> {data_access_path({"-1"})}, nullptr);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_TRUE(rowid.children.empty());
+}
+
+// Scenario: reject unsupported top-level inputs before recursive type parsing, including META
+// paths, missing DATA payloads, and access paths whose root does not match the projected slot.
+TEST(AccessPathParserTest, RejectsUnsupportedTopLevelAccessPathInputs) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+
+    struct Case {
+        std::string name;
+        format::ColumnDefinition column;
+        std::vector<TColumnAccessPath> paths;
+    };
+    std::vector<Case> cases;
+    cases.push_back({"meta path", root_column(100, "s", struct_type), {meta_access_path()}});
+    cases.push_back({"missing DATA payload",
+                     root_column(100, "s", struct_type),
+                     {data_access_path_without_payload()}});
+    cases.push_back({"wrong root name",
+                     root_column(100, "s", struct_type),
+                     {data_access_path({"other", "a"})}});
+    cases.push_back({"wrong root field id",
+                     root_column(100, "s", struct_type),
+                     {data_access_path({"101", "a"})}});
+
+    for (auto& test_case : cases) {
+        auto status = AccessPathParser::build_nested_children(&test_case.column, test_case.paths,
+                                                              nullptr);
+        EXPECT_FALSE(status.ok()) << test_case.name;
+    }
+}
+
+// Scenario: struct access paths support field-id lookup, alias lookup, case-insensitive name
+// fallback, and whole-struct expansion; reserved array/map path tokens remain invalid.
+TEST(AccessPathParserTest, StructAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, int_type}, Strings {"a", "b"});
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(100),
+            .name = "s",
+            .type = struct_type,
+            .children =
+                    {
+                            field(101, "a", int_type),
+                            field(205, "b", int_type, {}, {"old_b"}),
+                    },
+    };
+
+    {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", "A"})}, nullptr);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 0, "a");
+    }
+    {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"100", "205"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 205, "b");
+    }
+    {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", "old_b"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 205, "b");
+        EXPECT_EQ(column.children[0].name_mapping, std::vector<std::string>({"old_b"}));
+    }
+    {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 101, "a");
+        expect_child(column.children[1], 205, "b");
+    }
+
+    for (const auto& invalid_child : {"OFFSET", "*", "KEYS", "VALUES", "missing"}) {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", invalid_child})},
+                &schema);
+        EXPECT_FALSE(status.ok()) << invalid_child;
+    }
+}
+
+// Scenario: array access paths must pass through the "*" element token, then reuse struct child
+// parsing under the element wrapper; invalid array tokens are rejected.
+TEST(AccessPathParserTest, ArrayAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto element_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                         Strings {"item", "quantity"});
+    auto array_type = std::make_shared<DataTypeArray>(element_type);
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(200),
+            .name = "items",
+            .type = array_type,
+            .children =
+                    {
+                            field(201, "element", element_type,
+                                  {
+                                          field(202, "item", string_type, {}, {"old_item"}),
+                                          field(203, "quantity", int_type),
+                                  }),
+                    },
+    };
+
+    {
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column,
+                std::vector<TColumnAccessPath> {data_access_path({"items", "*", "old_item"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 201, "element");
+        ASSERT_EQ(column.children[0].children.size(), 1);
+        expect_child(column.children[0].children[0], 202, "item");
+        EXPECT_EQ(column.children[0].children[0].name_mapping,
+                  std::vector<std::string>({"old_item"}));
+    }
+    {
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"items"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 201, "element");
+        ASSERT_EQ(column.children[0].children.size(), 2);
+        expect_child(column.children[0].children[0], 202, "item");
+        expect_child(column.children[0].children[1], 203, "quantity");
+    }
+
+    for (const auto& invalid_path : std::vector<std::vector<std::string>> {
+                 {"items", "OFFSET"}, {"items", "item"}, {"items", "*", "missing"}}) {
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path(invalid_path)}, &schema);
+        EXPECT_FALSE(status.ok()) << invalid_path.back();
+    }
+}
+
+// Scenario: map access paths split KEYS/VALUES, force the missing side needed for materialization,
+// merge repeated value-child requests, and reject unsupported map child tokens.
+TEST(AccessPathParserTest, MapAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, int_type, string_type}, Strings {"full_name", "age", "gender"});
+    auto map_type = std::make_shared<DataTypeMap>(string_type, value_type);
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(300),
+            .name = "m",
+            .type = map_type,
+            .children =
+                    {
+                            field(301, "key", string_type),
+                            field(302, "value", value_type,
+                                  {
+                                          field(303, "full_name", string_type, {}, {"name"}),
+                                          field(304, "age", int_type),
+                                          field(305, "gender", string_type),
+                                  }),
+                    },
+    };
+
+    {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m", "KEYS"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 301, "key");
+        expect_child(column.children[1], 302, "value");
+        ASSERT_EQ(column.children[1].children.size(), 3);
+        const auto* full_name = find_child_by_name(column.children[1], "full_name");
+        ASSERT_NE(full_name, nullptr);
+        expect_child(*full_name, 303, "full_name");
+        const auto* age = find_child_by_name(column.children[1], "age");
+        ASSERT_NE(age, nullptr);
+        expect_child(*age, 304, "age");
+        const auto* gender = find_child_by_name(column.children[1], "gender");
+        ASSERT_NE(gender, nullptr);
+        expect_child(*gender, 305, "gender");
+    }
+    {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m", "VALUES", "age"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 301, "key");
+        expect_child(column.children[1], 302, "value");
+        ASSERT_EQ(column.children[1].children.size(), 1);
+        expect_child(column.children[1].children[0], 304, "age");
+    }
+    {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column,
+                std::vector<TColumnAccessPath> {
+                        data_access_path({"m", "VALUES", "name"}),
+                        data_access_path({"m", "*", "gender"}),
+                },
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        ASSERT_EQ(column.children[1].children.size(), 2);
+        const auto* full_name = find_child_by_name(column.children[1], "full_name");
+        ASSERT_NE(full_name, nullptr);
+        expect_child(*full_name, 303, "full_name");
+        EXPECT_EQ(full_name->name_mapping, std::vector<std::string>({"name"}));
+        const auto* gender = find_child_by_name(column.children[1], "gender");
+        ASSERT_NE(gender, nullptr);
+        expect_child(*gender, 305, "gender");
+    }
+    {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        ASSERT_EQ(column.children[1].children.size(), 3);
+    }
+
+    for (const auto& invalid_path : std::vector<std::vector<std::string>> {
+                 {"m", "OFFSET"}, {"m", "ENTRY"}, {"m", "VALUES", "missing"}}) {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path(invalid_path)}, &schema);
+        EXPECT_FALSE(status.ok()) << invalid_path.back();
+    }
+}
+
+} // namespace doris
diff --git a/be/test/exec/scan/file_scanner_v2_test.cpp b/be/test/exec/scan/file_scanner_v2_test.cpp
new file mode 100644
index 00000000000000..d3f0507aca1122
--- /dev/null
+++ b/be/test/exec/scan/file_scanner_v2_test.cpp
@@ -0,0 +1,301 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/file_scanner_v2.h"
+
+#include <gen_cpp/PlanNodes_types.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "exec/scan/split_source_connector.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/expr/cast.h"
+
+namespace doris {
+namespace {
+
+TFileRangeDesc range_with_format(std::string table_format, TFileFormatType::type format_type) {
+    TFileRangeDesc range;
+    range.__set_format_type(format_type);
+    if (!table_format.empty()) {
+        TTableFormatFileDesc table_desc;
+        table_desc.__set_table_format_type(std::move(table_format));
+        range.__set_table_format_params(std::move(table_desc));
+    }
+    return range;
+}
+
+TFileRangeDesc hudi_range_with_delta_logs() {
+    auto range = range_with_format("hudi", TFileFormatType::FORMAT_PARQUET);
+    THudiFileDesc hudi_params;
+    hudi_params.__set_delta_logs({"delta.log"});
+    range.table_format_params.__set_hudi_params(std::move(hudi_params));
+    return range;
+}
+
+TScanRangeParams scan_range_param(const TFileRangeDesc& range) {
+    TScanRangeParams params;
+    params.scan_range.ext_scan_range.file_scan_range.ranges.push_back(range);
+    return params;
+}
+
+VExprSPtr slot_ref(int slot_id, int column_id, DataTypePtr type, const std::string& name) {
+    return VSlotRef::create_shared(slot_id, column_id, -1, std::move(type), name);
+}
+
+} // namespace
+
+// Scenario: FileScannerV2::is_supported should honor table format, scan params format, and the
+// optional per-range file format override as a single matrix.
+TEST(FileScannerV2Test, SupportedFormatMatrix) {
+    struct Case {
+        std::string table_format;
+        TFileFormatType::type params_format;
+        std::optional<TFileFormatType::type> range_format;
+        bool expected;
+    };
+
+    const std::vector<Case> cases {
+            {"", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"tvf", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"iceberg", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"paimon", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"hudi", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"jdbc", TFileFormatType::FORMAT_PARQUET, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_JNI, std::nullopt, false},
+            {"hive", TFileFormatType::FORMAT_ORC, std::nullopt, false},
+            {"jdbc", TFileFormatType::FORMAT_JNI, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_JNI, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_CSV_PLAIN, std::nullopt, true},
+            {"tvf", TFileFormatType::FORMAT_CSV_GZ, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_BZ2, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZ4FRAME, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZ4BLOCK, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZOP, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_DEFLATE, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PROTO, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_TEXT, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_JSON, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_ORC, false},
+            {"hive", TFileFormatType::FORMAT_ORC, TFileFormatType::FORMAT_PARQUET, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_CSV_PLAIN, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_TEXT, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_JSON, true},
+    };
+
+    for (const auto& test_case : cases) {
+        TFileScanRangeParams params;
+        params.__set_format_type(test_case.params_format);
+        auto range = range_with_format(test_case.table_format,
+                                       test_case.range_format.value_or(test_case.params_format));
+        if (!test_case.range_format.has_value()) {
+            range.__isset.format_type = false;
+        }
+        EXPECT_EQ(FileScannerV2::is_supported(params, range), test_case.expected)
+                << "table_format=" << test_case.table_format
+                << ", params_format=" << static_cast<int>(test_case.params_format)
+                << ", range_has_format=" << test_case.range_format.has_value();
+    }
+
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    EXPECT_FALSE(FileScannerV2::is_supported(params, hudi_range_with_delta_logs()));
+}
+
+// Scenario: SplitSourceConnector should route to FileScannerV2 only when every scan range in the
+// source is supported; one unsupported table format or file format must make the match fail.
+TEST(FileScannerV2Test, SplitSourceAllScanRangesMatchRequiresEveryRangeSupported) {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+
+    const auto supported = range_with_format("hive", TFileFormatType::FORMAT_PARQUET);
+    const auto unsupported_table = range_with_format("lakesoul", TFileFormatType::FORMAT_PARQUET);
+    const auto unsupported_format = range_with_format("hive", TFileFormatType::FORMAT_ORC);
+
+    LocalSplitSourceConnector all_supported(
+            {scan_range_param(supported),
+             scan_range_param(range_with_format("iceberg", TFileFormatType::FORMAT_PARQUET))},
+            1);
+    EXPECT_TRUE(all_supported.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector hudi_supported(
+            {scan_range_param(supported),
+             scan_range_param(range_with_format("hudi", TFileFormatType::FORMAT_PARQUET))},
+            1);
+    EXPECT_TRUE(hudi_supported.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector table_mismatch(
+            {scan_range_param(supported), scan_range_param(unsupported_table)}, 1);
+    EXPECT_FALSE(table_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector format_mismatch(
+            {scan_range_param(supported), scan_range_param(unsupported_format)}, 1);
+    EXPECT_FALSE(format_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported));
+}
+
+// Scenario: FileScannerV2 converts only the file formats implemented by format_v2 readers and
+// rejects everything else before TableReader::init sees an unsupported FileFormat.
+TEST(FileScannerV2Test, FileFormatConversionMatrix) {
+    struct Case {
+        TFileFormatType::type input;
+        std::optional<format::FileFormat> expected;
+    };
+    const std::vector<Case> cases {
+            {TFileFormatType::FORMAT_PARQUET, format::FileFormat::PARQUET},
+            {TFileFormatType::FORMAT_JNI, format::FileFormat::JNI},
+            {TFileFormatType::FORMAT_CSV_PLAIN, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_GZ, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_BZ2, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZ4FRAME, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZ4BLOCK, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZOP, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_DEFLATE, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_PROTO, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_TEXT, format::FileFormat::TEXT},
+            {TFileFormatType::FORMAT_JSON, format::FileFormat::JSON},
+            {TFileFormatType::FORMAT_ORC, std::nullopt},
+    };
+
+    for (const auto& test_case : cases) {
+        format::FileFormat file_format = format::FileFormat::PARQUET;
+        const auto status = FileScannerV2::TEST_to_file_format(test_case.input, &file_format);
+        if (test_case.expected.has_value()) {
+            ASSERT_TRUE(status.ok()) << status;
+            EXPECT_EQ(file_format, *test_case.expected);
+        } else {
+            EXPECT_FALSE(status.ok());
+        }
+    }
+}
+
+// Scenario: partition slots are identified from the explicit FE category when present, otherwise
+// from the legacy is_file_slot flag. Scanner-generated rowid columns must never be treated as
+// partition columns even if FE marks them as non-file slots.
+TEST(FileScannerV2Test, PartitionSlotClassificationMatrix) {
+    TFileScanSlotInfo legacy_partition;
+    legacy_partition.__set_is_file_slot(false);
+    EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(legacy_partition, "dt"));
+
+    TFileScanSlotInfo legacy_file;
+    legacy_file.__set_is_file_slot(true);
+    EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(legacy_file, "value"));
+
+    TFileScanSlotInfo categorized_partition;
+    categorized_partition.__set_is_file_slot(true);
+    categorized_partition.__set_category(TColumnCategory::PARTITION_KEY);
+    EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(categorized_partition, "p"));
+
+    TFileScanSlotInfo categorized_regular;
+    categorized_regular.__set_is_file_slot(false);
+    categorized_regular.__set_category(TColumnCategory::REGULAR);
+    EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(categorized_regular, "regular_col"));
+
+    EXPECT_FALSE(
+            FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::GLOBAL_ROWID_COL));
+    EXPECT_FALSE(
+            FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::ICEBERG_ROWID_COL));
+}
+
+// Scenario: data-file slots are the complement of partition/default/synthesized columns for
+// formats without embedded schema. FE may send either the new category or the old is_file_slot
+// flag, and scanner-generated rowid columns must never be passed to a physical file reader.
+TEST(FileScannerV2Test, DataFileSlotClassificationMatrix) {
+    TFileScanSlotInfo legacy_file;
+    legacy_file.__set_is_file_slot(true);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(legacy_file, "value"));
+
+    TFileScanSlotInfo legacy_partition;
+    legacy_partition.__set_is_file_slot(false);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_partition, "dt"));
+
+    TFileScanSlotInfo categorized_regular;
+    categorized_regular.__set_is_file_slot(false);
+    categorized_regular.__set_category(TColumnCategory::REGULAR);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_regular, "regular_col"));
+
+    TFileScanSlotInfo categorized_generated;
+    categorized_generated.__set_is_file_slot(false);
+    categorized_generated.__set_category(TColumnCategory::GENERATED);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_generated, "generated_col"));
+
+    TFileScanSlotInfo categorized_partition;
+    categorized_partition.__set_is_file_slot(true);
+    categorized_partition.__set_category(TColumnCategory::PARTITION_KEY);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_partition, "p"));
+
+    TFileScanSlotInfo categorized_synthesized;
+    categorized_synthesized.__set_is_file_slot(true);
+    categorized_synthesized.__set_category(TColumnCategory::SYNTHESIZED);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_synthesized, "virtual_col"));
+
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::GLOBAL_ROWID_COL));
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::ICEBERG_ROWID_COL));
+}
+
+// Scenario: table conjuncts are cloned into global-index space before they are handed to
+// TableReader. Explicit slot-id mappings use the required_slots order; missing mappings fall back
+// to the slot id itself for legacy descriptors.
+TEST(FileScannerV2Test, RewriteSlotRefsToGlobalIndexMatrix) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    {
+        auto expr = slot_ref(42, 99, int_type, "value");
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+                &expr, {{42, format::GlobalIndex(3)}});
+        ASSERT_TRUE(status.ok()) << status;
+        const auto* rewritten = assert_cast<const VSlotRef*>(expr.get());
+        EXPECT_EQ(rewritten->slot_id(), 3);
+        EXPECT_EQ(rewritten->column_id(), 3);
+        EXPECT_EQ(rewritten->column_name(), "value");
+    }
+    {
+        auto expr = slot_ref(7, 99, int_type, "legacy_value");
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(&expr, {});
+        ASSERT_TRUE(status.ok()) << status;
+        const auto* rewritten = assert_cast<const VSlotRef*>(expr.get());
+        EXPECT_EQ(rewritten->slot_id(), 7);
+        EXPECT_EQ(rewritten->column_id(), 7);
+        EXPECT_EQ(rewritten->column_name(), "legacy_value");
+    }
+    {
+        auto cast_expr = format::Cast::create_shared(int_type);
+        cast_expr->add_child(slot_ref(9, 9, int_type, "nested_value"));
+        VExprSPtr expr = cast_expr;
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+                &expr, {{9, format::GlobalIndex(1)}});
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(expr->get_num_children(), 1);
+        const auto* rewritten_child = assert_cast<const VSlotRef*>(expr->children()[0].get());
+        EXPECT_EQ(rewritten_child->slot_id(), 1);
+        EXPECT_EQ(rewritten_child->column_id(), 1);
+        EXPECT_EQ(rewritten_child->column_name(), "nested_value");
+    }
+}
+
+} // namespace doris
diff --git a/be/test/exec/scan/vfile_scanner_exception_test.cpp b/be/test/exec/scan/vfile_scanner_exception_test.cpp
index 64b17a6a86b87b..70b3d07f8eff48 100644
--- a/be/test/exec/scan/vfile_scanner_exception_test.cpp
+++ b/be/test/exec/scan/vfile_scanner_exception_test.cpp
@@ -18,13 +18,19 @@
 #include <gen_cpp/PlanNodes_types.h>
 #include <gtest/gtest.h>
 
+#include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "common/object_pool.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
 #include "cpp/sync_point.h"
 #include "exec/operator/file_scan_operator.h"
 #include "exec/scan/file_scanner.h"
+#include "exec/scan/split_source_connector.h"
+#include "format_v2/table/hive_reader.h"
 #include "io/fs/local_file_system.h"
 #include "load/group_commit/wal/wal_manager.h"
 #include "runtime/cluster_info.h"
@@ -34,7 +40,6 @@
 #include "runtime/user_function_cache.h"
 
 namespace doris {
-
 class TestSplitSourceConnectorStub : public SplitSourceConnector {
 private:
     std::mutex _range_lock;
@@ -336,4 +341,112 @@ TEST_F(VfileScannerExceptionTest, process_late_arrival_conjuncts_retain) {
     WARN_IF_ERROR(scanner->close(&_runtime_state), "fail to close scanner");
 }
 
+TEST(HiveReaderPositionMappingTest, PositionMappingUsesColumnIdxsForFileSlots) {
+    TQueryOptions query_options;
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_column_idxs({2, 0});
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo id_slot;
+    id_slot.__set_is_file_slot(true);
+    format::ColumnDefinition id_column {
+            .identifier = Field::create_field<TYPE_STRING>("id"),
+            .name = "id",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    TFileScanSlotInfo name_slot;
+    name_slot.__set_is_file_slot(true);
+    format::ColumnDefinition name_column {
+            .identifier = Field::create_field<TYPE_STRING>("name"),
+            .name = "name",
+            .type = std::make_shared<DataTypeString>(),
+    };
+
+    ASSERT_TRUE(reader.annotate_projected_column(id_slot, &context, &id_column).ok());
+    ASSERT_TRUE(id_column.has_identifier_field_id());
+    EXPECT_EQ(id_column.get_identifier_position(), 2);
+    EXPECT_EQ(context.next_file_column_idx, 1);
+
+    ASSERT_TRUE(reader.annotate_projected_column(name_slot, &context, &name_column).ok());
+    ASSERT_TRUE(name_column.has_identifier_field_id());
+    EXPECT_EQ(name_column.get_identifier_position(), 0);
+    EXPECT_EQ(context.next_file_column_idx, 2);
+    ASSERT_TRUE(reader.validate_projected_columns(context).ok());
+}
+
+TEST(HiveReaderPositionMappingTest, PositionMappingDoesNotConsumePartitionSlots) {
+    TQueryOptions query_options;
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_column_idxs({3});
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo partition_slot;
+    partition_slot.__set_is_file_slot(false);
+    partition_slot.__set_category(TColumnCategory::PARTITION_KEY);
+    format::ColumnDefinition partition_column {
+            .identifier = Field::create_field<TYPE_STRING>("year"),
+            .name = "year",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    TFileScanSlotInfo value_slot;
+    value_slot.__set_is_file_slot(true);
+    format::ColumnDefinition value_column {
+            .identifier = Field::create_field<TYPE_STRING>("value"),
+            .name = "value",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    ASSERT_TRUE(reader.annotate_projected_column(partition_slot, &context, &partition_column).ok());
+    ASSERT_TRUE(partition_column.has_identifier_name());
+    EXPECT_EQ(partition_column.get_identifier_name(), "year");
+    EXPECT_EQ(context.next_file_column_idx, 0);
+
+    ASSERT_TRUE(reader.annotate_projected_column(value_slot, &context, &value_column).ok());
+    ASSERT_TRUE(value_column.has_identifier_field_id());
+    EXPECT_EQ(value_column.get_identifier_position(), 3);
+    EXPECT_EQ(context.next_file_column_idx, 1);
+    ASSERT_TRUE(reader.validate_projected_columns(context).ok());
+}
+
+TEST(HiveReaderPositionMappingTest, PositionMappingFailsWhenColumnIdxsMissing) {
+    TQueryOptions query_options;
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo value_slot;
+    value_slot.__set_is_file_slot(true);
+    format::ColumnDefinition value_column {
+            .identifier = Field::create_field<TYPE_STRING>("value"),
+            .name = "value",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    auto status = reader.annotate_projected_column(value_slot, &context, &value_column);
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(context.next_file_column_idx, 0);
+}
+
 } // namespace doris
diff --git a/be/test/format_v2/column_mapper_test.cpp b/be/test/format_v2/column_mapper_test.cpp
new file mode 100644
index 00000000000000..392adfa8126af7
--- /dev/null
+++ b/be/test/format_v2/column_mapper_test.cpp
@@ -0,0 +1,4066 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper.h"
+
+#include <gtest/gtest.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_decimal.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/data_type_timestamptz.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vin_predicate.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper_nested.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/schema_projection.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+#include "runtime/descriptors.h"
+#include "storage/predicate/predicate_creator.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+DataTypePtr i32() {
+    return std::make_shared<DataTypeInt32>();
+}
+
+DataTypePtr i64() {
+    return std::make_shared<DataTypeInt64>();
+}
+
+DataTypePtr f32() {
+    return std::make_shared<DataTypeFloat32>();
+}
+
+DataTypePtr f64() {
+    return std::make_shared<DataTypeFloat64>();
+}
+
+DataTypePtr dec32(uint32_t precision, uint32_t scale) {
+    return std::make_shared<DataTypeDecimal32>(precision, scale);
+}
+
+DataTypePtr str() {
+    return std::make_shared<DataTypeString>();
+}
+
+DataTypePtr timestamptz(uint32_t scale) {
+    return std::make_shared<DataTypeTimeStampTz>(scale);
+}
+
+DataTypePtr u8() {
+    return std::make_shared<DataTypeUInt8>();
+}
+
+ColumnDefinition field_id_col(const std::string& name, int32_t field_id, DataTypePtr type,
+                              int32_t local_id = -1) {
+    ColumnDefinition column;
+    column.identifier = Field::create_field<TYPE_INT>(field_id);
+    column.local_id = local_id;
+    column.name = name;
+    column.type = std::move(type);
+    return column;
+}
+
+ColumnDefinition name_col(const std::string& name, DataTypePtr type, int32_t local_id = -1) {
+    ColumnDefinition column;
+    column.identifier = Field::create_field<TYPE_STRING>(name);
+    column.local_id = local_id;
+    column.name = name;
+    column.type = std::move(type);
+    return column;
+}
+
+ColumnDefinition name_id_col(const std::string& name, const std::string& identifier,
+                             DataTypePtr type, int32_t local_id = -1) {
+    ColumnDefinition column = name_col(name, std::move(type), local_id);
+    column.identifier = Field::create_field<TYPE_STRING>(identifier);
+    return column;
+}
+
+ColumnDefinition position_col(const std::string& name, int32_t file_position, DataTypePtr type) {
+    return field_id_col(name, file_position, std::move(type));
+}
+
+ColumnDefinition struct_col(const std::string& name, int32_t field_id,
+                            std::vector<ColumnDefinition> children, int32_t local_id = -1) {
+    DataTypes child_types;
+    Strings child_names;
+    child_types.reserve(children.size());
+    child_names.reserve(children.size());
+    for (const auto& child : children) {
+        child_types.push_back(child.type);
+        child_names.push_back(child.name);
+    }
+    auto column = field_id_col(
+            name, field_id, std::make_shared<DataTypeStruct>(child_types, child_names), local_id);
+    column.children = std::move(children);
+    return column;
+}
+
+ColumnDefinition struct_name_col(const std::string& name, std::vector<ColumnDefinition> children,
+                                 int32_t local_id = -1) {
+    auto column = struct_col(name, -1, std::move(children), local_id);
+    column.identifier = Field::create_field<TYPE_STRING>(name);
+    return column;
+}
+
+ColumnDefinition array_col(const std::string& name, int32_t field_id, ColumnDefinition element,
+                           int32_t local_id = -1) {
+    auto column =
+            field_id_col(name, field_id, std::make_shared<DataTypeArray>(element.type), local_id);
+    column.children = {std::move(element)};
+    return column;
+}
+
+ColumnDefinition map_col(const std::string& name, int32_t field_id,
+                         std::vector<ColumnDefinition> children, const DataTypePtr& key_type,
+                         const DataTypePtr& value_type, int32_t local_id = -1) {
+    auto column = field_id_col(name, field_id, std::make_shared<DataTypeMap>(key_type, value_type),
+                               local_id);
+    column.children = std::move(children);
+    return column;
+}
+
+void set_name_identifiers(ColumnDefinition* column, int32_t local_id) {
+    DORIS_CHECK(column != nullptr);
+    column->identifier = Field::create_field<TYPE_STRING>(column->name);
+    column->local_id = local_id;
+    for (size_t idx = 0; idx < column->children.size(); ++idx) {
+        set_name_identifiers(&column->children[idx], static_cast<int32_t>(idx));
+    }
+}
+
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    std::vector<int32_t> ids;
+    ids.reserve(projections.size());
+    for (const auto& projection : projections) {
+        ids.push_back(projection.local_id());
+    }
+    return ids;
+}
+
+std::vector<std::string> target_names(const FileStructPredicateTarget* target) {
+    std::vector<std::string> names;
+    for (const auto* current = target; current != nullptr; current = current->child.get()) {
+        names.push_back(current->file_child_name);
+    }
+    return names;
+}
+
+void expect_mapping(const ColumnMapping& mapping, size_t global_index,
+                    const std::string& table_name, int32_t file_local_id,
+                    const std::string& file_name, const DataTypePtr& file_type,
+                    const DataTypePtr& table_type) {
+    EXPECT_EQ(mapping.global_index, GlobalIndex(global_index));
+    EXPECT_EQ(mapping.table_column_name, table_name);
+    ASSERT_TRUE(mapping.file_local_id.has_value());
+    EXPECT_EQ(*mapping.file_local_id, file_local_id);
+    EXPECT_EQ(mapping.file_column_name, file_name);
+    ASSERT_NE(mapping.file_type, nullptr);
+    ASSERT_NE(mapping.table_type, nullptr);
+    EXPECT_TRUE(mapping.file_type->equals(*file_type));
+    EXPECT_TRUE(mapping.table_type->equals(*table_type));
+}
+
+void expect_constant(const TableColumnMapper& mapper, const ColumnMapping& mapping,
+                     size_t global_index, const DataTypePtr& table_type) {
+    EXPECT_FALSE(mapping.file_local_id.has_value());
+    ASSERT_TRUE(mapping.constant_index.has_value());
+    ASSERT_LT(mapping.constant_index->value(), mapper.constant_map().size());
+    const auto& entry = mapper.constant_map().get(*mapping.constant_index);
+    EXPECT_EQ(entry.global_index, GlobalIndex(global_index));
+    EXPECT_TRUE(entry.type->equals(*table_type));
+    EXPECT_EQ(entry.expr, mapping.default_expr);
+}
+
+void expect_missing(const ColumnMapping& mapping) {
+    EXPECT_FALSE(mapping.file_local_id.has_value());
+    EXPECT_FALSE(mapping.constant_index.has_value());
+    EXPECT_EQ(mapping.virtual_column_type, TableVirtualColumnType::INVALID);
+}
+
+class TestFunctionExpr final : public VExpr {
+public:
+    TestFunctionExpr(std::string function_name, DataTypePtr data_type,
+                     TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                     TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE)
+            : VExpr(std::move(data_type), false), _expr_name(std::move(function_name)) {
+        set_node_type(node_type);
+        _opcode = opcode;
+        TFunctionName fn_name;
+        fn_name.__set_function_name(_expr_name);
+        _fn.__set_name(fn_name);
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr =
+                std::make_shared<TestFunctionExpr>(_expr_name, data_type(), node_type(), _opcode);
+        return Status::OK();
+    }
+
+    Status execute_column_impl(VExprContext*, const Block*, const Selector*, size_t,
+                               ColumnPtr&) const override {
+        return Status::NotSupported("TestFunctionExpr is only used for ColumnMapper analysis");
+    }
+
+private:
+    std::string _expr_name;
+};
+
+VExprSPtr table_slot(int slot_id, int column_id, DataTypePtr type, const std::string& name) {
+    return VSlotRef::create_shared(slot_id, column_id, -1, std::move(type), name);
+}
+
+VExprSPtr literal(DataTypePtr type, Field value) {
+    return VLiteral::create_shared(std::move(type), std::move(value));
+}
+
+VExprSPtr struct_element(const VExprSPtr& parent, DataTypePtr child_type,
+                         const std::string& child_name) {
+    auto expr = std::make_shared<TestFunctionExpr>("struct_element", child_type);
+    expr->add_child(parent);
+    expr->add_child(literal(str(), Field::create_field<TYPE_STRING>(child_name)));
+    return expr;
+}
+
+VExprSPtr element_at(const VExprSPtr& parent, DataTypePtr child_type,
+                     const std::string& child_name) {
+    auto expr = std::make_shared<TestFunctionExpr>("element_at", std::move(child_type));
+    expr->add_child(parent);
+    expr->add_child(literal(str(), Field::create_field<TYPE_STRING>(child_name)));
+    return expr;
+}
+
+VExprSPtr array_element_at(const VExprSPtr& parent, DataTypePtr child_type, int64_t ordinal) {
+    auto expr = std::make_shared<TestFunctionExpr>("element_at", std::move(child_type));
+    expr->add_child(parent);
+    expr->add_child(literal(i64(), Field::create_field<TYPE_BIGINT>(ordinal)));
+    return expr;
+}
+
+VExprSPtr map_values(const VExprSPtr& parent, DataTypePtr value_type) {
+    auto expr = std::make_shared<TestFunctionExpr>(
+            "map_values", std::make_shared<DataTypeArray>(std::move(value_type)));
+    expr->add_child(parent);
+    return expr;
+}
+
+VExprSPtr map_keys(const VExprSPtr& parent, DataTypePtr key_type) {
+    auto expr = std::make_shared<TestFunctionExpr>(
+            "map_keys", std::make_shared<DataTypeArray>(std::move(key_type)));
+    expr->add_child(parent);
+    return expr;
+}
+
+VExprSPtr array_contains(const VExprSPtr& array, const VExprSPtr& value) {
+    auto expr = std::make_shared<TestFunctionExpr>("array_contains", u8());
+    expr->add_child(array);
+    expr->add_child(value);
+    return expr;
+}
+
+VExprSPtr like_expr(const VExprSPtr& left, const std::string& pattern) {
+    auto expr = std::make_shared<TestFunctionExpr>("like", u8());
+    expr->add_child(left);
+    expr->add_child(literal(str(), Field::create_field<TYPE_STRING>(pattern)));
+    return expr;
+}
+
+VExprSPtr struct_element_by_selector(const VExprSPtr& parent, DataTypePtr child_type,
+                                     const VExprSPtr& selector) {
+    auto expr = std::make_shared<TestFunctionExpr>("struct_element", std::move(child_type));
+    expr->add_child(parent);
+    expr->add_child(selector);
+    return expr;
+}
+
+VExprSPtr int_gt(const VExprSPtr& left, int32_t value) {
+    auto expr = std::make_shared<TestFunctionExpr>("gt", u8(), TExprNodeType::BINARY_PRED,
+                                                   TExprOpcode::GT);
+    expr->add_child(left);
+    expr->add_child(literal(i32(), Field::create_field<TYPE_INT>(value)));
+    return expr;
+}
+
+VExprSPtr binary_predicate(TExprOpcode::type opcode, const VExprSPtr& left,
+                           const VExprSPtr& right) {
+    auto expr = std::make_shared<TestFunctionExpr>("binary_predicate", u8(),
+                                                   TExprNodeType::BINARY_PRED, opcode);
+    expr->add_child(left);
+    expr->add_child(right);
+    return expr;
+}
+
+VExprSPtr in_predicate(const VExprSPtr& probe, const DataTypePtr& literal_type,
+                       const std::vector<Field>& values) {
+    auto expr = std::make_shared<TestFunctionExpr>("in", u8(), TExprNodeType::IN_PRED);
+    expr->add_child(probe);
+    for (const auto& value : values) {
+        expr->add_child(literal(literal_type, value));
+    }
+    return expr;
+}
+
+VExprSPtr null_predicate(const VExprSPtr& child, bool is_null) {
+    auto expr =
+            std::make_shared<TestFunctionExpr>(is_null ? "is_null_pred" : "is_not_null_pred", u8());
+    expr->add_child(child);
+    return expr;
+}
+
+VExprSPtr cast_expr(const VExprSPtr& child, DataTypePtr target_type) {
+    auto expr = Cast::create_shared(std::move(target_type));
+    expr->add_child(child);
+    return expr;
+}
+
+VExprSPtr compound_predicate(TExprOpcode::type opcode, const VExprSPtr& left,
+                             const VExprSPtr& right) {
+    auto expr = std::make_shared<TestFunctionExpr>("compound", u8(), TExprNodeType::COMPOUND_PRED,
+                                                   opcode);
+    expr->add_child(left);
+    expr->add_child(right);
+    return expr;
+}
+
+ColumnMapping mapped_struct_column(int32_t root_file_local_id, const std::string& child_name,
+                                   int32_t child_file_local_id, DataTypePtr child_type) {
+    ColumnDefinition file_child = name_col(child_name, child_type, child_file_local_id);
+    ColumnMapping root;
+    root.global_index = GlobalIndex(0);
+    root.table_column_name = "s";
+    root.file_local_id = root_file_local_id;
+    root.file_column_name = "s";
+    root.table_type =
+            std::make_shared<DataTypeStruct>(DataTypes {child_type}, Strings {child_name});
+    root.file_type = root.table_type;
+    root.original_file_type = root.table_type;
+    root.original_file_children = {file_child};
+    root.projected_file_children = {file_child};
+    return root;
+}
+
+std::vector<NestedStructPath> collect_paths(const VExprSPtr& expr) {
+    std::vector<NestedStructPath> paths;
+    collect_nested_struct_paths(expr, &paths);
+    return paths;
+}
+
+void expect_name_selector(const StructChildSelector& selector, const std::string& name) {
+    EXPECT_TRUE(selector.by_name);
+    EXPECT_EQ(selector.name, name);
+}
+
+void expect_ordinal_selector(const StructChildSelector& selector, size_t ordinal) {
+    EXPECT_FALSE(selector.by_name);
+    EXPECT_EQ(selector.ordinal, ordinal);
+}
+
+void expect_path_root(const NestedStructPath& path, size_t global_index) {
+    EXPECT_EQ(path.root_global_index, GlobalIndex(global_index));
+}
+
+class ColumnMapperCastTest : public testing::Test {
+protected:
+    void SetUp() override { state.set_enable_strict_cast(true); }
+
+    Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(&state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(&state));
+        return context->execute(block, result_column_id);
+    }
+
+    MockRuntimeState state;
+};
+
+class Int64ChildGreaterThanExpr final : public VExpr {
+public:
+    explicit Int64ChildGreaterThanExpr(int64_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false), _value(value) {}
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        ColumnPtr child_column;
+        RETURN_IF_ERROR(
+                get_child(0)->execute_column(context, block, selector, count, child_column));
+        const auto& input = assert_cast<const ColumnInt64&>(*child_column);
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            result_data[row] = input.get_element(row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<Int64ChildGreaterThanExpr>(_value);
+        return Status::OK();
+    }
+
+private:
+    const int64_t _value;
+    const std::string _expr_name = "Int64ChildGreaterThanExpr";
+};
+
+class Int64BinaryPredicateExpr final : public VExpr {
+public:
+    explicit Int64BinaryPredicateExpr(TExprOpcode::type opcode)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false) {
+        set_node_type(TExprNodeType::BINARY_PRED);
+        _opcode = opcode;
+    }
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        ColumnPtr left_column;
+        RETURN_IF_ERROR(get_child(0)->execute_column(context, block, selector, count, left_column));
+        ColumnPtr right_column;
+        RETURN_IF_ERROR(
+                get_child(1)->execute_column(context, block, selector, count, right_column));
+
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto left = left_column->get_int(row);
+            const auto right = right_column->get_int(row);
+            switch (_opcode) {
+            case TExprOpcode::GT:
+                result_data[row] = left > right;
+                break;
+            case TExprOpcode::LT:
+                result_data[row] = left < right;
+                break;
+            default:
+                return Status::InternalError("Unsupported test opcode {}", _opcode);
+            }
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<Int64BinaryPredicateExpr>(_opcode);
+        return Status::OK();
+    }
+
+private:
+    const std::string _expr_name = "Int64BinaryPredicateExpr";
+};
+
+VExprSPtr create_in_predicate() {
+    TExprNode node;
+    node.__set_node_type(TExprNodeType::IN_PRED);
+    node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+    node.__set_is_nullable(false);
+    node.__set_num_children(0);
+    TInPredicate in_predicate;
+    in_predicate.__set_is_not_in(false);
+    node.__set_in_predicate(in_predicate);
+    return VInPredicate::create_shared(node);
+}
+
+// ----------------------------------------------------------------------
+// L0 schema projection helper tests.
+// These tests isolate LocalColumnIndex projection semantics before
+// TableColumnMapper starts mutating ColumnMapping state.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsStructByLocalIdAndKeepsFileOrder) {
+    auto a = field_id_col("a", 101, i32(), 0);
+    auto b = field_id_col("b", 102, str(), 1);
+    auto root = struct_col("s", 100, {a, b}, 7);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(7);
+    projection.children.push_back(LocalColumnIndex::local(1));
+    projection.children.push_back(LocalColumnIndex::local(0));
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(root, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 2);
+    EXPECT_EQ(projected.children[0].name, "a");
+    EXPECT_EQ(projected.children[1].name, "b");
+
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 2);
+    EXPECT_EQ(projected_type->get_element_name(0), "a");
+    EXPECT_EQ(projected_type->get_element_name(1), "b");
+}
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsArrayElementStructLeaf) {
+    auto a = field_id_col("a", 1, i32(), 0);
+    auto b = field_id_col("b", 2, str(), 1);
+    auto element = struct_col("element", 10, {a, b}, 0);
+    auto array = array_col("items", 100, element, 5);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(5);
+    auto element_projection = LocalColumnIndex::partial_local(0);
+    element_projection.children.push_back(LocalColumnIndex::local(1));
+    projection.children.push_back(std::move(element_projection));
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(array, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 1);
+    ASSERT_EQ(projected.children[0].children.size(), 1);
+    EXPECT_EQ(projected.children[0].children[0].name, "b");
+
+    const auto* array_type =
+            assert_cast<const DataTypeArray*>(remove_nullable(projected.type).get());
+    const auto* element_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(array_type->get_nested_type()).get());
+    ASSERT_EQ(element_type->get_elements().size(), 1);
+    EXPECT_EQ(element_type->get_element_name(0), "b");
+}
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsMapValueStructLeaf) {
+    auto key = field_id_col("key", 1, str(), 0);
+    auto value_a = field_id_col("a", 2, i32(), 0);
+    auto value_b = field_id_col("b", 3, str(), 1);
+    auto value_type =
+            std::make_shared<DataTypeStruct>(DataTypes {i32(), str()}, Strings {"a", "b"});
+    ColumnDefinition value = field_id_col("value", 4, value_type, 1);
+    value.children = {value_a, value_b};
+    auto map = map_col("m", 100, {key, value}, str(), value_type, 9);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(9);
+    projection.children.push_back(LocalColumnIndex::local(0));
+    auto value_projection = LocalColumnIndex::partial_local(1);
+    value_projection.children.push_back(LocalColumnIndex::local(1));
+    projection.children.push_back(std::move(value_projection));
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(map, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 2);
+    EXPECT_EQ(projected.children[0].name, "key");
+    EXPECT_TRUE(projected.children[0].children.empty());
+    EXPECT_EQ(projected.children[1].name, "value");
+    ASSERT_EQ(projected.children[1].children.size(), 1);
+    EXPECT_EQ(projected.children[1].children[0].name, "b");
+
+    const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(projected.type).get());
+    const auto* projected_value =
+            assert_cast<const DataTypeStruct*>(remove_nullable(map_type->get_value_type()).get());
+    ASSERT_EQ(projected_value->get_elements().size(), 1);
+    EXPECT_EQ(projected_value->get_element_name(0), "b");
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsMapKeyOnlyProjection) {
+    auto key = field_id_col("key", 1, str(), 0);
+    auto value = field_id_col("value", 2, i32(), 1);
+    auto map = map_col("m", 100, {key, value}, str(), i32(), 9);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(9);
+    projection.children.push_back(LocalColumnIndex::local(0));
+
+    ColumnDefinition projected;
+    const auto status = project_column_definition(map, projection, &projected);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no value child"), std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidProjectionChildIdWithFieldName) {
+    auto root = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(7);
+    projection.children.push_back(LocalColumnIndex::local(99));
+
+    ColumnDefinition projected;
+    const auto status = project_column_definition(root, projection, &projected);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid projection child id 99 for field s"),
+              std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsEmptyProjectionPathWithFieldName) {
+    auto root = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(7);
+    projection.children.push_back(LocalColumnIndex::local(-1));
+
+    ColumnDefinition projected;
+    const auto status = project_column_definition(root, projection, &projected);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Empty projection path for field s"), std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidChildProjectionForPrimitiveField) {
+    auto root = field_id_col("i", 1, i32(), 7);
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(7);
+    projection.children.push_back(LocalColumnIndex::local(0));
+
+    ColumnDefinition projected;
+    const auto status = project_column_definition(root, projection, &projected);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid projection child id 0 for field i"),
+              std::string::npos);
+}
+
+// ----------------------------------------------------------------------
+// L0 nested helper tests.
+// These tests cover child ordering, direct schema path resolution, and
+// predicate-filter merging without going through create_scan_request().
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperNestedHelperTest, PresentChildMappingsAreSortedByFileLocalId) {
+    ColumnMapping b;
+    b.table_column_name = "b";
+    b.file_local_id = 2;
+    ColumnMapping missing;
+    missing.table_column_name = "missing";
+    ColumnMapping a;
+    a.table_column_name = "a";
+    a.file_local_id = 1;
+
+    const std::vector<ColumnMapping> child_mappings = {b, missing, a};
+    const auto present = present_child_mappings_in_file_order(child_mappings);
+    ASSERT_EQ(present.size(), 2);
+    EXPECT_EQ(present[0]->table_column_name, "a");
+    EXPECT_EQ(present[1]->table_column_name, "b");
+}
+
+TEST(ColumnMapperNestedHelperTest, BuildsProjectionByNameAndOrdinalSelectors) {
+    auto leaf = field_id_col("leaf", 3, i32(), 0);
+    auto nested = struct_col("nested", 2, {leaf}, 1);
+    auto first = field_id_col("first", 1, str(), 0);
+    const std::vector<ColumnDefinition> children = {first, nested};
+
+    const std::vector<StructChildSelector> by_name = {
+            {.by_name = true, .name = "nested", .ordinal = 0},
+            {.by_name = true, .name = "leaf", .ordinal = 0},
+    };
+    LocalColumnIndex named_projection;
+    ASSERT_TRUE(build_file_child_projection_from_schema(children, by_name, &named_projection).ok());
+    EXPECT_EQ(named_projection.local_id(), 1);
+    ASSERT_EQ(named_projection.children.size(), 1);
+    EXPECT_EQ(named_projection.children[0].local_id(), 0);
+
+    const std::vector<StructChildSelector> by_ordinal = {
+            {.by_name = false, .name = "", .ordinal = 2},
+            {.by_name = false, .name = "", .ordinal = 1},
+    };
+    LocalColumnIndex ordinal_projection;
+    ASSERT_TRUE(build_file_child_projection_from_schema(children, by_ordinal, &ordinal_projection)
+                        .ok());
+    EXPECT_EQ(ordinal_projection.local_id(), 1);
+    ASSERT_EQ(ordinal_projection.children.size(), 1);
+    EXPECT_EQ(ordinal_projection.children[0].local_id(), 0);
+}
+
+TEST(ColumnMapperNestedHelperTest, MergesPredicateFiltersForSameNestedTarget) {
+    FileColumnPredicateFilter gt_filter;
+    gt_filter.target = FileNestedPredicateTarget(
+            LocalColumnId(7), std::make_unique<FileStructPredicateTarget>(2, "score"));
+    gt_filter.file_column_id = LocalColumnId(7);
+    gt_filter.file_child_id_path = {2};
+    gt_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            7, "score", i32(), Field::create_field<TYPE_INT>(10), false));
+
+    FileColumnPredicateFilter lt_filter;
+    lt_filter.target = FileNestedPredicateTarget(
+            LocalColumnId(7), std::make_unique<FileStructPredicateTarget>(2, "score"));
+    lt_filter.file_column_id = LocalColumnId(7);
+    lt_filter.file_child_id_path = {2};
+    lt_filter.predicates.push_back(create_comparison_predicate<PredicateType::LT>(
+            7, "score", i32(), Field::create_field<TYPE_INT>(100), false));
+
+    std::vector<FileColumnPredicateFilter> filters;
+    merge_column_predicate_filter(std::move(gt_filter), &filters);
+    merge_column_predicate_filter(std::move(lt_filter), &filters);
+
+    ASSERT_EQ(filters.size(), 1);
+    EXPECT_EQ(filters[0].effective_file_column_id(), LocalColumnId(7));
+    EXPECT_EQ(filters[0].effective_file_child_id_path(), std::vector<int32_t>({2}));
+    ASSERT_EQ(filters[0].predicates.size(), 2);
+    EXPECT_EQ(target_names(filters[0].target.struct_target.get()),
+              std::vector<std::string>({"score"}));
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromOr) {
+    const auto int_type = i32();
+    const auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+    const auto slot = table_slot(0, 0, struct_type, "s");
+    const auto left = int_gt(struct_element(slot, int_type, "a"), 10);
+    const auto right = int_gt(struct_element(slot, int_type, "a"), 20);
+    const auto or_expr = compound_predicate(TExprOpcode::COMPOUND_OR, left, right);
+
+    std::vector<FileColumnPredicateFilter> filters;
+    collect_nested_column_predicate_filters(or_expr, {mapped_struct_column(5, "a", 0, int_type)},
+                                            &filters);
+
+    EXPECT_TRUE(filters.empty());
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromUnsupportedExpression) {
+    const auto int_type = i32();
+    const auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+    auto add_expr = std::make_shared<TestFunctionExpr>("add", int_type);
+    add_expr->add_child(struct_element(table_slot(0, 0, struct_type, "s"), int_type, "a"));
+    add_expr->add_child(literal(int_type, Field::create_field<TYPE_INT>(1)));
+
+    std::vector<FileColumnPredicateFilter> filters;
+    collect_nested_column_predicate_filters(add_expr, {mapped_struct_column(5, "a", 0, int_type)},
+                                            &filters);
+
+    EXPECT_TRUE(filters.empty());
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersThroughUnsafeCast) {
+    const auto file_type = i64();
+    const auto table_type = i32();
+    const auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {file_type}, Strings {"a"});
+    const auto nested_leaf = struct_element(table_slot(0, 0, struct_type, "s"), file_type, "a");
+    const auto filter_expr = int_gt(cast_expr(nested_leaf, table_type), 10);
+
+    std::vector<FileColumnPredicateFilter> filters;
+    collect_nested_column_predicate_filters(filter_expr,
+                                            {mapped_struct_column(5, "a", 0, file_type)}, &filters);
+
+    EXPECT_TRUE(filters.empty());
+}
+
+// ----------------------------------------------------------------------
+// collect_nested_struct_paths() helper tests.
+// These tests assert the entry helper for nested scan projection: it only discovers
+// table-side struct paths. Later localization decides whether to build pruning predicates.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperCollectNestedStructPathsTest, CollectsNameOrdinalAndBooleanSelectors) {
+    const auto leaf_type = i32();
+    const auto inner_type =
+            std::make_shared<DataTypeStruct>(DataTypes {leaf_type, leaf_type}, Strings {"x", "y"});
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {inner_type, leaf_type},
+                                                            Strings {"nested", "missing"});
+    const auto root = table_slot(0, 3, root_type, "s");
+
+    const auto nested_by_ordinal = struct_element_by_selector(
+            struct_element_by_selector(root, inner_type,
+                                       literal(i32(), Field::create_field<TYPE_INT>(1))),
+            leaf_type, literal(i32(), Field::create_field<TYPE_INT>(2)));
+    auto paths = collect_paths(nested_by_ordinal);
+    ASSERT_EQ(paths.size(), 1);
+    expect_path_root(paths[0], 3);
+    ASSERT_EQ(paths[0].selectors.size(), 2);
+    expect_ordinal_selector(paths[0].selectors[0], 1);
+    expect_ordinal_selector(paths[0].selectors[1], 2);
+
+    const std::vector<VExprSPtr> positive_ordinal_selectors = {
+            literal(std::make_shared<DataTypeInt8>(),
+                    Field::create_field<TYPE_TINYINT>(static_cast<int8_t>(1))),
+            literal(std::make_shared<DataTypeInt16>(),
+                    Field::create_field<TYPE_SMALLINT>(static_cast<int16_t>(2))),
+            literal(i32(), Field::create_field<TYPE_INT>(3)),
+            literal(i64(), Field::create_field<TYPE_BIGINT>(4)),
+            literal(u8(), Field::create_field<TYPE_BOOLEAN>(true)),
+    };
+    for (size_t idx = 0; idx < positive_ordinal_selectors.size(); ++idx) {
+        const auto selected =
+                struct_element_by_selector(root, leaf_type, positive_ordinal_selectors[idx]);
+        paths = collect_paths(selected);
+        ASSERT_EQ(paths.size(), 1);
+        ASSERT_EQ(paths[0].selectors.size(), 1);
+        expect_ordinal_selector(paths[0].selectors[0], idx == 4 ? 1 : idx + 1);
+    }
+
+    paths = collect_paths(struct_element(root, leaf_type, "missing"));
+    ASSERT_EQ(paths.size(), 1);
+    ASSERT_EQ(paths[0].selectors.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "missing");
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, IgnoresInvalidSelectorsAndNonPathRoots) {
+    const auto leaf_type = i32();
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"a"});
+    const auto root = table_slot(0, 0, root_type, "s");
+
+    const std::vector<VExprSPtr> invalid_selectors = {
+            literal(i32(), Field::create_field<TYPE_INT>(0)),
+            literal(i32(), Field::create_field<TYPE_INT>(-1)),
+            literal(u8(), Field::create_field<TYPE_BOOLEAN>(false)),
+            literal(f32(), Field::create_field<TYPE_FLOAT>(1.0F)),
+            literal(f64(), Field::create_field<TYPE_DOUBLE>(1.0)),
+            table_slot(1, 1, i32(), "selector"),
+    };
+    for (const auto& selector : invalid_selectors) {
+        EXPECT_TRUE(collect_paths(struct_element_by_selector(root, leaf_type, selector)).empty());
+    }
+
+    auto wrong_arity = std::make_shared<TestFunctionExpr>("struct_element", leaf_type);
+    wrong_arity->add_child(root);
+    EXPECT_TRUE(collect_paths(wrong_arity).empty());
+
+    auto not_struct_element = std::make_shared<TestFunctionExpr>("other_function", leaf_type);
+    not_struct_element->add_child(root);
+    not_struct_element->add_child(literal(str(), Field::create_field<TYPE_STRING>("a")));
+    EXPECT_TRUE(collect_paths(not_struct_element).empty());
+
+    EXPECT_TRUE(collect_paths(struct_element(literal(str(), Field::create_field<TYPE_STRING>("x")),
+                                             leaf_type, "a"))
+                        .empty());
+    EXPECT_TRUE(collect_paths(nullptr).empty());
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, RecursesThroughExpressionsAndKeepsCompletePath) {
+    const auto leaf_type = i32();
+    const auto inner_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"b"});
+    const auto root_type =
+            std::make_shared<DataTypeStruct>(DataTypes {inner_type, leaf_type}, Strings {"a", "c"});
+    const auto root = table_slot(0, 2, root_type, "s");
+    const auto path_a = struct_element_by_selector(
+            root, inner_type, literal(str(), Field::create_field<TYPE_STRING>("a")));
+    const auto path_ab = struct_element_by_selector(
+            path_a, leaf_type, literal(str(), Field::create_field<TYPE_STRING>("b")));
+    const auto path_c = struct_element_by_selector(
+            root, leaf_type, literal(str(), Field::create_field<TYPE_STRING>("c")));
+
+    auto paths = collect_paths(binary_predicate(
+            TExprOpcode::GT, path_ab, literal(leaf_type, Field::create_field<TYPE_INT>(1))));
+    ASSERT_EQ(paths.size(), 1);
+    expect_path_root(paths[0], 2);
+    ASSERT_EQ(paths[0].selectors.size(), 2);
+    expect_name_selector(paths[0].selectors[0], "a");
+    expect_name_selector(paths[0].selectors[1], "b");
+
+    paths = collect_paths(compound_predicate(
+            TExprOpcode::COMPOUND_OR,
+            binary_predicate(TExprOpcode::GT, path_ab,
+                             literal(leaf_type, Field::create_field<TYPE_INT>(1))),
+            binary_predicate(TExprOpcode::LT, path_c,
+                             literal(leaf_type, Field::create_field<TYPE_INT>(2)))));
+    ASSERT_EQ(paths.size(), 2);
+    ASSERT_EQ(paths[0].selectors.size(), 2);
+    ASSERT_EQ(paths[1].selectors.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "a");
+    expect_name_selector(paths[0].selectors[1], "b");
+    expect_name_selector(paths[1].selectors[0], "c");
+
+    auto fn = std::make_shared<TestFunctionExpr>("fn", leaf_type);
+    fn->add_child(path_ab);
+    fn->add_child(table_slot(3, 4, leaf_type, "other"));
+    paths = collect_paths(fn);
+    ASSERT_EQ(paths.size(), 1);
+    ASSERT_EQ(paths[0].selectors.size(), 2);
+
+    auto if_expr = std::make_shared<TestFunctionExpr>("if", leaf_type);
+    if_expr->add_child(literal(u8(), Field::create_field<TYPE_BOOLEAN>(true)));
+    if_expr->add_child(path_ab);
+    if_expr->add_child(path_c);
+    paths = collect_paths(if_expr);
+    ASSERT_EQ(paths.size(), 2);
+
+    paths = collect_paths(compound_predicate(TExprOpcode::COMPOUND_AND, path_ab, path_ab));
+    ASSERT_EQ(paths.size(), 2);
+
+    paths = collect_paths(path_ab);
+    ASSERT_EQ(paths.size(), 1);
+    ASSERT_EQ(paths[0].selectors.size(), 2);
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, CastBehaviorSeparatesProjectionAndPruningRules) {
+    const auto int_type = i32();
+    const auto bigint_type = i64();
+    const auto float_type = f32();
+    const auto double_type = f64();
+    const auto decimal_small = dec32(8, 2);
+    const auto decimal_wide = dec32(9, 2);
+    const auto decimal_changed_scale = dec32(9, 3);
+
+    const auto root_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, float_type, decimal_small}, Strings {"i", "f", "d"});
+    const auto root = table_slot(0, 0, root_type, "s");
+    const auto int_path = struct_element(root, int_type, "i");
+    const auto float_path = struct_element(root, float_type, "f");
+    const auto decimal_path = struct_element(root, decimal_small, "d");
+
+    auto paths = collect_paths(cast_expr(int_path, bigint_type));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "i");
+
+    paths = collect_paths(cast_expr(float_path, double_type));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "f");
+
+    paths = collect_paths(cast_expr(decimal_path, decimal_wide));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "d");
+
+    paths = collect_paths(
+            cast_expr(struct_element(root, make_nullable(int_type), "i"), make_nullable(int_type)));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "i");
+
+    // Unsafe casts are not accepted as pruning paths, but collect_nested_struct_paths() still
+    // recurses into children so scan projection can read the column needed by row-level filters.
+    paths = collect_paths(cast_expr(struct_element(root, bigint_type, "i"), int_type));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "i");
+
+    paths = collect_paths(cast_expr(decimal_path, decimal_changed_scale));
+    ASSERT_EQ(paths.size(), 1);
+    expect_name_selector(paths[0].selectors[0], "d");
+
+    EXPECT_TRUE(collect_paths(cast_expr(table_slot(1, 1, int_type, "plain"), bigint_type)).empty());
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, ProjectionMergeKeepsFilterOnlyPathAndDeduplicates) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", int_type);
+    auto table_output = struct_name_col("s", {table_a});
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", int_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok());
+
+    const auto path_b =
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "b");
+    auto filter_expr = compound_predicate(
+            TExprOpcode::COMPOUND_AND,
+            binary_predicate(TExprOpcode::GT, path_b,
+                             literal(int_type, Field::create_field<TYPE_INT>(1))),
+            binary_predicate(TExprOpcode::LT, path_b,
+                             literal(int_type, Field::create_field<TYPE_INT>(10))));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_output}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children);
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 2);
+}
+
+// Scenario: row-oriented readers such as CSV/Text cannot lazy-read predicate columns separately.
+// For a complex root that is both projected and referenced by a filter, the materialized mapper
+// keeps one non-predicate scan entry and asks the reader to read the full top-level struct.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperUsesSingleScanColumnList) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = name_col("a", int_type, 0);
+    auto table_b = name_col("b", int_type, 1);
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+    auto table_output = struct_name_col("s", {table_a});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", int_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5);
+
+    MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok());
+
+    const auto path_b =
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "b");
+    auto filter_expr = binary_predicate(TExprOpcode::GT, path_b,
+                                        literal(int_type, Field::create_field<TYPE_INT>(1)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_output}, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty());
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: a FileReader must expose semantic children for complex file columns. If it returns a
+// complex DataType but leaves ColumnDefinition::children empty, mapper should return a diagnostic
+// error instead of aborting inside ARRAY/MAP/STRUCT child lookup.
+TEST(ColumnMapperScanRequestTest, MalformedComplexFileSchemaReturnsError) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = name_col("a", int_type, 0);
+    auto table_b = name_col("b", string_type, 1);
+    auto table_struct = struct_name_col("s", {table_a, table_b});
+    auto file_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    auto malformed_file_struct = name_col("s", file_struct_type, 5);
+
+    MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    const auto status = mapper.create_mapping({table_struct}, {}, {malformed_file_struct});
+
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Malformed complex file schema"), std::string::npos)
+            << status;
+}
+
+// Scenario: when the projected table schema contains the child referenced by the filter, the
+// materialized mapper can still rewrite the table-level struct child predicate into a file-local
+// conjunct. It remains a single full-root scan column; only the expression is localized.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperLocalizesMappedStructChildConjunct) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = name_col("a", int_type, 0);
+    auto table_b = name_col("b", int_type, 1);
+    auto table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", int_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5);
+
+    MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto path_b = struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "b");
+    auto filter_expr = binary_predicate(TExprOpcode::GT, path_b,
+                                        literal(int_type, Field::create_field<TYPE_INT>(1)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty());
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+}
+
+// Scenario: even output-only partial complex projections such as `SELECT s.a` must scan the full
+// top-level struct for materialized readers, because delimited text formats cannot physically read
+// only one nested child from a single text field.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullComplexRootForOutputOnlyProjection) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = name_col("a", int_type, 0);
+    auto table_output = struct_name_col("s", {table_a});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", int_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5);
+
+    MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_output}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: array/map nested projections also scan the full top-level complex root for
+// materialized readers. This keeps row-oriented formats from receiving Parquet-style partial
+// projections for `array<struct>` elements or map value structs.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullArrayAndMapRoots) {
+    const auto key_type = str();
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_array_child = name_col("b", string_type);
+    auto table_array_element = struct_name_col("element", {table_array_child});
+    auto table_array = array_col("items", -1, table_array_element);
+    table_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&table_array, -1);
+
+    auto file_array_a = name_col("a", int_type, 0);
+    auto file_array_b = name_col("b", string_type, 1);
+    auto file_array_element = struct_name_col("element", {file_array_a, file_array_b}, 0);
+    auto file_array = array_col("items", -1, file_array_element, 4);
+    file_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&file_array, 4);
+
+    auto table_value_b = name_col("b", string_type);
+    auto table_value = struct_name_col("value", {table_value_b});
+    auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type);
+    table_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&table_map, -1);
+
+    auto file_key = name_col("key", key_type, 0);
+    auto file_value_a = name_col("a", int_type, 0);
+    auto file_value_b = name_col("b", string_type, 1);
+    auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1);
+    auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 6);
+    file_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&file_map, 6);
+
+    MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_array, table_map}, {}, {file_array, file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array, table_map}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 2);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(4));
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_EQ(request.non_predicate_columns[1].column_id(), LocalColumnId(6));
+    EXPECT_TRUE(request.non_predicate_columns[1].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[1].children.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// ----------------------------------------------------------------------
+// L1 create_mapping root matching tests.
+// These cases cover the three supported root matching modes and the
+// missing/default behavior that each mode feeds into later scan requests.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperCreateMappingTest, ByNameMatchesCaseIdentifierAndAliases) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("ID", int_type),
+            name_id_col("renamed", "legacy_name", int_type),
+            [] {
+                auto column = name_col("current_alias", i32());
+                column.name_mapping = {"old_alias"};
+                return column;
+            }(),
+            name_col("file_alias", int_type),
+    };
+    std::vector<ColumnDefinition> file_schema = {
+            name_col("id", int_type, 0),
+            name_col("legacy_name", int_type, 1),
+            name_col("old_alias", int_type, 2),
+            [] {
+                auto column = name_col("physical_name", i32(), 3);
+                column.name_mapping = {"file_alias"};
+                return column;
+            }(),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 4);
+    expect_mapping(mapper.mappings()[0], 0, "ID", 0, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, "renamed", 1, "legacy_name", int_type, int_type);
+    expect_mapping(mapper.mappings()[2], 2, "current_alias", 2, "old_alias", int_type, int_type);
+    expect_mapping(mapper.mappings()[3], 3, "file_alias", 3, "physical_name", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesFirstMatchingFileFieldWhenAmbiguous) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("ID", int_type, 0),
+            name_col("id", int_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "id", 0, "ID", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, TimestampTzScaleMismatchDoesNotAddFinalizeCast) {
+    // Scenario: HDFS TVF may expose a table slot as TIMESTAMPTZ(0), while a Parquet logical UTC
+    // timestamp file schema is materialized as TIMESTAMPTZ(6). Finalization must not add a SQL
+    // cast from scale 6 to scale 0, because that cast rounds fractional seconds:
+    //   2025-06-01 12:34:56.789+08:00 -> 2025-06-01 12:34:57+08:00
+    // Reader finalization should pass the column through; the output slot type controls display
+    // scale and hides the fractional part without changing the stored instant.
+    const auto table_type = timestamptz(0);
+    const auto file_type = timestamptz(6);
+    const std::vector<ColumnDefinition> table_schema = {name_col("ts_tz", table_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("ts_tz", file_type, 0)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "ts_tz", 0, "ts_tz", file_type, table_type);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForRenamedColumn) {
+    const auto int_type = i32();
+    auto table_column = name_col("current_id", int_type);
+    table_column.name_mapping = {"legacy_id"};
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("legacy_id", int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "current_id", 0, "legacy_id", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForNestedSchemaEvolution) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_country = name_col("country", string_type);
+    table_country.name_mapping = {"old_country"};
+    auto table_city = name_col("city", string_type);
+    auto table_struct = struct_name_col("struct_column", {table_country, table_city});
+    set_name_identifiers(&table_struct, -1);
+
+    auto table_item = name_col("item", string_type);
+    table_item.name_mapping = {"product"};
+    auto table_quantity = name_col("quantity", int_type);
+    auto table_element = struct_name_col("element", {table_item, table_quantity});
+    auto table_array = array_col("array_column", -1, table_element);
+    set_name_identifiers(&table_array, -1);
+
+    auto table_key = name_col("key", string_type);
+    auto table_full_name = name_col("full_name", string_type);
+    table_full_name.name_mapping = {"name"};
+    auto table_age = name_col("age", int_type);
+    auto table_value = struct_name_col("value", {table_full_name, table_age});
+    auto table_map =
+            map_col("new_map_column", -1, {table_key, table_value}, string_type, table_value.type);
+    table_map.name_mapping = {"map_column"};
+    set_name_identifiers(&table_map, -1);
+
+    auto file_old_country = name_col("old_country", string_type, 0);
+    auto file_city = name_col("city", string_type, 1);
+    auto file_struct = struct_name_col("struct_column", {file_old_country, file_city}, 3);
+    set_name_identifiers(&file_struct, 3);
+
+    auto file_product = name_col("product", string_type, 0);
+    auto file_element = struct_name_col("list", {file_product}, 0);
+    auto file_array = array_col("array_column", -1, file_element, 4);
+    set_name_identifiers(&file_array, 4);
+
+    auto file_key = name_col("key", string_type, 0);
+    auto file_name = name_col("name", string_type, 0);
+    auto file_age = name_col("age", int_type, 1);
+    auto file_value = struct_name_col("value", {file_name, file_age}, 1);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value.type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct, table_array, table_map}, {},
+                                      {file_struct, file_array, file_map})
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    const auto& struct_mapping = mapper.mappings()[0];
+    expect_mapping(struct_mapping, 0, "struct_column", 3, "struct_column", file_struct.type,
+                   table_struct.type);
+    ASSERT_EQ(struct_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(struct_mapping.child_mappings[0].file_column_name, "old_country");
+    EXPECT_EQ(*struct_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(struct_mapping.child_mappings[1].file_column_name, "city");
+    EXPECT_EQ(*struct_mapping.child_mappings[1].file_local_id, 1);
+
+    const auto& array_mapping = mapper.mappings()[1];
+    expect_mapping(array_mapping, 1, "array_column", 4, "array_column", file_array.type,
+                   table_array.type);
+    ASSERT_EQ(array_mapping.child_mappings.size(), 1);
+    const auto& element_mapping = array_mapping.child_mappings[0];
+    EXPECT_EQ(element_mapping.file_column_name, "list");
+    EXPECT_EQ(*element_mapping.file_local_id, 0);
+    ASSERT_EQ(element_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(element_mapping.child_mappings[0].file_column_name, "product");
+    EXPECT_EQ(*element_mapping.child_mappings[0].file_local_id, 0);
+    expect_missing(element_mapping.child_mappings[1]);
+
+    const auto& map_mapping = mapper.mappings()[2];
+    expect_mapping(map_mapping, 2, "new_map_column", 5, "map_column", file_map.type,
+                   table_map.type);
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1);
+}
+
+// Scenario: SELECT * can carry only the full complex DataType without expanded nested
+// ColumnDefinitions. When an old file has map value STRUCT<age, name> and the table type is
+// STRUCT<age, full_name, gender>, the mapper must still build child mappings instead of letting
+// TableReader cast between incompatible struct shapes.
+TEST(ColumnMapperCreateMappingTest, SynthesizesMissingMapValueStructChildrenFromType) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type},
+                                                                  Strings {"age", "name"});
+
+    auto table_map = name_col("new_map_column",
+                              std::make_shared<DataTypeMap>(string_type, table_value_type));
+    table_map.name_mapping = {"map_column"};
+    set_name_identifiers(&table_map, -1);
+
+    auto file_age = name_col("age", int_type, 0);
+    auto file_name = name_col("name", string_type, 1);
+    auto file_value = struct_name_col("value", {file_age, file_name}, 1);
+    auto file_key = name_col("key", string_type, 0);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+// Scenario: MAP_KEYS(new_map_column) may build a key-only nested projection, while SELECT * still
+// needs the whole map root. The mapper must add a synthetic value child and recursively map the old
+// value struct instead of treating Struct(name, age) as a leaf to CAST into the table value struct.
+TEST(ColumnMapperCreateMappingTest, KeyOnlyMapProjectionStillMapsEvolvedValueStruct) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                                  Strings {"name", "age"});
+
+    auto table_key = name_col("key", string_type);
+    auto table_map = map_col("new_map_column", -1, {table_key}, string_type, table_value_type);
+    table_map.name_mapping = {"map_column"};
+    set_name_identifiers(&table_map, -1);
+
+    auto file_key = name_col("key", string_type, 0);
+    auto file_name = name_col("name", string_type, 0);
+    auto file_age = name_col("age", int_type, 1);
+    auto file_value = struct_name_col("value", {file_name, file_age}, 1);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+// Scenario: Iceberg uses field-id mapping, but a key-only map projection may force the mapper to
+// synthesize the missing value struct from DataType names, which do not carry field ids. The mapper
+// must name-match synthesized children before ordinal fallback, otherwise `age` would read old
+// file child `name` and the later materialization would build the value struct incorrectly.
+TEST(ColumnMapperCreateMappingTest,
+     KeyOnlyMapProjectionSynthesizedValueStructNameMatchesBeforeOrdinalFallback) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                                  Strings {"name", "age"});
+
+    auto table_key = field_id_col("key", 10, string_type, 0);
+    auto table_map = map_col("new_map_column", 2, {table_key}, string_type, table_value_type);
+
+    auto file_key = field_id_col("key", 10, string_type, 0);
+    auto file_name = field_id_col("name", 7, string_type, 0);
+    auto file_age = field_id_col("age", 8, int_type, 1);
+    auto file_value = struct_col("value", 11, {file_name, file_age}, 1);
+    auto file_map =
+            map_col("new_map_column", 2, {file_key, file_value}, string_type, file_value_type, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByFieldIdDoesNotFallbackToNameAndUsesFirstDuplicate) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("renamed", 10, int_type),
+            name_col("same_name", int_type),
+            field_id_col("negative", -7, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("first", 10, int_type, 0),
+            field_id_col("second", 10, int_type, 1),
+            field_id_col("same_name", 99, int_type, 2),
+            field_id_col("negative_file", -7, int_type, 3),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "renamed", 0, "first", int_type, int_type);
+    expect_missing(mapper.mappings()[1]);
+    expect_mapping(mapper.mappings()[2], 2, "negative", 3, "negative_file", int_type, int_type);
+}
+
+// Scenario: Iceberg TopN lazy materialization uses BY_FIELD_ID for schema evolution and also asks
+// the file reader to synthesize GLOBAL_ROWID. GLOBAL_ROWID is matched by ColumnType before the
+// field-id matcher, so keeping BY_FIELD_ID does not make the mapper look for a numeric field id for
+// that virtual column.
+TEST(ColumnMapperCreateMappingTest, ByFieldIdMapsGlobalRowIdByVirtualColumnType) {
+    const auto int_type = i32();
+    auto table_rowid = global_rowid_column_definition();
+    table_rowid.name = BeConsts::GLOBAL_ROWID_COL + "equality_delete_par_1";
+    table_rowid.identifier = Field::create_field<TYPE_STRING>(table_rowid.name);
+
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("new_new_id", 1, int_type),
+            table_rowid,
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("id", 1, int_type, 0),
+            global_rowid_column_definition(),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "new_new_id", 0, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, table_rowid.name, GLOBAL_ROWID_COLUMN_ID,
+                   BeConsts::GLOBAL_ROWID_COL, str(), str());
+}
+
+TEST(ColumnMapperCreateMappingTest, ByFieldIdTreatsSameNameDifferentFieldIdAsMissing) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("same_name", 10, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("same_name", 20, int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_missing(mapper.mappings()[0]);
+}
+
+TEST(ColumnMapperCreateMappingTest, NestedFieldIdTreatsSameNameDifferentFieldIdAsMissing) {
+    const auto int_type = i32();
+    auto table_child = field_id_col("child", 10, int_type);
+    auto table_root = struct_col("root", 1, {table_child});
+
+    auto file_child = field_id_col("child", 20, int_type, 0);
+    auto file_root = struct_col("root", 1, {file_child}, 0);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    const auto status = mapper.create_mapping({table_root}, {}, {file_root});
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "root", 0, "root", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1);
+    expect_missing(mapper.mappings()[0].child_mappings[0]);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMapsTopLevelColumnsByPositionIgnoringFileNames) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("user_id", 0, int_type),
+            position_col("user_name", 1, string_type),
+            position_col("age", 2, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, string_type, 1),
+            field_id_col("_col2", 102, int_type, 2),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "user_id", 0, "_col0", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, "user_name", 1, "_col1", string_type, string_type);
+    expect_mapping(mapper.mappings()[2], 2, "age", 2, "_col2", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexSupportsSparseProjection) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("age", 2, int_type),
+            position_col("score", 4, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0), field_id_col("_col1", 101, int_type, 1),
+            field_id_col("_col2", 102, int_type, 2), field_id_col("_col3", 103, int_type, 3),
+            field_id_col("_col4", 104, int_type, 4),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "age", 2, "_col2", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, "score", 4, "_col4", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest,
+     ByIndexMatchesNestedStructChildrenByNameEvenWhenChildrenHaveFieldIds) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // Hive positional mapping only applies to top-level columns. FE/history schema metadata can
+    // still put field-id style integer identifiers on nested struct children. Those nested
+    // identifiers must not be interpreted as file positions.
+    auto table_root = struct_col("profile", 1,
+                                 {
+                                         field_id_col("id", 100, int_type),
+                                         field_id_col("name", 101, string_type),
+                                 });
+    // Reverse the file child order so a wrong positional match either misses the child or reads
+    // the wrong physical child. The expected mapping below proves the children are matched by name.
+    auto file_root = struct_name_col("_col1",
+                                     {
+                                             name_col("name", string_type, 0),
+                                             name_col("id", int_type, 1),
+                                     },
+                                     1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexNestedStructDoesNotUseChildOrdinalIdentifier) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // This is the dangerous variant of the previous case: the nested integer identifiers happen
+    // to be valid child ordinals. BY_INDEX must still ignore them below the top-level root.
+    auto table_root = struct_col("profile", 1,
+                                 {
+                                         field_id_col("id", 0, int_type),
+                                         field_id_col("name", 1, string_type),
+                                 });
+    // If the implementation uses child ordinal matching, id/name will be swapped here.
+    auto file_root = struct_name_col("_col1",
+                                     {
+                                             name_col("name", string_type, 0),
+                                             name_col("id", int_type, 1),
+                                     },
+                                     1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexArrayElementStructChildrenMatchByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // The top-level ARRAY column is selected by file position. After that, ARRAY has a single
+    // structural child, and the element STRUCT should use Hive's nested-by-name behavior.
+    auto table_element = struct_col("element", 10,
+                                    {
+                                            field_id_col("id", 100, int_type),
+                                            field_id_col("name", 101, string_type),
+                                    });
+    auto table_root = array_col("profiles", 1, table_element);
+    // Reverse the element struct children to distinguish name matching from position matching.
+    auto file_element = struct_name_col("element",
+                                        {
+                                                name_col("name", string_type, 0),
+                                                name_col("id", int_type, 1),
+                                        },
+                                        0);
+    auto file_root = array_col("_col1", 1001, file_element, 1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type,
+                   table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1);
+    const auto& element_mapping = mapper.mappings()[0].child_mappings[0];
+    expect_mapping(element_mapping, 0, "element", 0, "element", file_element.type,
+                   table_element.type);
+    ASSERT_EQ(element_mapping.child_mappings.size(), 2);
+    expect_mapping(element_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(element_mapping.child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMapValueStructChildrenMatchByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto key_type = str();
+    // MAP key/value are structural children, so BY_INDEX should not reinterpret their nested
+    // integer identifiers as arbitrary positions. The value STRUCT then follows name matching.
+    auto table_key = field_id_col("key", 10, key_type);
+    auto table_value = struct_col("value", 11,
+                                  {
+                                          field_id_col("id", 100, int_type),
+                                          field_id_col("name", 101, string_type),
+                                  });
+    auto table_root = map_col("profiles", 1, {table_key, table_value}, key_type, table_value.type);
+    auto file_key = name_col("key", key_type, 0);
+    // Reverse value struct children. A positional nested match would produce name/id swapped.
+    auto file_value = struct_name_col("value",
+                                      {
+                                              name_col("name", string_type, 0),
+                                              name_col("id", int_type, 1),
+                                      },
+                                      1);
+    auto file_root = map_col("_col1", 1001, {file_key, file_value}, key_type, file_value.type, 1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type,
+                   table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "key", 0, "key", key_type, key_type);
+    const auto& value_mapping = mapper.mappings()[0].child_mappings[1];
+    expect_mapping(value_mapping, 0, "value", 1, "value", file_value.type, table_value.type);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 2);
+    expect_mapping(value_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(value_mapping.child_mappings[1], 0, "name", 0, "name", string_type, string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest,
+     ByIndexPartitionColumnsTakeConstantAndDoNotConsumeFilePosition) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto partition = name_col("dt", string_type);
+    partition.is_partition_key = true;
+    const std::vector<ColumnDefinition> table_schema = {
+            partition,
+            position_col("user_id", 0, int_type),
+            position_col("score", 1, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema,
+                                      {{"dt", Field::create_field<TYPE_STRING>("2026-06-11")}},
+                                      file_schema)
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_constant(mapper, mapper.mappings()[0], 0, string_type);
+    expect_mapping(mapper.mappings()[1], 1, "user_id", 0, "_col0", int_type, int_type);
+    expect_mapping(mapper.mappings()[2], 2, "score", 1, "_col1", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToDefaultOrMissing) {
+    const auto int_type = i32();
+    auto with_default = position_col("extra_default", 5, int_type);
+    const auto literal_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(42)));
+    with_default.default_expr = literal_expr;
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            with_default,
+            position_col("extra_missing", 99, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type);
+    EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr);
+    expect_missing(mapper.mappings()[2]);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMissingIdentifierFallsBackToDefaultOrMissing) {
+    const auto int_type = i32();
+    auto with_default = name_col("extra_default", int_type);
+    const auto literal_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(42)));
+    with_default.default_expr = literal_expr;
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            with_default,
+            name_col("extra_missing", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type);
+    EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr);
+    expect_missing(mapper.mappings()[2]);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToMissing) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            position_col("b", 5, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_missing(mapper.mappings()[1]);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresExtraFileColumns) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+            field_id_col("_col2", 102, int_type, 2),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresFileColumnNames) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 1, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("a", 100, int_type, 10),
+            field_id_col("b", 101, int_type, 20),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "a", 20, "b", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, MissingColumnFallsBackToMissingMapping) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    const auto status = mapper.create_mapping({name_col("missing", i32())}, {},
+                                              {name_col("present", i32(), 0)});
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_missing(mapper.mappings()[0]);
+}
+
+// ----------------------------------------------------------------------
+// L1 constants and virtual columns.
+// These tests verify non-file-backed mappings before TableReader materializes
+// their final values.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperConstantTest, PartitionDefaultAndVirtualColumnsUseDedicatedBranches) {
+    auto partition_column = name_col("dt", str());
+    partition_column.is_partition_key = true;
+
+    auto default_column = name_col("new_value", i32());
+    default_column.default_expr =
+            VExprContext::create_shared(literal(i32(), Field::create_field<TYPE_INT>(42)));
+
+    auto row_id_column = name_col("_row_id", make_nullable(i64()));
+    auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64()));
+    auto iceberg_rowid_column = name_col(BeConsts::ICEBERG_ROWID_COL, str());
+
+    const std::vector<ColumnDefinition> table_schema = {
+            partition_column, default_column, row_id_column, sequence_column, iceberg_rowid_column};
+    const std::map<std::string, Field> partition_values = {
+            {"dt", Field::create_field<TYPE_STRING>("2026-06-11")},
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, partition_values, {}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 5);
+    expect_constant(mapper, mapper.mappings()[0], 0, str());
+    expect_constant(mapper, mapper.mappings()[1], 1, i32());
+    EXPECT_EQ(mapper.mappings()[2].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_EQ(mapper.mappings()[3].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_EQ(mapper.mappings()[4].virtual_column_type, TableVirtualColumnType::ICEBERG_ROWID);
+}
+
+TEST(ColumnMapperConstantTest, PhysicalRowLineageFiltersStayFinalizeOnly) {
+    auto row_id_column = name_col("_row_id", make_nullable(i64()));
+    auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64()));
+    const std::vector<ColumnDefinition> table_schema = {row_id_column, sequence_column};
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("_row_id", make_nullable(i64()), 2147483540),
+            name_col("_last_updated_sequence_number", make_nullable(i64()), 2147483539),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::FINALIZE_ONLY);
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::FINALIZE_ONLY);
+
+    auto row_id_filter =
+            binary_predicate(TExprOpcode::EQ, table_slot(0, 0, make_nullable(i64()), "_row_id"),
+                             literal(i64(), Field::create_field<TYPE_BIGINT>(1001)));
+    auto sequence_filter = binary_predicate(
+            TExprOpcode::EQ,
+            table_slot(1, 1, make_nullable(i64()), "_last_updated_sequence_number"),
+            literal(i64(), Field::create_field<TYPE_BIGINT>(77)));
+    TableFilter row_id_table_filter {.conjunct = VExprContext::create_shared(row_id_filter),
+                                     .global_indices = {GlobalIndex(0)}};
+    TableFilter sequence_table_filter {.conjunct = VExprContext::create_shared(sequence_filter),
+                                       .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({row_id_table_filter, sequence_table_filter}, {},
+                                           table_schema, &request)
+                        .ok());
+
+    EXPECT_TRUE(request.conjuncts.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_EQ(projection_ids(request.non_predicate_columns),
+              std::vector<int32_t>({2147483540, 2147483539}));
+}
+
+TEST(ColumnMapperConstantTest, MissingRowLineageDefaultExprStillUsesVirtualMapping) {
+    auto id_column = field_id_col("id", 1, make_nullable(i32()));
+    auto row_id_column = field_id_col("renamed_row_id", 2147483540, make_nullable(i64()));
+    row_id_column.default_expr = VExprContext::create_shared(
+            literal(make_nullable(i64()), Field::create_field<TYPE_BIGINT>(0)));
+    auto sequence_column =
+            field_id_col("renamed_last_updated_sequence_number", 2147483539, make_nullable(i64()));
+    sequence_column.default_expr = VExprContext::create_shared(
+            literal(make_nullable(i64()), Field::create_field<TYPE_BIGINT>(0)));
+
+    const std::vector<ColumnDefinition> table_schema = {id_column, row_id_column, sequence_column};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("id", 1, make_nullable(i32()), 0),
+            field_id_col("name", 2, make_nullable(str()), 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "id", 0, "id", make_nullable(i32()),
+                   make_nullable(i32()));
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_FALSE(mapper.mappings()[1].constant_index.has_value());
+    EXPECT_EQ(mapper.mappings()[2].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_FALSE(mapper.mappings()[2].constant_index.has_value());
+    EXPECT_TRUE(mapper.constant_map().empty());
+}
+
+TEST(ColumnMapperConstantTest, ByFieldIdDoesNotTreatSameNameDifferentIdAsRowLineage) {
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("_row_id", 100, make_nullable(i64())),
+            field_id_col("_last_updated_sequence_number", 101, make_nullable(i64())),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_row_id", 100, make_nullable(i64()), 0),
+            field_id_col("_last_updated_sequence_number", 101, make_nullable(i64()), 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "_row_id", 0, "_row_id", make_nullable(i64()),
+                   make_nullable(i64()));
+    EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::INVALID);
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+    expect_mapping(mapper.mappings()[1], 1, "_last_updated_sequence_number", 1,
+                   "_last_updated_sequence_number", make_nullable(i64()), make_nullable(i64()));
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::INVALID);
+    EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+}
+
+TEST(ColumnMapperConstantTest, PartitionAliasResolvesRenamedValue) {
+    auto partition_column = name_col("current_dt", str());
+    partition_column.name_mapping = {"legacy_dt"};
+    partition_column.is_partition_key = true;
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(
+                              {partition_column},
+                              {{"legacy_dt", Field::create_field<TYPE_STRING>("2026-06-11")}}, {})
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_constant(mapper, mapper.mappings()[0], 0, str());
+}
+
+TEST(ColumnMapperConstantTest, PartitionConstantFilterEntryDoesNotReadFileColumns) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true;
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({partition_column},
+                                      {{"part", Field::create_field<TYPE_INT>(7)}}, {})
+                        .ok());
+
+    TableFilter filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)),
+            .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {partition_column}, &request).ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(),
+              *mapper.mappings()[0].constant_index);
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+TEST(ColumnMapperConstantTest, DefaultConstantFilterEntryUsesDefaultExpression) {
+    auto default_column = name_col("new_value", i32());
+    default_column.default_expr =
+            VExprContext::create_shared(literal(i32(), Field::create_field<TYPE_INT>(42)));
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({default_column}, {}, {}).ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(
+                                int_gt(table_slot(0, 0, i32(), "new_value"), 1)),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {default_column}, &request).ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    const auto constant_index = mapper.filter_entries().at(GlobalIndex(0)).constant_index();
+    EXPECT_EQ(constant_index, *mapper.mappings()[0].constant_index);
+    EXPECT_EQ(mapper.constant_map().get(constant_index).expr, default_column.default_expr);
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+}
+
+TEST(ColumnMapperConstantTest, MixedConstantAndFileFilterKeepsOnlyFileScanColumn) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true;
+    const auto file_column = name_col("score", i32(), 3);
+    const std::vector<ColumnDefinition> table_schema = {partition_column, file_column};
+    const std::vector<ColumnDefinition> file_schema = {file_column};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {{"part", Field::create_field<TYPE_INT>(7)}},
+                                      file_schema)
+                        .ok());
+
+    TableFilter constant_filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)),
+            .global_indices = {GlobalIndex(0)}};
+    TableFilter file_filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(1, 1, i32(), "score"), 10)),
+            .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({constant_filter, file_filter}, {}, table_schema, &request)
+                    .ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 2);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3));
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+}
+
+// ----------------------------------------------------------------------
+// L1 direct filter localization tests.
+// These tests call localize_filters() directly to pin the core interface
+// contract apart from create_scan_request() initialization.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperLocalizeFiltersTest, VisibleLocalFilterAddsPredicateColumnAndConjunct) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {name_col("id", int_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("id", int_type, 7)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(11, 0, int_type, "id")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(7));
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(7)), LocalIndex(0));
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(request.conjuncts[0]->root().get());
+    EXPECT_EQ(localized_slot->slot_id(), 11);
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_EQ(localized_slot->column_name(), "id");
+    EXPECT_TRUE(localized_slot->data_type()->equals(*int_type));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, ConstantFilterBuildsEntryWithoutFileScanColumn) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true;
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({partition_column},
+                                      {{"part", Field::create_field<TYPE_INT>(7)}}, {})
+                        .ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(3, 0, i32(), "part")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(),
+              mapper.mappings()[0].constant_index);
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, ColumnPredicatesUseOnlyExistingLocalPositions) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {name_col("id", int_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("id", int_type, 3)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableColumnPredicates predicates;
+    predicates[GlobalIndex(0)] = {create_comparison_predicate<PredicateType::GT>(
+            0, "id", int_type, Field::create_field<TYPE_INT>(10), false)};
+
+    FileScanRequest request_without_local_position;
+    ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_without_local_position).ok());
+    EXPECT_TRUE(request_without_local_position.column_predicate_filters.empty());
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    EXPECT_FALSE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+
+    FileScanRequest request_with_local_position;
+    request_with_local_position.non_predicate_columns.push_back(
+            LocalColumnIndex::top_level(LocalColumnId(3)));
+    request_with_local_position.local_positions.emplace(LocalColumnId(3), LocalIndex(0));
+    ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_with_local_position).ok());
+
+    ASSERT_EQ(request_with_local_position.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request_with_local_position.non_predicate_columns[0].column_id(), LocalColumnId(3));
+    EXPECT_TRUE(request_with_local_position.predicate_columns.empty());
+    ASSERT_EQ(request_with_local_position.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request_with_local_position.column_predicate_filters[0].effective_file_column_id(),
+              LocalColumnId(3));
+    ASSERT_EQ(request_with_local_position.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request_with_local_position.column_predicate_filters[0].predicates[0]->type(),
+              PredicateType::GT);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, NestedFilterOnlyChildMergesIntoPredicateProjection) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children);
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(5)), LocalIndex(0));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, PreservesExistingScanStateWhenAddingPredicateColumn) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+            name_col("score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("id", int_type, 3),
+            name_col("score", int_type, 4),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(2, 0, int_type, "id")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    request.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(4)));
+    request.local_positions.emplace(LocalColumnId(4), LocalIndex(0));
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(4));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3));
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(4)), LocalIndex(0));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(3)), LocalIndex(1));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(1));
+}
+
+// ----------------------------------------------------------------------
+// L1 scan request and filter localization tests.
+// These tests assert predicate/non-predicate split, local positions, hidden
+// filter mappings, and nested predicate targets.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperScanRequestTest, ColumnPredicatesDoNotForceRowPredicateMaterialization) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+            name_col("name", string_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("id", int_type, 0),
+            name_col("name", string_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableColumnPredicates predicates;
+    predicates[GlobalIndex(0)] = {create_comparison_predicate<PredicateType::GT>(
+            0, "id", int_type, Field::create_field<TYPE_INT>(10), false)};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, predicates, table_schema, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(0)), LocalIndex(0));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)), LocalIndex(1));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(0));
+}
+
+TEST(ColumnMapperScanRequestTest, HiddenTopLevelFilterMappingUsesNameFallback) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("id", 1, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("id", 1, int_type, 0),
+            field_id_col("score", 2, int_type, 1),
+    };
+
+    auto filter_expr = int_gt(table_slot(7, 1, int_type, "score"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(1)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, table_schema, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(1));
+}
+
+TEST(ColumnMapperScanRequestTest, StructOutputAndFilterOnlyChildAreMerged) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children);
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+}
+
+TEST(ColumnMapperScanRequestTest, RenamedNestedPredicateTargetsMappedFileChild) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"b"}));
+}
+
+TEST(ColumnMapperScanRequestTest, NestedInNullAndReverseComparisonFiltersAreMerged) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a");
+    auto in_filter =
+            in_predicate(nested_a, int_type,
+                         {Field::create_field<TYPE_INT>(5), Field::create_field<TYPE_INT>(7)});
+    auto reverse_filter = binary_predicate(
+            TExprOpcode::LT, literal(int_type, Field::create_field<TYPE_INT>(3)), nested_a);
+    auto null_filter = null_predicate(nested_a, true);
+    auto not_null_filter = null_predicate(nested_a, false);
+    auto filter_expr = compound_predicate(
+            TExprOpcode::COMPOUND_AND,
+            compound_predicate(TExprOpcode::COMPOUND_AND, in_filter, reverse_filter),
+            compound_predicate(TExprOpcode::COMPOUND_AND, null_filter, not_null_filter));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 4);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[1]->type(), PredicateType::GT);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[2]->type(), PredicateType::IS_NULL);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[3]->type(),
+              PredicateType::IS_NOT_NULL);
+}
+
+TEST(ColumnMapperScanRequestTest, NestedPredicateFilterThroughSafeCast) {
+    const auto file_int_type = i32();
+    const auto table_bigint_type = i64();
+    const auto string_type = str();
+
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = std::make_shared<DataTypeStruct>(
+            DataTypes {table_bigint_type, string_type}, Strings {"a", "b"});
+
+    auto file_a = name_col("a", file_int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct, "s"), file_int_type, "a");
+    auto filter_expr =
+            binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_bigint_type),
+                             literal(table_bigint_type, Field::create_field<TYPE_BIGINT>(5)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::GT);
+}
+
+TEST(ColumnMapperScanRequestTest, UnsafeCastDoesNotBuildNestedPredicateFilter) {
+    const auto file_bigint_type = i64();
+    const auto table_int_type = i32();
+    const auto string_type = str();
+
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = std::make_shared<DataTypeStruct>(
+            DataTypes {table_int_type, string_type}, Strings {"a", "b"});
+
+    auto file_a = name_col("a", file_bigint_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct, "s"), file_bigint_type, "a");
+    auto filter_expr = binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_int_type),
+                                        literal(table_int_type, Field::create_field<TYPE_INT>(5)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+}
+
+TEST(ColumnMapperScanRequestTest, DeepNestedPredicateTargetsLeafPath) {
+    const auto id_type = i32();
+    const auto name_type = str();
+    const auto string_type = str();
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+
+    auto full_table_inner_type =
+            std::make_shared<DataTypeStruct>(DataTypes {id_type, name_type}, Strings {"id", "n"});
+    auto full_table_struct_type = std::make_shared<DataTypeStruct>(
+            DataTypes {full_table_inner_type, string_type}, Strings {"a", "b"});
+
+    auto file_id = name_col("id", id_type, 0);
+    auto file_name = name_col("n", name_type, 1);
+    auto file_a = struct_name_col("a", {file_id, file_name}, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_id =
+            struct_element(struct_element(table_slot(0, 0, full_table_struct_type, "s"),
+                                          full_table_inner_type, "a"),
+                           id_type, "id");
+    auto filter_expr =
+            in_predicate(nested_id, id_type,
+                         {Field::create_field<TYPE_INT>(5), Field::create_field<TYPE_INT>(7)});
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0, 0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a", "id"}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST);
+}
+
+TEST(ColumnMapperScanRequestTest, ArrayStructProjectionPrunesElementChildren) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_b = name_col("b", string_type);
+    auto table_element = struct_name_col("element", {table_b});
+    auto table_array = array_col("items", -1, table_element);
+    table_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&table_array, -1);
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_element = struct_name_col("element", {file_a, file_b}, 0);
+    auto file_array = array_col("items", -1, file_element, 4);
+    file_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&file_array, 4);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(4));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 0);
+    ASSERT_EQ(projection.children[0].children.size(), 1);
+    EXPECT_EQ(projection.children[0].children[0].local_id(), 1);
+
+    const auto* mapped_array = assert_cast<const DataTypeArray*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    const auto* mapped_element = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapped_array->get_nested_type()).get());
+    ASSERT_EQ(mapped_element->get_elements().size(), 1);
+    EXPECT_EQ(mapped_element->get_element_name(0), "b");
+}
+
+TEST(ColumnMapperScanRequestTest, MapValueStructProjectionPrunesValueChildren) {
+    const auto key_type = str();
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_value_b = name_col("b", string_type);
+    auto table_value = struct_name_col("value", {table_value_b});
+    auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type);
+    table_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&table_map, -1);
+
+    auto file_key = name_col("key", key_type, 0);
+    auto file_value_a = name_col("a", int_type, 0);
+    auto file_value_b = name_col("b", string_type, 1);
+    auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1);
+    auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 6);
+    file_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&file_map, 6);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_map}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(6));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    ASSERT_EQ(projection.children[0].children.size(), 1);
+    EXPECT_EQ(projection.children[0].children[0].local_id(), 1);
+
+    const auto* mapped_map =
+            assert_cast<const DataTypeMap*>(remove_nullable(mapper.mappings()[0].file_type).get());
+    const auto* mapped_value =
+            assert_cast<const DataTypeStruct*>(remove_nullable(mapped_map->get_value_type()).get());
+    ASSERT_EQ(mapped_value->get_elements().size(), 1);
+    EXPECT_EQ(mapped_value->get_element_name(0), "b");
+}
+
+// Scenario: a table struct projects only child `b`, while the file struct stores `a,b`.
+// BY_NAME mapping should read only the physical child `b` and rebuild the mapped file type to the
+// projected struct shape.
+TEST(ColumnMapperScanRequestTest, StructProjectionPrunesChildrenByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    set_name_identifiers(&table_struct, 0);
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 0);
+    set_name_identifiers(&file_struct, 0);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(0));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto* projected_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+}
+
+// Scenario: a row filter reaches a struct child through an array wrapper
+// (`items.item.a > 5`). The nested predicate filter path only supports direct struct paths, so
+// the mapper keeps this as a row predicate and reads the full array root for predicate evaluation.
+TEST(ColumnMapperScanRequestTest, ArrayWrapperDoesNotBuildNestedPredicateFilter) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_element = struct_name_col("item", {file_a, file_b}, 0);
+    auto file_array = array_col("items", -1, file_element, 0);
+    set_name_identifiers(&file_array, 0);
+
+    auto table_array = file_array;
+
+    const auto item_type = file_element.type;
+    auto item_expr = struct_element(table_slot(0, 0, table_array.type, "items"), item_type, "item");
+    auto filter_expr = int_gt(struct_element(item_expr, int_type, "a"), 5);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_array}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(0));
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: a map value struct projects child `b`, while a row filter reads value child `a`.
+// The filter is too complex to become a file-local nested predicate, but the predicate projection
+// must replace the output projection for the same map root and contain both physical value children.
+TEST(ColumnMapperScanRequestTest, MapFilterOnlyValueChildMergesWithOutputProjection) {
+    const auto key_type = i32();
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_value_b = name_col("b", string_type);
+    auto table_value = struct_name_col("value", {table_value_b});
+    auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type);
+    set_name_identifiers(&table_map, 0);
+
+    auto file_key = name_col("key", key_type, 0);
+    auto file_value_a = name_col("a", int_type, 0);
+    auto file_value_b = name_col("b", string_type, 1);
+    auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1);
+    auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 0);
+    set_name_identifiers(&file_map, 0);
+
+    auto full_value_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    auto full_map_type = std::make_shared<DataTypeMap>(key_type, full_value_type);
+    auto value_expr =
+            struct_element(table_slot(0, 0, full_map_type, "m"), full_value_type, "value");
+    auto filter_expr = int_gt(struct_element(value_expr, int_type, "a"), 5);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    const auto& projection = request.predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(0));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    EXPECT_EQ(projection_ids(projection.children[0].children), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: when projected struct children are an in-order prefix of the file struct, the mapper can
+// read those physical children directly without rebuilding the file-side complex type.
+TEST(ColumnMapperScanRequestTest, MatchingProjectedStructDoesNotNeedComplexRematerialize) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_b = field_id_col("b", 2, string_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, string_type, 1);
+    auto file_c = field_id_col("c", 3, int_type, 2);
+    auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_FALSE(projection.project_all_children);
+    EXPECT_EQ(projection_ids(projection.children), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: Iceberg field-id mapping sees a renamed struct child, but the physical child order and
+// types still match, so projection remains a full physical read instead of rebuilding a new type.
+TEST(ColumnMapperScanRequestTest, RenameOnlyProjectedStructDoesNotRebuildFileProjection) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+    EXPECT_EQ(mapper.mappings()[0].projected_file_children.size(),
+              mapper.mappings()[0].original_file_children.size());
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].child_mappings[1].table_column_name, "renamed_b");
+    EXPECT_EQ(mapper.mappings()[0].child_mappings[1].file_column_name, "b");
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a row filter references an unprojected struct child, so the predicate projection is
+// merged with the output projection and the mapper rebuilds the projected file struct type.
+TEST(ColumnMapperScanRequestTest, PredicateProjectionRebuildsProjectedStructFileType) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_b = field_id_col("b", 2, string_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_b});
+    auto full_table_c = field_id_col("c", 3, int_type);
+    auto full_table_struct = struct_col("s", 10, {table_a, table_b, full_table_c});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, string_type, 1);
+    auto file_c = field_id_col("c", 3, int_type, 2);
+    auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr =
+            int_gt(struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "c"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    const auto& projection = request.predicate_columns[0];
+    EXPECT_FALSE(projection.project_all_children);
+    EXPECT_EQ(projection_ids(projection.children), std::vector<int32_t>({0, 1, 2}));
+
+    const auto* mapped_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    ASSERT_EQ(mapped_type->get_elements().size(), 3);
+    EXPECT_EQ(mapped_type->get_element_name(0), "a");
+    EXPECT_EQ(mapped_type->get_element_name(1), "b");
+    EXPECT_EQ(mapped_type->get_element_name(2), "c");
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a filter references a top-level column that is not projected by the query; the mapper
+// creates a hidden filter mapping without adding that hidden column to visible table mappings.
+TEST(ColumnMapperScanRequestTest, PredicateOnlyTopLevelColumnUsesHiddenMapping) {
+    const auto int_type = i32();
+
+    auto table_id = field_id_col("id", 0, int_type);
+    auto table_c = field_id_col("c", 11, int_type);
+    auto table_struct = struct_col("s", 10, {table_c});
+
+    auto file_id = field_id_col("id", 0, int_type, 0);
+    auto file_c = field_id_col("c", 11, int_type, 0);
+    auto file_struct = struct_col("s", 10, {file_c}, 10);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_id}, {}, {file_id, file_struct}).ok());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_EQ(mapper.mappings()[0].table_column_name, "id");
+
+    auto filter_expr =
+            int_gt(struct_element(table_slot(7, 1, table_struct.type, "s"), int_type, "c"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_id}, &request).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_EQ(mapper.mappings()[0].table_column_name, "id");
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(10));
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+}
+
+// Scenario: a nested predicate targets a table-side renamed struct field; both predicate pruning and
+// scan projection must resolve that field to the old physical file child.
+TEST(ColumnMapperScanRequestTest, NestedPredicateProjectionUsesMappedRenamedChild) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 10);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+}
+
+// Scenario: element_at(struct, 'table_name') in a row filter is localized to the physical file
+// child name, matching the struct_element rewrite and nested predicate filter resolution paths.
+TEST(ColumnMapperScanRequestTest,
+     FileLocalElementAtConjunctUsesFileChildNameForRenamedStructField) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 10);
+
+    auto child_expr = element_at(table_slot(0, 0, table_struct.type, table_struct.name), int_type,
+                                 "renamed_b");
+    auto filter_expr = int_gt(child_expr, 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto& localized_child = request.conjuncts[0]->root()->children()[0];
+    EXPECT_EQ(localized_child->expr_name(), "element_at");
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_name(), "s");
+    EXPECT_EQ(localized_slot->column_id(), 0);
+
+    const auto* localized_literal =
+            assert_cast<const VLiteral*>(localized_child->children()[1].get());
+    Field localized_field;
+    localized_literal->get_column_ptr()->get(0, localized_field);
+    ASSERT_EQ(localized_field.get_type(), TYPE_STRING);
+    EXPECT_EQ(std::string(localized_field.as_string_view()), "b");
+}
+
+// Scenario: nested element_at(struct, name) localization rewrites both selector names and
+// intermediate return types. The outer selector must be prepared against the projected file child
+// struct, not the table child struct or the full historical file child struct.
+TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesFileChildTypeForRenamedLeaf) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_new_aa = field_id_col("new_aa", 23, int_type);
+    auto table_bb = field_id_col("bb", 24, string_type);
+    auto table_new_a = struct_col("new_a", 20, {table_new_aa, table_bb});
+    auto table_struct = struct_col("struct_column2", 19, {table_new_a});
+
+    auto file_aa = field_id_col("aa", 23, int_type, 0);
+    auto file_bb = field_id_col("bb", 24, string_type, 1);
+    auto file_new_a = struct_col("new_a", 20, {file_aa, file_bb}, 0);
+    auto file_struct = struct_col("struct_column2", 19, {file_new_a}, 10);
+
+    const auto table_slot_expr = table_slot(0, 0, table_struct.type, "struct_column2");
+    const auto table_parent_expr = element_at(table_slot_expr, table_new_a.type, "new_a");
+    const auto table_leaf_expr = element_at(table_parent_expr, int_type, "new_aa");
+    auto filter_expr = binary_predicate(TExprOpcode::EQ, table_leaf_expr,
+                                        literal(int_type, Field::create_field<TYPE_INT>(50)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+
+    const auto& localized_leaf = request.conjuncts[0]->root()->children()[0];
+    ASSERT_EQ(localized_leaf->expr_name(), "element_at");
+    const auto& localized_parent = localized_leaf->children()[0];
+    ASSERT_EQ(localized_parent->expr_name(), "element_at");
+
+    const auto* localized_leaf_selector =
+            assert_cast<const VLiteral*>(localized_leaf->children()[1].get());
+    Field localized_leaf_field;
+    localized_leaf_selector->get_column_ptr()->get(0, localized_leaf_field);
+    ASSERT_EQ(localized_leaf_field.get_type(), TYPE_STRING);
+    EXPECT_EQ(std::string(localized_leaf_field.as_string_view()), "aa");
+
+    const auto* localized_parent_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(localized_parent->data_type()).get());
+    ASSERT_EQ(localized_parent_type->get_elements().size(), 2);
+    EXPECT_EQ(localized_parent_type->get_element_name(0), "aa");
+    EXPECT_EQ(localized_parent_type->get_element_name(1), "bb");
+}
+
+// Scenario: output projection reads one struct child while the row filter reads a different nested
+// struct child. File-local conjunct rewrite must use the merged scan projection type. In the SQL
+// shape below, `SELECT element_at(s, 'c') WHERE element_at(element_at(s, 'b'), 'cc') LIKE ...`
+// reads file children `b.cc` and `c`; the localized inner `element_at(s, 'b')` returns
+// `Struct(cc)`, not the full old file child `Struct(cc, new_dd)`.
+TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesMergedScanProjectionChildType) {
+    const auto string_type = str();
+    const auto int_type = i32();
+
+    auto table_cc = field_id_col("cc", 23, string_type);
+    auto table_new_dd = field_id_col("new_dd", 24, int_type);
+    auto table_b = struct_col("b", 20, {table_cc, table_new_dd});
+    auto table_c = field_id_col("c", 25, string_type);
+    auto full_table_struct = struct_col("struct_column2", 19, {table_b, table_c});
+    auto projected_table_struct = struct_col("struct_column2", 19, {table_c});
+
+    auto file_cc = field_id_col("cc", 23, string_type, 0);
+    auto file_new_dd = field_id_col("new_dd", 24, int_type, 1);
+    auto file_b = struct_col("b", 20, {file_cc, file_new_dd}, 0);
+    auto file_c = field_id_col("c", 25, string_type, 1);
+    auto file_struct = struct_col("new_struct_column", 19, {file_b, file_c}, 10);
+
+    const auto table_slot_expr = table_slot(0, 0, full_table_struct.type, "struct_column2");
+    const auto table_parent_expr = element_at(table_slot_expr, table_b.type, "b");
+    const auto table_leaf_expr = element_at(table_parent_expr, string_type, "cc");
+    auto filter_expr = like_expr(table_leaf_expr, "NestedC%");
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({projected_table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {projected_table_struct}, &request).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(10));
+
+    const auto& localized_leaf = request.conjuncts[0]->root()->children()[0];
+    ASSERT_EQ(localized_leaf->expr_name(), "element_at");
+    const auto& localized_parent = localized_leaf->children()[0];
+    ASSERT_EQ(localized_parent->expr_name(), "element_at");
+
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(localized_parent->children()[0].get());
+    EXPECT_EQ(localized_slot->column_name(), "new_struct_column");
+    // The scan projection keeps the top-level file column id above, while the localized conjunct
+    // executes on the file-reader Block. The VSlotRef column id is therefore the block position of
+    // `new_struct_column` in this request, not the file schema id 10.
+    EXPECT_EQ(localized_slot->column_id(), 0);
+
+    const auto* localized_parent_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(localized_parent->data_type()).get());
+    ASSERT_EQ(localized_parent_type->get_elements().size(), 1);
+    EXPECT_EQ(localized_parent_type->get_element_name(0), "cc");
+}
+
+// Scenario: struct child access through a computed map/array parent is not localized as a file
+// conjunct, because the projected value struct can have a different physical child order.
+TEST(ColumnMapperScanRequestTest, MapValuesStructChildConjunctStaysTableLevel) {
+    const auto key_type = str();
+    const auto string_type = str();
+    const auto int_type = i32();
+
+    auto table_gender = field_id_col("gender", 17, string_type);
+    auto table_full_name = field_id_col("full_name", 7, string_type);
+    auto table_value = struct_col("value", 6, {table_gender, table_full_name});
+    auto table_map = map_col("new_map_column", 2, {table_value}, key_type, table_value.type);
+
+    auto file_key = field_id_col("key", 5, key_type, 0);
+    auto file_age = field_id_col("age", 8, int_type, 0);
+    auto file_full_name = field_id_col("full_name", 7, string_type, 1);
+    auto file_gender = field_id_col("gender", 17, string_type, 2);
+    auto file_value = struct_col("value", 6, {file_age, file_full_name, file_gender}, 1);
+    auto file_map =
+            map_col("new_map_column", 2, {file_key, file_value}, key_type, file_value.type, 1);
+
+    const auto map_slot = table_slot(0, 0, table_map.type, "new_map_column");
+    const auto values_expr = map_values(map_slot, table_value.type);
+    const auto first_value = array_element_at(values_expr, table_value.type, 1);
+    const auto full_name_expr = element_at(first_value, string_type, "full_name");
+    auto filter_expr = like_expr(full_name_expr, "J%");
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok());
+
+    EXPECT_TRUE(request.conjuncts.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children);
+    ASSERT_EQ(request.predicate_columns[0].children.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].children[0].local_id(), 1);
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: MAP_KEYS only reads map keys, but localizing it by wrapping the evolved file map slot
+// in CAST(file_map AS table_map) would still cast the old value struct to the new value struct.
+// Keep the conjunct table-level when the map value schema changed.
+TEST(ColumnMapperScanRequestTest, MapKeysConjunctWithEvolvedValueStructStaysTableLevel) {
+    const auto key_type = str();
+    const auto string_type = str();
+    const auto int_type = i32();
+
+    auto table_age = field_id_col("age", 8, int_type);
+    auto table_full_name = field_id_col("full_name", 7, string_type);
+    auto table_gender = field_id_col("gender", 17, string_type);
+    auto table_value = struct_col("value", 6, {table_age, table_full_name, table_gender});
+    auto table_key = field_id_col("key", 5, key_type);
+    auto table_map =
+            map_col("new_map_column", 2, {table_key, table_value}, key_type, table_value.type);
+
+    auto file_key = field_id_col("key", 5, key_type, 0);
+    auto file_name = field_id_col("name", 18, string_type, 0);
+    auto file_age = field_id_col("age", 8, int_type, 1);
+    auto file_value = struct_col("value", 6, {file_name, file_age}, 1);
+    auto file_map = map_col("map_column", 2, {file_key, file_value}, key_type, file_value.type, 1);
+
+    const auto map_slot = table_slot(0, 0, table_map.type, "new_map_column");
+    const auto keys_expr = map_keys(map_slot, key_type);
+    auto filter_expr = array_contains(
+            keys_expr, literal(key_type, Field::create_field<TYPE_STRING>("person5")));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok());
+
+    EXPECT_TRUE(request.conjuncts.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: an array element struct projection only contains missing/default children; the mapper
+// falls back to reading the full physical element so the reader never gets an empty projection.
+TEST(ColumnMapperScanRequestTest, ArrayStructOnlyMissingElementChildUsesFullFileProjection) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_element = struct_col("element", 0, {file_a, file_b}, 0);
+    auto file_array = array_col("xs", 10, file_element, 10);
+
+    auto missing_child = field_id_col("missing_child", 99, string_type);
+    auto table_element = struct_col("element", 0, {missing_child});
+    auto table_array = array_col("xs", 10, table_element);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(10));
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a map value struct projection only contains missing/default children; the mapper keeps
+// the map key/value shape and reads the full physical value struct instead of an empty value child.
+TEST(ColumnMapperScanRequestTest, MapValueStructOnlyMissingChildUsesFullValueProjection) {
+    const auto key_type = i32();
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto file_key = field_id_col("key", 0, key_type, 0);
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_value = struct_col("value", 1, {file_a, file_b}, 1);
+    auto file_map = map_col("m", 10, {file_key, file_value}, key_type, file_value.type, 10);
+
+    auto missing_child = field_id_col("missing_child", 99, string_type);
+    auto table_value = struct_col("value", 1, {missing_child});
+    auto table_map = map_col("m", 10, {table_value}, key_type, table_value.type);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_map}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(10));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    EXPECT_TRUE(projection.children[0].project_all_children);
+    EXPECT_TRUE(projection.children[0].children.empty());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// ----------------------------------------------------------------------
+// L1 complex schema evolution and split isolation.
+// These tests call the mapper repeatedly with different file schemas and
+// verify that split-local state is rebuilt instead of leaked.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperSchemaEvolutionTest, StructChildrenHandleMissingRenameReorderAndDroppedFields) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, string_type);
+    auto table_c = field_id_col("c", 3, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b, table_c});
+
+    auto v1_a = field_id_col("a", 1, int_type, 0);
+    auto v1_b = field_id_col("b", 2, string_type, 1);
+    auto file_v1 = struct_col("s", 10, {v1_a, v1_b}, 5);
+
+    auto v2_b = field_id_col("b", 2, string_type, 0);
+    auto v2_a = field_id_col("a", 1, int_type, 1);
+    auto v2_c = field_id_col("c", 3, int_type, 2);
+    auto file_v2 = struct_col("s", 10, {v2_b, v2_a, v2_c}, 8);
+
+    TableColumnMapper v1_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(v1_mapper.create_mapping({table_struct}, {}, {file_v1}).ok());
+    FileScanRequest v1_request;
+    ASSERT_TRUE(v1_mapper.create_scan_request({}, {}, {table_struct}, &v1_request).ok());
+
+    const auto& v1_mapping = v1_mapper.mappings()[0];
+    ASSERT_EQ(v1_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(*v1_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(*v1_mapping.child_mappings[1].file_local_id, 1);
+    EXPECT_FALSE(v1_mapping.child_mappings[2].file_local_id.has_value());
+    ASSERT_EQ(v1_request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(v1_request.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(v1_request.non_predicate_columns[0].project_all_children);
+
+    TableColumnMapper v2_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(v2_mapper.create_mapping({table_struct}, {}, {file_v2}).ok());
+    FileScanRequest v2_request;
+    ASSERT_TRUE(v2_mapper.create_scan_request({}, {}, {table_struct}, &v2_request).ok());
+
+    const auto& v2_mapping = v2_mapper.mappings()[0];
+    ASSERT_EQ(v2_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(*v2_mapping.child_mappings[0].file_local_id, 1);
+    EXPECT_EQ(*v2_mapping.child_mappings[1].file_local_id, 0);
+    EXPECT_EQ(*v2_mapping.child_mappings[2].file_local_id, 2);
+    ASSERT_EQ(v2_request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(v2_request.non_predicate_columns[0].column_id(), LocalColumnId(8));
+    EXPECT_TRUE(v2_request.non_predicate_columns[0].project_all_children);
+}
+
+TEST(ColumnMapperSchemaEvolutionTest, DroppedStructChildrenAreNotRead) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_struct = struct_col("s", 10, {table_a});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, string_type, 1);
+    auto file_c = field_id_col("c", 3, int_type, 2);
+    auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(5));
+    ASSERT_FALSE(projection.project_all_children);
+    EXPECT_EQ(projection_ids(projection.children), std::vector<int32_t>({0}));
+}
+
+TEST(ColumnMapperSchemaEvolutionTest, ReusedMapperClearsSplitLocalConstantsAndFileIds) {
+    const auto int_type = i32();
+    auto id = name_col("id", int_type);
+    auto added = name_col("added", int_type);
+    added.default_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(7)));
+    const std::vector<ColumnDefinition> table_schema = {id, added};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, {name_col("id", int_type, 0)}).ok());
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(*mapper.mappings()[0].file_local_id, 0);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type);
+
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {},
+                                      {name_col("id", int_type, 3), name_col("added", int_type, 4)})
+                        .ok());
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(*mapper.mappings()[0].file_local_id, 3);
+    EXPECT_EQ(*mapper.mappings()[1].file_local_id, 4);
+    EXPECT_TRUE(mapper.constant_map().empty());
+}
+
+// ----------------------------------------------------------------------
+// L2 cast-aware filter localization tests.
+// These tests belong to TableColumnMapper rather than Cast: they assert when the mapper builds
+// projection casts, rewrites table predicates to file-local slot casts, converts literals to the
+// current split's file type, and keeps repeated scan-request rewrites idempotent.
+// ----------------------------------------------------------------------
+
+// Scenario: table/file primitive types differ, so the visible mapping must build a cast projection.
+TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastProjectionForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    FileScanRequest file_request;
+    status = mapper.create_scan_request({}, {}, projected_columns, &file_request);
+    ASSERT_TRUE(status.ok()) << status;
+    const auto& mapping = mapper.mappings()[0];
+    EXPECT_FALSE(mapping.is_trivial);
+    ASSERT_NE(mapping.projection, nullptr);
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    int result_column_id = -1;
+    status = prepare_open_execute(mapping.projection.get(), &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& result_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(result_column_id).column);
+    EXPECT_EQ(result_column.get_data()[0], 11);
+    EXPECT_EQ(result_column.get_data()[1], 22);
+
+    mapping.projection->close();
+}
+
+// Scenario: equivalent table/file types keep the mapping trivial and avoid unnecessary projection casts.
+TEST_F(ColumnMapperCastTest, ColumnMapperTreatsEquivalentTypesAsTrivial) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i32());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a table predicate on a widened type is localized by casting the file slot to table type.
+TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastFilterForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 1);
+    const auto& localized_child = localized_expr->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_child.get()), nullptr);
+    ASSERT_EQ(localized_child->get_num_children(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+    EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type));
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = file_request.conjuncts[0].get();
+    status = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+    IColumn::Filter filter(block.rows(), 1);
+    bool can_filter_all = false;
+    status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(filter.size(), 2);
+    EXPECT_EQ(filter[0], 0);
+    EXPECT_EQ(filter[1], 1);
+
+    file_request.conjuncts[0]->close();
+}
+
+// Scenario: an already prepared table filter can still be cloned, rewritten, prepared, and opened as a file-local filter.
+TEST_F(ColumnMapperCastTest, ColumnMapperRepreparesRewrittenPreparedFilter) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto cast = Cast::create_shared(table_column.type);
+    cast->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(cast);
+    table_filter.global_indices = {GlobalIndex(0)};
+    status = table_filter.conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = table_filter.conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_expr.get()), nullptr);
+    ASSERT_EQ(localized_expr->get_num_children(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_expr->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+
+    status = file_request.conjuncts[0]->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = file_request.conjuncts[0]->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+
+    file_request.conjuncts[0]->close();
+}
+
+// Scenario: slot-literal comparison rewrites the literal to the current file type when conversion is safe.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForSlotLiteralPredicateTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 2);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_expr->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+    const auto& localized_literal = localized_expr->children()[1];
+    EXPECT_TRUE(localized_literal->is_literal());
+    EXPECT_TRUE(localized_literal->data_type()->equals(*file_field.type));
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = file_request.conjuncts[0].get();
+    status = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+    IColumn::Filter filter(block.rows(), 1);
+    bool can_filter_all = false;
+    status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(filter.size(), 2);
+    EXPECT_EQ(filter[0], 0);
+    EXPECT_EQ(filter[1], 1);
+
+    file_request.conjuncts[0]->close();
+}
+
+// Scenario: literal-slot comparison also rewrites the literal side and preserves operand order.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForLiteralSlotPredicateTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::LT);
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 2);
+    const auto& localized_literal = localized_expr->children()[0];
+    EXPECT_TRUE(localized_literal->is_literal());
+    EXPECT_TRUE(localized_literal->data_type()->equals(*file_field.type));
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_expr->children()[1].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = file_request.conjuncts[0].get();
+    status = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+    IColumn::Filter filter(block.rows(), 1);
+    bool can_filter_all = false;
+    status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(filter.size(), 2);
+    EXPECT_EQ(filter[0], 0);
+    EXPECT_EQ(filter[1], 1);
+
+    file_request.conjuncts[0]->close();
+}
+
+// Scenario: IN predicate literals are all rewritten to file type when every literal conversion is safe.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsInPredicateLiteralsForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = create_in_predicate();
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(22)));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 3);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_expr->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+    EXPECT_TRUE(localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*file_field.type));
+    EXPECT_TRUE(localized_expr->children()[2]->is_literal());
+    EXPECT_TRUE(localized_expr->children()[2]->data_type()->equals(*file_field.type));
+}
+
+// Scenario: IN predicate falls back to casting the file slot when any literal cannot be converted safely.
+TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenInPredicateLiteralRewriteFails) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", str());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = create_in_predicate();
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_STRING>("10")));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_STRING>("bad")));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 3);
+    const auto& localized_child = localized_expr->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_child.get()), nullptr);
+    ASSERT_EQ(localized_child->get_num_children(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+    EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type));
+    EXPECT_TRUE(localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*table_column.type));
+    EXPECT_TRUE(localized_expr->children()[2]->is_literal());
+    EXPECT_TRUE(localized_expr->children()[2]->data_type()->equals(*table_column.type));
+}
+
+// Scenario: split-local IN literal rewrites do not mutate the original table filter across different file schemas.
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenInPredicateLiteralAcrossSplits) {
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto predicate = create_in_predicate();
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(22)));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    auto int_file_field = name_col("value", i32(), 0);
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok());
+    FileScanRequest int_request;
+    ASSERT_TRUE(int_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &int_request,
+                                             &state)
+                        .ok());
+    ASSERT_EQ(int_request.conjuncts.size(), 1);
+    const auto& int_localized_expr = int_request.conjuncts[0]->root();
+    ASSERT_EQ(int_localized_expr->get_num_children(), 3);
+    EXPECT_TRUE(int_localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(int_localized_expr->children()[1]->data_type()->equals(*int_file_field.type));
+    EXPECT_TRUE(int_localized_expr->children()[2]->is_literal());
+    EXPECT_TRUE(int_localized_expr->children()[2]->data_type()->equals(*int_file_field.type));
+
+    auto bigint_file_field = name_col("value", i64(), 0);
+    TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok());
+    FileScanRequest bigint_request;
+    ASSERT_TRUE(bigint_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &bigint_request,
+                                             &state)
+                        .ok());
+    ASSERT_EQ(bigint_request.conjuncts.size(), 1);
+    const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root();
+    ASSERT_EQ(bigint_localized_expr->get_num_children(), 3);
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(bigint_localized_expr->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type));
+    EXPECT_TRUE(bigint_localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(bigint_localized_expr->children()[1]->data_type()->equals(*bigint_file_field.type));
+    EXPECT_TRUE(bigint_localized_expr->children()[2]->is_literal());
+    EXPECT_TRUE(bigint_localized_expr->children()[2]->data_type()->equals(*bigint_file_field.type));
+}
+
+// Scenario: binary predicate falls back to casting the file slot when literal conversion fails.
+TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenLiteralRewriteFails) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", str());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_STRING>("bad")));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 2);
+    const auto& localized_child = localized_expr->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_child.get()), nullptr);
+    ASSERT_EQ(localized_child->get_num_children(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+    EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type));
+    EXPECT_TRUE(localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*table_column.type));
+}
+
+// Scenario: split-local binary literal rewrite does not leak into a later split with a different file type.
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenLiteralAcrossSplits) {
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    auto int_file_field = name_col("value", i32(), 0);
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok());
+    FileScanRequest int_request;
+    ASSERT_TRUE(int_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &int_request,
+                                             &state)
+                        .ok());
+    ASSERT_EQ(int_request.conjuncts.size(), 1);
+    const auto& int_localized_expr = int_request.conjuncts[0]->root();
+    ASSERT_EQ(int_localized_expr->get_num_children(), 2);
+    EXPECT_TRUE(int_localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(int_localized_expr->children()[1]->data_type()->equals(*int_file_field.type));
+
+    auto bigint_file_field = name_col("value", i64(), 0);
+    TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok());
+    FileScanRequest bigint_request;
+    ASSERT_TRUE(bigint_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &bigint_request,
+                                             &state)
+                        .ok());
+    ASSERT_EQ(bigint_request.conjuncts.size(), 1);
+    const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root();
+    ASSERT_EQ(bigint_localized_expr->get_num_children(), 2);
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(bigint_localized_expr->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type));
+    EXPECT_TRUE(bigint_localized_expr->children()[1]->is_literal());
+    EXPECT_TRUE(bigint_localized_expr->children()[1]->data_type()->equals(*bigint_file_field.type));
+}
+
+// Scenario: an explicit user/table cast is preserved while the underlying slot is localized correctly.
+TEST_F(ColumnMapperCastTest, ColumnMapperKeepsExplicitSlotCastInSlotLiteralPredicate) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto explicit_cast = Cast::create_shared(std::make_shared<DataTypeString>());
+    explicit_cast->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    auto predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    predicate->add_child(explicit_cast);
+    predicate->add_child(
+            VLiteral::create_shared(table_column.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state)
+                    .ok());
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto& localized_expr = file_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 2);
+    const auto& localized_cast = localized_expr->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_cast.get()), nullptr);
+    EXPECT_TRUE(localized_cast->data_type()->equals(DataTypeString()));
+    ASSERT_EQ(localized_cast->get_num_children(), 1);
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_cast->children()[0].get()), nullptr);
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(localized_cast->children()[0]->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type));
+}
+
+// Scenario: repeated scan request creation stays idempotent and does not wrap Cast(Cast(slot)).
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotNestCastFilterAcrossScanRequests) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+
+    auto predicate = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest first_request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projected_columns, &first_request,
+                                           &state)
+                        .ok());
+    FileScanRequest second_request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projected_columns, &second_request,
+                                           &state)
+                        .ok());
+
+    ASSERT_EQ(second_request.conjuncts.size(), 1);
+    const auto& localized_expr = second_request.conjuncts[0]->root();
+    ASSERT_EQ(localized_expr->get_num_children(), 1);
+    const auto& localized_child = localized_expr->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(localized_child.get()), nullptr);
+    ASSERT_EQ(localized_child->get_num_children(), 1);
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+}
+
+// Scenario: a filter cloned from a previous cast rewrite is adjusted to the next split's matching file type.
+TEST_F(ColumnMapperCastTest, ColumnMapperRewritesPreviousCastFilterToMatchingSplitType) {
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto predicate = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    auto int_file_field = name_col("value", i32(), 0);
+
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok());
+    FileScanRequest int_request;
+    ASSERT_TRUE(int_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &int_request,
+                                             &state)
+                        .ok());
+
+    const auto& int_localized_expr = int_request.conjuncts[0]->root();
+    ASSERT_EQ(int_localized_expr->get_num_children(), 1);
+    ASSERT_NE(dynamic_cast<const Cast*>(int_localized_expr->children()[0].get()), nullptr);
+
+    auto bigint_file_field = name_col("value", i64(), 0);
+
+    TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok());
+    FileScanRequest bigint_request;
+    ASSERT_TRUE(bigint_mapper
+                        .create_scan_request({table_filter}, {}, projected_columns, &bigint_request,
+                                             &state)
+                        .ok());
+
+    const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root();
+    ASSERT_EQ(bigint_localized_expr->get_num_children(), 1);
+    const auto& bigint_localized_child = bigint_localized_expr->children()[0];
+    const auto* localized_slot = assert_cast<const VSlotRef*>(bigint_localized_child.get());
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type));
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({11, 22}));
+    auto* conjunct = bigint_request.conjuncts[0].get();
+    auto status = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+    IColumn::Filter filter(block.rows(), 1);
+    bool can_filter_all = false;
+    status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(filter.size(), 2);
+    EXPECT_EQ(filter[0], 0);
+    EXPECT_EQ(filter[1], 1);
+    conjunct->close();
+}
+
+// Scenario: localized slot keeps table slot id while column id tracks the file block position.
+TEST_F(ColumnMapperCastTest, ColumnMapperKeepsTableSlotIdWhenFileBlockPositionChanges) {
+    auto table_column = name_col("value", i64());
+    std::vector<ColumnDefinition> projected_columns {table_column};
+
+    auto file_field = name_col("value", i64(), 10);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, {file_field}).ok());
+
+    auto predicate = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    predicate->add_child(VSlotRef::create_shared(7, 0, -1, table_column.type, "value"));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(predicate);
+    table_filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest first_request;
+    ASSERT_TRUE(mapper.localize_filters({table_filter}, {}, &first_request, &state).ok());
+    ASSERT_EQ(first_request.conjuncts.size(), 1);
+    const auto* first_slot =
+            assert_cast<const VSlotRef*>(first_request.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(first_slot->slot_id(), 7);
+    EXPECT_EQ(first_slot->column_id(), 0);
+
+    FileScanRequest second_request;
+    second_request.local_positions.emplace(LocalColumnId(9), LocalIndex(0));
+    second_request.local_positions.emplace(LocalColumnId(10), LocalIndex(1));
+    second_request.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(9)));
+    ASSERT_TRUE(mapper.localize_filters({table_filter}, {}, &second_request, &state).ok());
+    ASSERT_EQ(second_request.conjuncts.size(), 1);
+    const auto* second_slot =
+            assert_cast<const VSlotRef*>(second_request.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(second_slot->slot_id(), 7);
+    EXPECT_EQ(second_slot->column_id(), 1);
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({100, 100}));
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({11, 22}));
+    auto* conjunct = second_request.conjuncts[0].get();
+    auto status = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(status.ok()) << status;
+    status = conjunct->open(&state);
+    ASSERT_TRUE(status.ok()) << status;
+    IColumn::Filter filter(block.rows(), 1);
+    bool can_filter_all = false;
+    status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(filter.size(), 2);
+    EXPECT_EQ(filter[0], 0);
+    EXPECT_EQ(filter[1], 1);
+    conjunct->close();
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/delimited_text/csv_reader_test.cpp b/be/test/format_v2/delimited_text/csv_reader_test.cpp
new file mode 100644
index 00000000000000..7c787de7f8c09a
--- /dev/null
+++ b/be/test/format_v2/delimited_text/csv_reader_test.cpp
@@ -0,0 +1,1070 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/csv_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+
+#include "common/consts.h"
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/desc_tbl_builder.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::csv {
+namespace {
+
+TFileScanRangeParams csv_scan_params() {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    TFileAttributes attributes;
+    TFileTextScanRangeParams text_params;
+    text_params.__set_column_separator(",");
+    text_params.__set_line_delimiter("\n");
+    attributes.__set_text_params(std::move(text_params));
+    attributes.__set_header_type(BeConsts::CSV_WITH_NAMES);
+    params.__set_file_attributes(std::move(attributes));
+    params.__set_column_idxs({0, 1, 2});
+    return params;
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path,
+                                                      int64_t range_start_offset = 0,
+                                                      int64_t range_size = -1) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = path;
+    desc->range_start_offset = range_start_offset;
+    desc->range_size = range_size;
+    desc->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    return desc;
+}
+
+std::unique_ptr<io::FileDescription> unknown_size_file_description(const std::string& path) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = path;
+    desc->range_start_offset = 0;
+    desc->range_size = -1;
+    desc->file_size = -1;
+    return desc;
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    DescriptorTblBuilder builder(pool);
+    builder.declare_tuple()
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeInt32>()), "id"}
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeString>()),
+                                           "name"}
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeInt32>()),
+                                           "score"};
+    auto* desc_tbl = builder.build();
+    return desc_tbl->get_tuple_descriptor(0)->slots();
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor slot_desc;
+    slot_desc.__set_id(slot_id);
+    slot_desc.__set_parent(0);
+    slot_desc.__set_slotType(type->to_thrift());
+    slot_desc.__set_columnPos(slot_idx);
+    slot_desc.__set_byteOffset(0);
+    slot_desc.__set_nullIndicatorByte(slot_idx / 8);
+    slot_desc.__set_nullIndicatorBit(slot_idx % 8);
+    slot_desc.__set_slotIdx(slot_idx);
+    slot_desc.__set_isMaterialized(true);
+    slot_desc.__set_colName(name);
+    return pool->add(new SlotDescriptor(slot_desc));
+}
+
+std::vector<SlotDescriptor*> build_struct_slots(ObjectPool* pool) {
+    const auto nullable_int = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int, nullable_int}, Strings {"a", "b"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, struct_type, "s"),
+            make_test_slot(pool, 2, 2, make_nullable(std::make_shared<DataTypeInt32>()), "score")};
+}
+
+std::vector<SlotDescriptor*> build_nested_complex_slots(ObjectPool* pool) {
+    const auto nullable_int = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto nullable_string = make_nullable(std::make_shared<DataTypeString>());
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int, nullable_string}, Strings {"a", "b"}));
+    const auto array_type = make_nullable(std::make_shared<DataTypeArray>(struct_type));
+    const auto map_type =
+            make_nullable(std::make_shared<DataTypeMap>(nullable_string, struct_type));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, array_type, "xs"),
+            make_test_slot(pool, 2, 2, map_type, "kv")};
+}
+
+std::vector<SlotDescriptor*> build_char_varchar_slots(ObjectPool* pool) {
+    const auto nullable_char3 =
+            make_nullable(std::make_shared<DataTypeString>(3, PrimitiveType::TYPE_CHAR));
+    const auto nullable_varchar4 =
+            make_nullable(std::make_shared<DataTypeString>(4, PrimitiveType::TYPE_VARCHAR));
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_char3, nullable_varchar4}, Strings {"city", "country"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, nullable_char3, "city"),
+            make_test_slot(pool, 2, 2, struct_type, "region")};
+}
+
+std::unique_ptr<CsvReader> create_reader(
+        const std::string& path, TFileScanRangeParams* params,
+        const std::vector<SlotDescriptor*>& slots, MockRuntimeState* state, RuntimeProfile* profile,
+        int64_t range_start_offset = 0, int64_t range_size = -1,
+        TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+        std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(path, range_start_offset, range_size);
+    auto reader = std::make_unique<CsvReader>(system_properties, desc, std::move(io_ctx), profile,
+                                              params, slots, range_compress_type);
+    EXPECT_TRUE(reader->init(state).ok());
+    return reader;
+}
+
+std::unique_ptr<CsvReader> create_unknown_size_reader(const std::string& path,
+                                                      TFileScanRangeParams* params,
+                                                      const std::vector<SlotDescriptor*>& slots,
+                                                      MockRuntimeState* state,
+                                                      RuntimeProfile* profile) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = unknown_size_file_description(path);
+    auto reader =
+            std::make_unique<CsvReader>(system_properties, desc, nullptr, profile, params, slots);
+    EXPECT_TRUE(reader->init(state).ok());
+    return reader;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block;
+    for (const auto local_id : local_ids) {
+        const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) {
+            return column.local_id == local_id;
+        });
+        EXPECT_TRUE(it != schema.end());
+        block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnString&>(nullable.get_nested_column());
+    return nested.get_data_at(row).to_string();
+}
+
+bool is_null_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    return nullable.is_null_at(row);
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+    return nested.get_data()[row];
+}
+
+int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& struct_column = assert_cast<const ColumnStruct&>(nullable.get_nested_column());
+    const auto& child_nullable =
+            assert_cast<const ColumnNullable&>(struct_column.get_column(child_index));
+    const auto& nested = assert_cast<const ColumnInt32&>(child_nullable.get_nested_column());
+    return nested.get_data()[row];
+}
+
+int64_t counter_value(RuntimeProfile* profile, const std::string& name) {
+    auto* counter = profile->get_counter(name);
+    EXPECT_NE(counter, nullptr) << name;
+    return counter == nullptr ? 0 : counter->value();
+}
+
+class NullableIntGreaterThanExpr final : public VExpr {
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& nullable =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& data = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto source_row = selector == nullptr ? row : (*selector)[row];
+            result_data[row] =
+                    !nullable.is_null_at(source_row) && data.get_element(source_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+class StructIntChildGreaterThanExpr final : public VExpr {
+public:
+    StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _child_index(child_index),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& nullable =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& struct_column = assert_cast<const ColumnStruct&>(nullable.get_nested_column());
+        const auto& child_nullable =
+                assert_cast<const ColumnNullable&>(struct_column.get_column(_child_index));
+        const auto& child_data =
+                assert_cast<const ColumnInt32&>(child_nullable.get_nested_column());
+
+        auto result = ColumnUInt8::create();
+        auto& data = result->get_data();
+        data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto source_row = selector == nullptr ? row : (*selector)[row];
+            data[row] = !nullable.is_null_at(source_row) &&
+                        !child_nullable.is_null_at(source_row) &&
+                        child_data.get_element(source_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<StructIntChildGreaterThanExpr>(_block_position,
+                                                                       _child_index, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    size_t _child_index;
+    int32_t _value;
+    const std::string _name = "StructIntChildGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto context = VExprContext::create_shared(expr);
+    auto status = context->prepare(state, RowDescriptor());
+    EXPECT_TRUE(status.ok()) << status;
+    status = context->open(state);
+    EXPECT_TRUE(status.ok()) << status;
+    return context;
+}
+
+class CsvV2ReaderTest : public testing::Test {
+public:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_csv_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.csv").string();
+        std::ofstream output(_file_path, std::ios::binary);
+        output << "id,name,score\n";
+        output << "1,alice,10\n";
+        output << "2,bob,20\n";
+        output.close();
+        _slots = build_slots(&_pool);
+        _params = csv_scan_params();
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+protected:
+    ObjectPool _pool;
+    MockRuntimeState _state;
+    RuntimeProfile _profile {"csv_v2_reader_test"};
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::vector<SlotDescriptor*> _slots;
+    TFileScanRangeParams _params;
+};
+
+// Scenario: CSV v2 exposes FE-provided file slots as nullable file-local schema using column_idxs
+// as CSV field ordinals.
+TEST_F(CsvV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    EXPECT_EQ(schema[0].name, "id");
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(schema[1].name, "name");
+    EXPECT_EQ(schema[1].local_id, 1);
+    EXPECT_TRUE(schema[1].type->is_nullable());
+}
+
+// Scenario: FE slot types for CSV are table target types. CHAR/VARCHAR length is not stored in the
+// CSV file, so the file schema must expose bounded strings as unbounded STRING. Otherwise
+// TableReader believes the file value already satisfies the table length and skips truncation.
+TEST_F(CsvV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) {
+    auto slots = build_char_varchar_slots(&_pool);
+    auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+
+    const auto city_type = remove_nullable(schema[1].type);
+    EXPECT_EQ(city_type->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(assert_cast<const DataTypeString*>(city_type.get())->len(), -1);
+
+    const auto region_type = remove_nullable(schema[2].type);
+    ASSERT_EQ(region_type->get_primitive_type(), TYPE_STRUCT);
+    const auto* region_struct = assert_cast<const DataTypeStruct*>(region_type.get());
+    ASSERT_EQ(region_struct->get_elements().size(), 2);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(0))->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(1))->get_primitive_type(), TYPE_STRING);
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: CSV is row-oriented and cannot lazy-read predicate columns separately. The reader
+// declares that capability by choosing MaterializedColumnMapper itself.
+TEST_F(CsvV2ReaderTest, CreatesMaterializedColumnMapper) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto mapper = reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+
+    ASSERT_NE(dynamic_cast<MaterializedColumnMapper*>(mapper.get()), nullptr);
+}
+
+// Scenario: CSV v2 exposes delimited-text profile counters for read, parse, deserialize, and
+// file-local conjunct filtering, so scanner profiles can explain where row-reader time is spent.
+TEST_F(CsvV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) {
+    const auto profile_path = (_test_dir / "profile.csv").string();
+    std::ofstream output(profile_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "\n";
+    output << "1,alice,10\n";
+    output << "2,bob,20\n";
+    output.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(profile_path, &_params, _slots, &_state, &_profile, 0, -1,
+                                TFileCompressType::UNKNOWN, io_ctx);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                      LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    request->conjuncts = {
+            prepared_conjunct(&_state, std::make_shared<NullableIntGreaterThanExpr>(1, 15))};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0, 2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2);
+
+    EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr);
+    EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2);
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 2);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0);
+    EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1);
+    EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1);
+    EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 1);
+    EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6);
+}
+
+// Scenario: CSV has no embedded nested schema, but TableColumnMapper still needs semantic children
+// for complex table columns. The reader synthesizes ARRAY/MAP/STRUCT children from the slot type
+// while keeping the top-level local id as the CSV field ordinal from column_idxs.
+TEST_F(CsvV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) {
+    _params.__set_column_idxs({4, 7, 9});
+    auto slots = build_nested_complex_slots(&_pool);
+    auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+
+    EXPECT_EQ(schema[1].name, "xs");
+    EXPECT_EQ(schema[1].local_id, 7);
+    ASSERT_EQ(schema[1].children.size(), 1);
+    EXPECT_EQ(schema[1].children[0].name, "element");
+    EXPECT_EQ(schema[1].children[0].local_id, 0);
+    ASSERT_EQ(schema[1].children[0].children.size(), 2);
+    EXPECT_EQ(schema[1].children[0].children[0].name, "a");
+    EXPECT_EQ(schema[1].children[0].children[0].local_id, 0);
+    EXPECT_EQ(schema[1].children[0].children[1].name, "b");
+    EXPECT_EQ(schema[1].children[0].children[1].local_id, 1);
+
+    EXPECT_EQ(schema[2].name, "kv");
+    EXPECT_EQ(schema[2].local_id, 9);
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(schema[2].children[0].name, "key");
+    EXPECT_EQ(schema[2].children[0].local_id, 0);
+    EXPECT_EQ(schema[2].children[1].name, "value");
+    EXPECT_EQ(schema[2].children[1].local_id, 1);
+    ASSERT_EQ(schema[2].children[1].children.size(), 2);
+    EXPECT_EQ(schema[2].children[1].children[0].name, "a");
+    EXPECT_EQ(schema[2].children[1].children[1].name, "b");
+}
+
+// Scenario: CSV v2 honors FileScanRequest local positions, so TableReader can request a subset of
+// CSV fields in an order different from the physical CSV field order.
+TEST_F(CsvV2ReaderTest, ReadsRequestedColumnsInFileLocalBlockOrder) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                      LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 2);
+}
+
+// Scenario: CSV v2 defaults to the same strict UTF-8 validation as the old query reader. Invalid
+// bytes should fail fast unless the scan params explicitly disable text UTF-8 validation.
+TEST_F(CsvV2ReaderTest, InvalidUtf8FailsWhenValidationEnabled) {
+    const auto invalid_path = (_test_dir / "invalid_utf8.csv").string();
+    std::ofstream output(invalid_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,";
+    output.write("\xff", 1);
+    output << ",10\n";
+    output.close();
+
+    auto reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    const auto status = reader->get_block(&block, &rows, &eof);
+    EXPECT_FALSE(status.ok());
+    EXPECT_TRUE(status.to_string().find("Only support csv data in utf8 codec") != std::string::npos)
+            << status;
+}
+
+// Scenario: external CSV scans can opt out of UTF-8 validation through
+// `enable_text_validate_utf8=false`. In that mode the reader preserves the original bytes instead
+// of rejecting the row.
+TEST_F(CsvV2ReaderTest, DisableTextValidateUtf8ReadsRawBytes) {
+    const auto invalid_path = (_test_dir / "invalid_utf8_disabled.csv").string();
+    std::ofstream output(invalid_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,";
+    output.write("\xff", 1);
+    output << ",10\n";
+    output.close();
+
+    _params.file_attributes.__set_enable_text_validate_utf8(false);
+    auto reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), std::string("\xff", 1));
+}
+
+// Scenario: file TVF can keep the logical CSV format as FORMAT_CSV_PLAIN and put the actual gzip
+// compression on the scan range. CSV v2 must honor that range-level compression before validating
+// UTF-8; otherwise the gzip bytes are misread as CSV text.
+TEST_F(CsvV2ReaderTest, RangeCompressTypeGzipDecompressesPlainCsvFormat) {
+    const auto gz_path = (_test_dir / "reader.csv.gz").string();
+    static constexpr unsigned char gzipped_csv[] = {
+            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcb, 0x4c,
+            0xd1, 0xc9, 0x4b, 0xcc, 0x4d, 0xd5, 0x29, 0x4e, 0xce, 0x2f, 0x4a, 0xe5,
+            0x32, 0xd4, 0x49, 0xcc, 0xc9, 0x4c, 0x4e, 0xd5, 0x31, 0x34, 0xe0, 0x02,
+            0x00, 0x0b, 0xed, 0x5c, 0xa2, 0x19, 0x00, 0x00, 0x00};
+    std::ofstream output(gz_path, std::ios::binary);
+    output.write(reinterpret_cast<const char*>(gzipped_csv), sizeof(gzipped_csv));
+    output.close();
+
+    _params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    _params.__isset.compress_type = false;
+    auto reader = create_reader(gz_path, &_params, _slots, &_state, &_profile, 0, -1,
+                                TFileCompressType::GZ);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                      LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0, 1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "alice");
+}
+
+// Scenario: FE column_idxs define the CSV field ordinal for each physical file slot. The mapping
+// can be non-identity when FE reorders projected file slots, so the reader must use the local id
+// from FileScanRequest instead of the slot vector position.
+TEST_F(CsvV2ReaderTest, ColumnIdxsMapSlotsToCsvOrdinals) {
+    const auto remap_path = (_test_dir / "remapped.csv").string();
+    std::ofstream output(remap_path, std::ios::binary);
+    output << "name,score,id\n";
+    output << "alice,10,1\n";
+    output.close();
+
+    _params.__set_column_idxs({2, 0, 1});
+    auto reader = create_reader(remap_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    EXPECT_EQ(schema[0].name, "id");
+    EXPECT_EQ(schema[0].local_id, 2);
+    EXPECT_EQ(schema[1].name, "name");
+    EXPECT_EQ(schema[1].local_id, 0);
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)),
+                                      LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {2, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "alice");
+}
+
+// Scenario: CSV stores one complex column as one text field, so v2 must read the whole struct
+// field before evaluating a file-local predicate on one child. This covers `SELECT s.a WHERE
+// s.b > 10` style scans after CsvReader's MaterializedColumnMapper has requested the full
+// top-level `s`.
+TEST_F(CsvV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) {
+    const auto complex_path = (_test_dir / "complex.csv").string();
+    std::ofstream output(complex_path, std::ios::binary);
+    output << "id|s|score\n";
+    output << "1|{\"a\": 11, \"b\": 5}|10\n";
+    output << "2|{\"a\": 22, \"b\": 20}|20\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_column_separator("|");
+    _params.__set_column_idxs({0, 1, 2});
+    auto slots = build_struct_slots(&_pool);
+    auto reader = create_reader(complex_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->conjuncts = {prepared_conjunct(
+            &_state, std::make_shared<StructIntChildGreaterThanExpr>(
+                             /*block_position=*/0, /*child_index=*/1, /*value=*/10))};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 0, 0), 22);
+    EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 1, 0), 20);
+}
+
+// Scenario: a table-level scan can need only partition/default columns, leaving the CSV
+// FileScanRequest with no file-local columns. The reader must still report the number of rows read.
+TEST_F(CsvV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    Block block;
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_EQ(rows, 2);
+    EXPECT_FALSE(eof);
+}
+
+// Scenario: stream-load/http_stream inputs do not have a known split size or file size. A first
+// split must still read until EOF instead of rejecting the request before opening the stream.
+TEST_F(CsvV2ReaderTest, UnknownFirstSplitSizeReadsUntilEof) {
+    auto reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                      LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0, 1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 1), "bob");
+}
+
+// Scenario: stream load/http_stream CSV input is not backed by a filesystem. If TableReader fails
+// to preserve the stream load id, the v2 reader should report that directly instead of calling the
+// generic FileFactory path and returning "unsupported file reader type: 2".
+TEST_F(CsvV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) {
+    _params.__set_file_type(TFileType::FILE_STREAM);
+    auto reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    const auto status = reader->open(request);
+
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("stream reader requires load id"), std::string::npos)
+            << status;
+}
+
+// Scenario: CSV has no footer row count, so v2 COUNT pushdown scans the split and returns the
+// counted row count through FileAggregateResult.
+TEST_F(CsvV2ReaderTest, CountAggregateScansRows) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::type::COUNT;
+    FileAggregateResult aggregate_result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok());
+    EXPECT_EQ(aggregate_result.count, 2);
+}
+
+// Scenario: CSV v2 parses enclosed fields itself instead of delegating to the old CsvReader. A
+// separator inside an enclosed string must stay inside the same CSV field.
+TEST_F(CsvV2ReaderTest, EnclosedFieldKeepsSeparatorInsideStringValue) {
+    const auto quoted_path = (_test_dir / "quoted.csv").string();
+    std::ofstream output(quoted_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,\"alice,team\",10\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_enclose('"');
+    _params.file_attributes.text_params.__set_escape('\\');
+    auto reader = create_reader(quoted_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice,team");
+}
+
+// Scenario: when the CSV row has fewer fields than the FE-provided file slot list, v2 fills the
+// missing requested field with NULL instead of failing or shifting later columns.
+TEST_F(CsvV2ReaderTest, MissingRequestedFieldUsesNullFormat) {
+    const auto missing_path = (_test_dir / "missing.csv").string();
+    std::ofstream output(missing_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,alice\n";
+    output.close();
+
+    auto reader = create_reader(missing_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+}
+
+// Scenario: the first line may contain UTF-8 BOM and CSV_WITH_NAMES_AND_TYPES has two header
+// records. Both must be skipped before materializing the first data row.
+TEST_F(CsvV2ReaderTest, HeaderNamesAndTypesSkipsTwoLinesAndBom) {
+    const auto header_path = (_test_dir / "header_names_types.csv").string();
+    std::ofstream output(header_path, std::ios::binary);
+    output.write("\xEF\xBB\xBF", 3);
+    output << "id,name,score\n";
+    output << "INT,STRING,INT\n";
+    output << "7,carol,70\n";
+    output.close();
+
+    _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES);
+    auto reader = create_reader(header_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 7);
+}
+
+// Scenario: when the first returned data line starts with UTF-8 BOM, CSV v2 strips the BOM before
+// passing the cell to the serde. This matters for headerless files whose first column is numeric.
+TEST_F(CsvV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) {
+    const auto bom_path = (_test_dir / "bom_data.csv").string();
+    std::ofstream output(bom_path, std::ios::binary);
+    output.write("\xEF\xBB\xBF", 3);
+    output << "5,bom,50\n";
+    output.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    auto reader = create_reader(bom_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 5);
+}
+
+// Scenario: when FE does not set header_type, CSV v2 must honor skip_lines exactly as the old
+// reader does.
+TEST_F(CsvV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) {
+    const auto skip_path = (_test_dir / "skip_lines.csv").string();
+    std::ofstream output(skip_path, std::ios::binary);
+    output << "skip me\n";
+    output << "skip me too\n";
+    output << "3,dan,30\n";
+    output.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    _params.file_attributes.__set_skip_lines(2);
+    auto reader = create_reader(skip_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3);
+}
+
+// Scenario: empty physical lines are skipped by default, but read_csv_empty_line_as_null turns one
+// empty line into one all-null logical row.
+TEST_F(CsvV2ReaderTest, EmptyLineAsNullWhenQueryOptionEnabled) {
+    const auto empty_line_path = (_test_dir / "empty_line.csv").string();
+    std::ofstream output(empty_line_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "\n";
+    output << "4,erin,40\n";
+    output.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 1), 4);
+}
+
+// Scenario: FE-provided CSV text parameters define NULL semantics. Explicit null_format and
+// empty_field_as_null should both produce nullable values without throwing serde errors.
+TEST_F(CsvV2ReaderTest, NullFormatAndEmptyFieldAsNullProduceNullableValues) {
+    const auto null_path = (_test_dir / "null_format.csv").string();
+    std::ofstream output(null_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,NULL,\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_null_format("NULL");
+    _params.file_attributes.text_params.__set_empty_field_as_null(true);
+    auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                      LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1, 2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*block.get_by_position(1).column, 0));
+}
+
+// Scenario: OpenCSV keeps an empty field as an empty string when empty_field_as_null is false,
+// even if FE passes an empty null_format. This differs from Hive text serde, where an empty
+// serialization.null.format is a real NULL marker.
+TEST_F(CsvV2ReaderTest, EmptyNullFormatKeepsCsvEmptyFieldAsEmptyString) {
+    const auto null_path = (_test_dir / "empty_null_format.csv").string();
+    std::ofstream output(null_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "1,alice,10\n";
+    output << "2,,20\n";
+    output << "3,NULL,30\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_null_format("");
+    _params.file_attributes.text_params.__set_empty_field_as_null(false);
+    auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 3);
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 1));
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "");
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 2));
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 2), "NULL");
+}
+
+// Scenario: a non-first split starts inside a record. CSV v2 pre-reads enough delimiter bytes and
+// skips the partial first line so the split begins at the next complete row.
+TEST_F(CsvV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) {
+    const auto split_path = (_test_dir / "split.csv").string();
+    std::ofstream output(split_path, std::ios::binary);
+    output << "1,skip,10\n";
+    output << "2,bob,20\n";
+    output.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    auto reader = create_reader(split_path, &_params, _slots, &_state, &_profile,
+                                /*range_start_offset=*/3);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2);
+}
+
+// Scenario: compressed CSV cannot be split at arbitrary byte offsets because the decompressor needs
+// the stream from the beginning. V2 should reject such a split before constructing the line reader.
+TEST_F(CsvV2ReaderTest, NonFirstCompressedSplitReturnsError) {
+    _params.__set_format_type(TFileFormatType::FORMAT_CSV_GZ);
+    _params.file_attributes.__isset.header_type = false;
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile,
+                                /*range_start_offset=*/1);
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    EXPECT_FALSE(reader->open(request).ok());
+}
+
+// Scenario: FileScanRequest is a TableReader-to-FileReader contract. Unknown CSV ordinals,
+// out-of-range block positions, and sparse block-position maps must fail during reader open.
+TEST_F(CsvV2ReaderTest, InvalidScanRequestReturnsError) {
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))};
+        request->local_positions.emplace(LocalColumnId(99), LocalIndex(0));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+        request->local_positions.emplace(LocalColumnId(0), LocalIndex(2));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                          LocalColumnIndex::top_level(LocalColumnId(1))};
+        request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+        request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+}
+
+// Scenario: CSV v2 can count rows by scanning, but it cannot answer min/max or mixed aggregate
+// requests from metadata.
+TEST_F(CsvV2ReaderTest, UnsupportedAggregateReturnsNotSupported) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::type::MINMAX;
+    FileAggregateResult aggregate_result;
+    EXPECT_FALSE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok());
+}
+
+} // namespace
+} // namespace doris::format::csv
diff --git a/be/test/format_v2/delimited_text/text_reader_test.cpp b/be/test/format_v2/delimited_text/text_reader_test.cpp
new file mode 100644
index 00000000000000..b6402cab5d86d6
--- /dev/null
+++ b/be/test/format_v2/delimited_text/text_reader_test.cpp
@@ -0,0 +1,965 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/text_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+
+#include "common/consts.h"
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/desc_tbl_builder.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::text {
+namespace {
+
+TFileScanRangeParams text_scan_params() {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_TEXT);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    TFileAttributes attributes;
+    TFileTextScanRangeParams text_params;
+    text_params.__set_column_separator(",");
+    text_params.__set_line_delimiter("\n");
+    text_params.__set_escape('\\');
+    attributes.__set_text_params(std::move(text_params));
+    params.__set_file_attributes(std::move(attributes));
+    params.__set_column_idxs({0, 1, 2});
+    return params;
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path,
+                                                      int64_t range_start_offset = 0,
+                                                      int64_t range_size = -1) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = path;
+    desc->range_start_offset = range_start_offset;
+    desc->range_size = range_size;
+    desc->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    return desc;
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    DescriptorTblBuilder builder(pool);
+    builder.declare_tuple()
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeInt32>()), "id"}
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeString>()),
+                                           "name"}
+            << TupleDescBuilder::SlotType {make_nullable(std::make_shared<DataTypeInt32>()),
+                                           "score"};
+    auto* desc_tbl = builder.build();
+    return desc_tbl->get_tuple_descriptor(0)->slots();
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor slot_desc;
+    slot_desc.__set_id(slot_id);
+    slot_desc.__set_parent(0);
+    slot_desc.__set_slotType(type->to_thrift());
+    slot_desc.__set_columnPos(slot_idx);
+    slot_desc.__set_byteOffset(0);
+    slot_desc.__set_nullIndicatorByte(slot_idx / 8);
+    slot_desc.__set_nullIndicatorBit(slot_idx % 8);
+    slot_desc.__set_slotIdx(slot_idx);
+    slot_desc.__set_isMaterialized(true);
+    slot_desc.__set_colName(name);
+    return pool->add(new SlotDescriptor(slot_desc));
+}
+
+std::vector<SlotDescriptor*> build_struct_slots(ObjectPool* pool) {
+    const auto nullable_int = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int, nullable_int}, Strings {"a", "b"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, struct_type, "s"),
+            make_test_slot(pool, 2, 2, make_nullable(std::make_shared<DataTypeInt32>()), "score")};
+}
+
+std::vector<SlotDescriptor*> build_nested_complex_slots(ObjectPool* pool) {
+    const auto nullable_int = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto nullable_string = make_nullable(std::make_shared<DataTypeString>());
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int, nullable_string}, Strings {"a", "b"}));
+    const auto array_type = make_nullable(std::make_shared<DataTypeArray>(struct_type));
+    const auto map_type =
+            make_nullable(std::make_shared<DataTypeMap>(nullable_string, struct_type));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, array_type, "xs"),
+            make_test_slot(pool, 2, 2, map_type, "kv")};
+}
+
+std::vector<SlotDescriptor*> build_char_varchar_slots(ObjectPool* pool) {
+    const auto nullable_char3 =
+            make_nullable(std::make_shared<DataTypeString>(3, PrimitiveType::TYPE_CHAR));
+    const auto nullable_varchar4 =
+            make_nullable(std::make_shared<DataTypeString>(4, PrimitiveType::TYPE_VARCHAR));
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_char3, nullable_varchar4}, Strings {"city", "country"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, nullable_char3, "city"),
+            make_test_slot(pool, 2, 2, struct_type, "region")};
+}
+
+std::unique_ptr<TextReader> create_reader(const std::string& path, TFileScanRangeParams* params,
+                                          const std::vector<SlotDescriptor*>& slots,
+                                          MockRuntimeState* state, RuntimeProfile* profile,
+                                          int64_t range_start_offset = 0, int64_t range_size = -1,
+                                          std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(path, range_start_offset, range_size);
+    auto reader = std::make_unique<TextReader>(system_properties, desc, std::move(io_ctx), profile,
+                                               params, slots);
+    EXPECT_TRUE(reader->init(state).ok());
+    return reader;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block;
+    for (const auto local_id : local_ids) {
+        const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) {
+            return column.local_id == local_id;
+        });
+        EXPECT_TRUE(it != schema.end());
+        block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnString&>(nullable.get_nested_column());
+    return nested.get_data_at(row).to_string();
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+    return nested.get_data()[row];
+}
+
+bool is_null_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    return nullable.is_null_at(row);
+}
+
+int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& struct_column = assert_cast<const ColumnStruct&>(nullable.get_nested_column());
+    const auto& child_nullable =
+            assert_cast<const ColumnNullable&>(struct_column.get_column(child_index));
+    const auto& nested = assert_cast<const ColumnInt32&>(child_nullable.get_nested_column());
+    return nested.get_data()[row];
+}
+
+int64_t counter_value(RuntimeProfile* profile, const std::string& name) {
+    auto* counter = profile->get_counter(name);
+    EXPECT_NE(counter, nullptr) << name;
+    return counter == nullptr ? 0 : counter->value();
+}
+
+class NullableIntGreaterThanExpr final : public VExpr {
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& nullable =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& data = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto source_row = selector == nullptr ? row : (*selector)[row];
+            result_data[row] =
+                    !nullable.is_null_at(source_row) && data.get_element(source_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+class StructIntChildGreaterThanExpr final : public VExpr {
+public:
+    StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _child_index(child_index),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& nullable =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& struct_column = assert_cast<const ColumnStruct&>(nullable.get_nested_column());
+        const auto& child_nullable =
+                assert_cast<const ColumnNullable&>(struct_column.get_column(_child_index));
+        const auto& child_data =
+                assert_cast<const ColumnInt32&>(child_nullable.get_nested_column());
+
+        auto result = ColumnUInt8::create();
+        auto& data = result->get_data();
+        data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto source_row = selector == nullptr ? row : (*selector)[row];
+            data[row] = !nullable.is_null_at(source_row) &&
+                        !child_nullable.is_null_at(source_row) &&
+                        child_data.get_element(source_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<StructIntChildGreaterThanExpr>(_block_position,
+                                                                       _child_index, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    size_t _child_index;
+    int32_t _value;
+    const std::string _name = "StructIntChildGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto context = VExprContext::create_shared(expr);
+    auto status = context->prepare(state, RowDescriptor());
+    EXPECT_TRUE(status.ok()) << status;
+    status = context->open(state);
+    EXPECT_TRUE(status.ok()) << status;
+    return context;
+}
+
+class TextV2ReaderTest : public testing::Test {
+public:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_text_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.text").string();
+        std::ofstream output(_file_path, std::ios::binary);
+        output << "1,alice,10\n";
+        output << "2,bob,20\n";
+        output.close();
+        _slots = build_slots(&_pool);
+        _params = text_scan_params();
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+protected:
+    ObjectPool _pool;
+    MockRuntimeState _state;
+    RuntimeProfile _profile {"text_v2_reader_test"};
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::vector<SlotDescriptor*> _slots;
+    TFileScanRangeParams _params;
+};
+
+// Scenario: Text v2 exposes FE-provided file slots as nullable file-local schema using column_idxs
+// as Hive text field ordinals.
+TEST_F(TextV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    EXPECT_EQ(schema[0].name, "id");
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(schema[1].name, "name");
+    EXPECT_EQ(schema[1].local_id, 1);
+    EXPECT_TRUE(schema[1].type->is_nullable());
+}
+
+// Scenario: FE slot types for Hive text are table target types. CHAR/VARCHAR length is not stored
+// in the text file, so the file schema must expose bounded strings as unbounded STRING. Otherwise
+// TableReader believes the file value already satisfies the table length and skips truncation.
+TEST_F(TextV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) {
+    auto slots = build_char_varchar_slots(&_pool);
+    auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+
+    const auto city_type = remove_nullable(schema[1].type);
+    EXPECT_EQ(city_type->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(assert_cast<const DataTypeString*>(city_type.get())->len(), -1);
+
+    const auto region_type = remove_nullable(schema[2].type);
+    ASSERT_EQ(region_type->get_primitive_type(), TYPE_STRUCT);
+    const auto* region_struct = assert_cast<const DataTypeStruct*>(region_type.get());
+    ASSERT_EQ(region_struct->get_elements().size(), 2);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(0))->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(1))->get_primitive_type(), TYPE_STRING);
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: Hive text is row-oriented and cannot lazy-read predicate columns separately. The
+// reader declares that capability by choosing MaterializedColumnMapper itself.
+TEST_F(TextV2ReaderTest, CreatesMaterializedColumnMapper) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto mapper = reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+
+    ASSERT_NE(dynamic_cast<MaterializedColumnMapper*>(mapper.get()), nullptr);
+}
+
+// Scenario: Text v2 exposes delimited-text profile counters for read, parse, deserialize, and
+// file-local conjunct filtering, so scanner profiles can explain where row-reader time is spent.
+TEST_F(TextV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) {
+    const auto profile_path = (_test_dir / "profile.text").string();
+    std::ofstream output(profile_path, std::ios::binary);
+    output << "\n";
+    output << "1,alice,10\n";
+    output << "2,bob,20\n";
+    output.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(profile_path, &_params, _slots, &_state, &_profile, 0, -1, io_ctx);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                      LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    request->conjuncts = {
+            prepared_conjunct(&_state, std::make_shared<NullableIntGreaterThanExpr>(1, 15))};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0, 2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2);
+
+    EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr);
+    EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2);
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 2);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0);
+    EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1);
+    EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1);
+    EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 0);
+    EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6);
+}
+
+// Scenario: Hive text has no embedded nested schema, but TableColumnMapper still needs semantic
+// children for complex table columns. The reader synthesizes ARRAY/MAP/STRUCT children from the
+// slot type while keeping the top-level local id as the text field ordinal from column_idxs.
+TEST_F(TextV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) {
+    _params.__set_column_idxs({4, 7, 9});
+    auto slots = build_nested_complex_slots(&_pool);
+    auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+
+    EXPECT_EQ(schema[1].name, "xs");
+    EXPECT_EQ(schema[1].local_id, 7);
+    ASSERT_EQ(schema[1].children.size(), 1);
+    EXPECT_EQ(schema[1].children[0].name, "element");
+    EXPECT_EQ(schema[1].children[0].local_id, 0);
+    ASSERT_EQ(schema[1].children[0].children.size(), 2);
+    EXPECT_EQ(schema[1].children[0].children[0].name, "a");
+    EXPECT_EQ(schema[1].children[0].children[0].local_id, 0);
+    EXPECT_EQ(schema[1].children[0].children[1].name, "b");
+    EXPECT_EQ(schema[1].children[0].children[1].local_id, 1);
+
+    EXPECT_EQ(schema[2].name, "kv");
+    EXPECT_EQ(schema[2].local_id, 9);
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(schema[2].children[0].name, "key");
+    EXPECT_EQ(schema[2].children[0].local_id, 0);
+    EXPECT_EQ(schema[2].children[1].name, "value");
+    EXPECT_EQ(schema[2].children[1].local_id, 1);
+    ASSERT_EQ(schema[2].children[1].children.size(), 2);
+    EXPECT_EQ(schema[2].children[1].children[0].name, "a");
+    EXPECT_EQ(schema[2].children[1].children[1].name, "b");
+}
+
+// Scenario: Hive text escapes a field separator inside a string. The splitter keeps the escaped
+// separator in the same field, and hive-text serde unescapes the final string value.
+TEST_F(TextV2ReaderTest, EscapedSeparatorStaysInsideStringField) {
+    const auto escaped_path = (_test_dir / "escaped.text").string();
+    std::ofstream output(escaped_path, std::ios::binary);
+    output << "1,alice\\,team,10\n";
+    output.close();
+
+    auto reader = create_reader(escaped_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                      LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1, 2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice,team");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 10);
+}
+
+// Scenario: Hive text supports multi-character field separators. V2 must not split on partial
+// matches and must still honor FileScanRequest output positions.
+TEST_F(TextV2ReaderTest, MultiCharacterSeparatorReadsRequestedColumns) {
+    const auto multi_path = (_test_dir / "multi.text").string();
+    std::ofstream output(multi_path, std::ios::binary);
+    output << "3||carol||30\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_column_separator("||");
+    auto reader = create_reader(multi_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                      LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "carol");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 3);
+}
+
+// Scenario: column_idxs can map table slots to non-identity Hive text field ordinals.
+TEST_F(TextV2ReaderTest, ColumnIdxsMapSlotsToTextOrdinals) {
+    const auto remap_path = (_test_dir / "remapped.text").string();
+    std::ofstream output(remap_path, std::ios::binary);
+    output << "doris,40,4\n";
+    output.close();
+
+    _params.__set_column_idxs({2, 0, 1});
+    auto reader = create_reader(remap_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    EXPECT_EQ(schema[0].local_id, 2);
+    EXPECT_EQ(schema[1].local_id, 0);
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)),
+                                      LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {2, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 4);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "doris");
+}
+
+// Scenario: Hive text complex values are encoded inside one top-level text field. V2 reads the
+// complete struct field first, then evaluates a file-local predicate on one child, covering
+// `SELECT s.a WHERE s.b > 10` without pretending that Text has physical nested-column pruning.
+TEST_F(TextV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) {
+    const auto complex_path = (_test_dir / "complex.text").string();
+    std::ofstream output(complex_path, std::ios::binary);
+    output << "1|11,5|10\n";
+    output << "2|22,20|20\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_column_separator("|");
+    _params.file_attributes.text_params.__set_collection_delimiter(",");
+    _params.__set_column_idxs({0, 1, 2});
+    auto slots = build_struct_slots(&_pool);
+    auto reader = create_reader(complex_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    request->conjuncts = {prepared_conjunct(
+            &_state, std::make_shared<StructIntChildGreaterThanExpr>(
+                             /*block_position=*/0, /*child_index=*/1, /*value=*/10))};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 0, 0), 22);
+    EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 1, 0), 20);
+}
+
+// Scenario: missing Hive text fields are materialized as NULL rather than shifting later columns.
+TEST_F(TextV2ReaderTest, MissingRequestedFieldUsesNullFormat) {
+    const auto missing_path = (_test_dir / "missing.text").string();
+    std::ofstream output(missing_path, std::ios::binary);
+    output << "1,alice\n";
+    output.close();
+
+    auto reader = create_reader(missing_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+}
+
+// Scenario: Text v2 can scan a request with no materialized columns. This is used by table-level
+// COUNT-style paths where the reader must still return the number of logical rows read.
+TEST_F(TextV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    Block block;
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_EQ(rows, 2);
+    EXPECT_FALSE(eof);
+}
+
+// Scenario: stream load/http_stream text input is not backed by a filesystem. If TableReader fails
+// to preserve the stream load id, the v2 reader should report that directly instead of calling the
+// generic FileFactory path and returning "unsupported file reader type: 2".
+TEST_F(TextV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) {
+    _params.__set_file_type(TFileType::FILE_STREAM);
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    const auto status = reader->open(request);
+
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("stream reader requires load id"), std::string::npos)
+            << status;
+}
+
+// Scenario: explicit text null_format is honored by Hive-text serde. Unlike CSV
+// empty_field_as_null, an empty text field is not NULL unless it equals null_format exactly.
+TEST_F(TextV2ReaderTest, NullFormatProducesNullableValue) {
+    const auto null_path = (_test_dir / "null_format.text").string();
+    std::ofstream output(null_path, std::ios::binary);
+    output << "1,NULL,10\n";
+    output << "2,,20\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_null_format("NULL");
+    auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 1));
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "");
+}
+
+// Scenario: Hive SerDe can define the empty string itself as NULL. The nullable string fast path
+// must match the generic nullable serde behavior instead of treating empty null_format as
+// "null format is not configured".
+TEST_F(TextV2ReaderTest, EmptyNullFormatProducesNullableValue) {
+    const auto null_path = (_test_dir / "empty_null_format.text").string();
+    std::ofstream output(null_path, std::ios::binary);
+    output << "1,alice,10\n";
+    output << "2,,20\n";
+    output << "3,NULL,30\n";
+    output.close();
+
+    _params.file_attributes.text_params.__set_null_format("");
+    auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 3);
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 1));
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 2));
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 2), "NULL");
+}
+
+// Scenario: TEXT_WITH_NAMES_AND_TYPES-style headers share the delimited text base skip path with
+// CSV. Both header records must be skipped before the first data row is read.
+TEST_F(TextV2ReaderTest, HeaderNamesAndTypesSkipsTwoLines) {
+    const auto header_path = (_test_dir / "header_names_types.text").string();
+    std::ofstream output(header_path, std::ios::binary);
+    output << "id,name,score\n";
+    output << "INT,STRING,INT\n";
+    output << "7,carol,70\n";
+    output.close();
+
+    _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES);
+    auto reader = create_reader(header_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 7);
+}
+
+// Scenario: the shared delimited text base removes UTF-8 BOM from the first returned data line.
+// This matters for headerless text files whose first column is numeric.
+TEST_F(TextV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) {
+    const auto bom_path = (_test_dir / "bom_data.text").string();
+    std::ofstream output(bom_path, std::ios::binary);
+    output.write("\xEF\xBB\xBF", 3);
+    output << "5,bom,50\n";
+    output.close();
+
+    auto reader = create_reader(bom_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 5);
+}
+
+// Scenario: when FE does not set header_type, skip_lines should be honored by the shared
+// delimited text base before TextReader starts splitting rows.
+TEST_F(TextV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) {
+    const auto skip_path = (_test_dir / "skip_lines.text").string();
+    std::ofstream output(skip_path, std::ios::binary);
+    output << "skip me\n";
+    output << "skip me too\n";
+    output << "3,dan,30\n";
+    output.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    _params.file_attributes.__set_skip_lines(2);
+    auto reader = create_reader(skip_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3);
+}
+
+// Scenario: Hive TEXTFILE treats an empty physical line as a record. For the first field it
+// deserializes an empty value; missing trailing fields are filled with null_format.
+TEST_F(TextV2ReaderTest, EmptyLineAsRecordByDefault) {
+    const auto empty_line_path = (_test_dir / "empty_line.text").string();
+    std::ofstream output(empty_line_path, std::ios::binary);
+    output << "\n";
+    output << "4,erin,40\n";
+    output.close();
+
+    auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                      LocalColumnIndex::top_level(LocalColumnId(1)),
+                                      LocalColumnIndex::top_level(LocalColumnId(2))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    request->local_positions.emplace(LocalColumnId(2), LocalIndex(2));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0, 1, 2});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*block.get_by_position(1).column, 0));
+    EXPECT_TRUE(is_null_at(*block.get_by_position(2).column, 0));
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 1), 4);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 1), "erin");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(2).column, 1), 40);
+}
+
+// Scenario: for a single-column Hive TEXTFILE table, an empty physical line is one empty string
+// field rather than a skipped row.
+TEST_F(TextV2ReaderTest, EmptyLineAsSingleEmptyStringField) {
+    const auto empty_line_path = (_test_dir / "empty_line_single_string.text").string();
+    std::ofstream output(empty_line_path, std::ios::binary);
+    output << "\n";
+    output << "erin\n";
+    output.close();
+
+    _params.__set_column_idxs({0});
+    const std::vector<SlotDescriptor*> slots {make_test_slot(
+            &_pool, 0, 0, make_nullable(std::make_shared<DataTypeString>()), "value")};
+    auto reader = create_reader(empty_line_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0));
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "");
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "erin");
+}
+
+// Scenario: text v2 COUNT pushdown counts empty physical lines as Hive TEXTFILE records.
+TEST_F(TextV2ReaderTest, CountAggregatePreservesEmptyLines) {
+    const auto empty_line_path = (_test_dir / "empty_line_count.text").string();
+    std::ofstream output(empty_line_path, std::ios::binary);
+    output << "\n";
+    output << "4,erin,40\n";
+    output.close();
+
+    auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::type::COUNT;
+    FileAggregateResult aggregate_result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok());
+    EXPECT_EQ(aggregate_result.count, 2);
+}
+
+// Scenario: Text v2 COUNT pushdown scans rows because text files do not expose row-count metadata.
+TEST_F(TextV2ReaderTest, CountAggregateScansRows) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::type::COUNT;
+    FileAggregateResult aggregate_result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok());
+    EXPECT_EQ(aggregate_result.count, 2);
+}
+
+// Scenario: a non-first split starts inside a text record and must skip the partial first line.
+TEST_F(TextV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) {
+    const auto split_path = (_test_dir / "split.text").string();
+    std::ofstream output(split_path, std::ios::binary);
+    output << "1,skip,10\n";
+    output << "2,bob,20\n";
+    output.close();
+
+    auto reader = create_reader(split_path, &_params, _slots, &_state, &_profile,
+                                /*range_start_offset=*/3);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2);
+}
+
+// Scenario: compressed text cannot be split at arbitrary byte offsets because the decompressor
+// needs the stream from the beginning. V2 should reject such a split before constructing the line
+// reader.
+TEST_F(TextV2ReaderTest, NonFirstCompressedSplitReturnsError) {
+    _params.__set_compress_type(TFileCompressType::GZ);
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile,
+                                /*range_start_offset=*/1);
+
+    auto request = std::make_shared<FileScanRequest>();
+    request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    EXPECT_FALSE(reader->open(request).ok());
+}
+
+// Scenario: FileScanRequest is a TableReader-to-FileReader contract. Unknown TEXT ordinals,
+// out-of-range block positions, and sparse block-position maps must fail during reader open.
+TEST_F(TextV2ReaderTest, InvalidScanRequestReturnsError) {
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))};
+        request->local_positions.emplace(LocalColumnId(99), LocalIndex(0));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+        request->local_positions.emplace(LocalColumnId(0), LocalIndex(2));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+    {
+        auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto request = std::make_shared<FileScanRequest>();
+        request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                          LocalColumnIndex::top_level(LocalColumnId(1))};
+        request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+        request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+        EXPECT_FALSE(reader->open(request).ok());
+    }
+}
+
+// Scenario: unsupported aggregate requests must fail explicitly instead of returning partial
+// results from the scan path.
+TEST_F(TextV2ReaderTest, UnsupportedAggregateReturnsNotSupported) {
+    auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::type::MINMAX;
+    FileAggregateResult aggregate_result;
+    EXPECT_FALSE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok());
+}
+
+} // namespace
+} // namespace doris::format::text
diff --git a/be/test/format_v2/expr/cast_test.cpp b/be/test/format_v2/expr/cast_test.cpp
new file mode 100644
index 00000000000000..341b89433f0c08
--- /dev/null
+++ b/be/test/format_v2/expr/cast_test.cpp
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/cast.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format {
+
+class CastTest : public testing::Test {
+protected:
+    void SetUp() override { state.set_enable_strict_cast(true); }
+
+    static VExprContextSPtr create_context(const DataTypePtr& return_type,
+                                           const DataTypePtr& child_type, int child_column_id = 0) {
+        auto cast = Cast::create_shared(return_type);
+        cast->add_child(VSlotRef::create_shared(child_column_id, child_column_id, -1, child_type,
+                                                "source_column"));
+        return VExprContext::create_shared(cast);
+    }
+
+    Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(&state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(&state));
+        return context->execute(block, result_column_id);
+    }
+
+    MockRuntimeState state;
+};
+
+TEST_F(CastTest, CastIntSlotToBigInt) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, -2, 3}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_column_id, 1);
+    ASSERT_EQ(block.columns(), 2);
+    EXPECT_EQ(block.get_by_position(result_column_id).type, return_type);
+    const auto& result_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(result_column_id).column);
+    EXPECT_EQ(result_column.get_data()[0], 1);
+    EXPECT_EQ(result_column.get_data()[1], -2);
+    EXPECT_EQ(result_column.get_data()[2], 3);
+
+    context->close();
+}
+
+TEST_F(CastTest, CastStringSlotToNullableInt) {
+    state.set_enable_strict_cast(false);
+    auto source_type = std::make_shared<DataTypeString>();
+    auto return_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeString>({"10", "bad", "-3"}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& nullable_column =
+            assert_cast<const ColumnNullable&>(*block.get_by_position(result_column_id).column);
+    const auto& result_column =
+            assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+    const auto& null_map = nullable_column.get_null_map_data();
+    EXPECT_EQ(result_column.get_data()[0], 10);
+    EXPECT_EQ(result_column.get_data()[2], -3);
+    EXPECT_EQ(null_map[0], 0);
+    EXPECT_EQ(null_map[1], 1);
+    EXPECT_EQ(null_map[2], 0);
+
+    context->close();
+}
+
+TEST_F(CastTest, CastLiteralToString) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeString>();
+    auto cast = Cast::create_shared(return_type);
+    cast->add_child(VLiteral::create_shared(source_type, Field::create_field<TYPE_INT>(123)));
+    auto context = VExprContext::create_shared(cast);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2, 3}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& result = block.get_by_position(result_column_id);
+    EXPECT_EQ(result.type->to_string(*result.column, 0), "123");
+    EXPECT_EQ(result.type->to_string(*result.column, 1), "123");
+    EXPECT_EQ(result.type->to_string(*result.column, 2), "123");
+
+    context->close();
+}
+
+TEST_F(CastTest, EmptyBlockAppendsEmptyResultColumn) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_column_id, 1);
+    EXPECT_EQ(block.get_by_position(result_column_id).column->size(), 0);
+
+    context->close();
+}
+
+TEST_F(CastTest, PrepareRejectsMissingChild) {
+    auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+    VExprContext context(cast);
+
+    auto status = context.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+TEST_F(CastTest, PrepareRejectsMultipleChildren) {
+    auto child_type = std::make_shared<DataTypeInt32>();
+    auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+    cast->add_child(VSlotRef::create_shared(0, 0, -1, child_type, "c0"));
+    cast->add_child(VSlotRef::create_shared(1, 1, -1, child_type, "c1"));
+    VExprContext context(cast);
+
+    auto status = context.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/expr/delete_predicate_test.cpp b/be/test/format_v2/expr/delete_predicate_test.cpp
new file mode 100644
index 00000000000000..264a9fdf9b19f5
--- /dev/null
+++ b/be/test/format_v2/expr/delete_predicate_test.cpp
@@ -0,0 +1,168 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/delete_predicate.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/descriptors.h"
+#include "testutil/mock/mock_slot_ref.h"
+
+namespace doris::format {
+
+class DeletePredicateTest : public testing::Test {
+protected:
+    static Block make_block(const std::vector<int64_t>& row_ids) {
+        auto column = ColumnInt64::create();
+        for (auto row_id : row_ids) {
+            column->insert_value(row_id);
+        }
+
+        Block block;
+        block.insert({std::move(column), std::make_shared<DataTypeInt64>(), "row_id"});
+        return block;
+    }
+
+    static std::vector<UInt8> result_column_data(const Block& block, int result_column_id) {
+        const auto& result_column =
+                assert_cast<const ColumnBool&>(*block.get_by_position(result_column_id).column);
+        return {result_column.get_data().begin(), result_column.get_data().end()};
+    }
+
+    static Status execute_delete_predicate(const std::vector<int64_t>& deleted_rows, Block* block,
+                                           int* result_column_id) {
+        auto delete_predicate = std::make_shared<DeletePredicate>(deleted_rows);
+        delete_predicate->_open_finished = true;
+        delete_predicate->add_child(
+                std::make_shared<MockSlotRef>(0, std::make_shared<DataTypeInt64>()));
+
+        VExprContext context(delete_predicate);
+        return delete_predicate->execute(&context, block, result_column_id);
+    }
+};
+
+TEST_F(DeletePredicateTest, MatchDeletedRowsInInputRange) {
+    const std::vector<int64_t> deleted_rows {-3, 1, 4, 8, 12, 20};
+    auto block = make_block({0, 1, 2, 3, 4, 5, 8, 12});
+
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(result_column_id, 1);
+    EXPECT_EQ(result_column_data(block, result_column_id),
+              std::vector<UInt8>({0, 1, 0, 0, 1, 0, 1, 1}));
+}
+
+TEST_F(DeletePredicateTest, EmptyDeletedRowsReturnAllFalse) {
+    const std::vector<int64_t> deleted_rows;
+    auto block = make_block({1, 2, 3});
+
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, DeletedRowsOutsideInputRangeReturnAllFalse) {
+    const std::vector<int64_t> deleted_rows {-10, -1, 10, 11};
+    auto block = make_block({1, 2, 3});
+
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, EmptyRowIdColumnAppendsEmptyResultColumn) {
+    const std::vector<int64_t> deleted_rows {1, 2, 3};
+    auto block = make_block({});
+
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(block.columns(), 2);
+    EXPECT_EQ(result_column_id, 1);
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({}));
+}
+
+TEST_F(DeletePredicateTest, MissingRowIdColumnReturnsError) {
+    const std::vector<int64_t> deleted_rows {1, 2, 3};
+    Block block;
+
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid column id"), std::string::npos);
+    EXPECT_EQ(block.columns(), 0);
+    EXPECT_EQ(result_column_id, -1);
+}
+
+TEST_F(DeletePredicateTest, MissingRowIdChildReturnsError) {
+    const std::vector<int64_t> deleted_rows {1};
+    auto block = make_block({1});
+    auto delete_predicate = std::make_shared<DeletePredicate>(deleted_rows);
+    delete_predicate->_open_finished = true;
+    VExprContext context(delete_predicate);
+
+    int result_column_id = -1;
+    auto status = delete_predicate->execute(&context, &block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, ExecuteColumnImplReturnsError) {
+    const std::vector<int64_t> deleted_rows {1};
+    DeletePredicate delete_predicate(deleted_rows);
+    VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+    ColumnPtr result_column;
+
+    auto status =
+            delete_predicate.execute_column_impl(&context, nullptr, nullptr, 0, result_column);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("DeletePredicate::execute_column_impl"), std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, LifecycleAndDebugString) {
+    const std::vector<int64_t> deleted_rows {1};
+    DeletePredicate delete_predicate(deleted_rows);
+    VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+    RowDescriptor row_desc;
+
+    auto status = delete_predicate.prepare(nullptr, row_desc, &context);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(delete_predicate.expr_name(), "DeletePredicate");
+    EXPECT_EQ(delete_predicate.debug_string(), "DeletePredicate");
+
+    status = delete_predicate.open(nullptr, &context, FunctionContext::THREAD_LOCAL);
+    ASSERT_TRUE(status.ok()) << status;
+    delete_predicate.close(&context, FunctionContext::THREAD_LOCAL);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/expr/equality_delete_predicate_test.cpp b/be/test/format_v2/expr/equality_delete_predicate_test.cpp
new file mode 100644
index 00000000000000..886a86713fe8da
--- /dev/null
+++ b/be/test/format_v2/expr/equality_delete_predicate_test.cpp
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/equality_delete_predicate.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/expr/cast.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+#include "testutil/mock/mock_slot_ref.h"
+
+namespace doris::format {
+
+class EqualityDeletePredicateTest : public testing::Test {
+protected:
+    static ColumnWithTypeAndName make_nullable_int_column(
+            const std::string& name, const std::vector<std::optional<int>>& values) {
+        auto data = ColumnInt32::create();
+        auto null_map = ColumnUInt8::create();
+        for (const auto& value : values) {
+            data->insert_value(value.value_or(0));
+            null_map->insert_value(!value.has_value());
+        }
+        auto type = make_nullable(std::make_shared<DataTypeInt32>());
+        return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name};
+    }
+
+    static ColumnWithTypeAndName make_nullable_string_column(
+            const std::string& name, const std::vector<std::optional<std::string>>& values) {
+        auto data = ColumnString::create();
+        auto null_map = ColumnUInt8::create();
+        for (const auto& value : values) {
+            const std::string data_value = value.value_or("");
+            data->insert_data(data_value.data(), data_value.size());
+            null_map->insert_value(!value.has_value());
+        }
+        auto type = make_nullable(std::make_shared<DataTypeString>());
+        return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name};
+    }
+
+    static std::vector<UInt8> result_column_data(const Block& block, int result_column_id) {
+        const auto& result_column =
+                assert_cast<const ColumnBool&>(*block.get_by_position(result_column_id).column);
+        return {result_column.get_data().begin(), result_column.get_data().end()};
+    }
+
+    static Status execute_equality_delete_predicate(Block delete_block, std::vector<int> field_ids,
+                                                    Block* data_block, int* result_column_id) {
+        auto predicate =
+                std::make_shared<EqualityDeletePredicate>(std::move(delete_block), field_ids);
+        predicate->_open_finished = true;
+        for (size_t idx = 0; idx < field_ids.size(); ++idx) {
+            predicate->add_child(
+                    std::make_shared<MockSlotRef>(idx, data_block->get_by_position(idx).type));
+        }
+
+        VExprContext context(predicate);
+        return predicate->execute(&context, data_block, result_column_id);
+    }
+
+    static Status execute_prepared_equality_delete_predicate(const VExprContextSPtr& context,
+                                                             MockRuntimeState* state,
+                                                             Block* data_block,
+                                                             int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(state));
+        return context->execute(data_block, result_column_id);
+    }
+};
+
+TEST_F(EqualityDeletePredicateTest, MatchSingleColumn) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 4}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, 2, 3, 4}));
+
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 0, 1}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchMultipleColumns) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 2}));
+    delete_block.insert(make_nullable_string_column("name", {"a", "b"}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, 1, 2, 2}));
+    data_block.insert(make_nullable_string_column("name", {"a", "b", "a", "b"}));
+
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1, 2}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 0, 1}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchNullValues) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {std::nullopt}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, std::nullopt, 3}));
+
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({0, 1, 0}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchAfterCastToDeleteKeyType) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 4}));
+    Block data_block;
+    data_block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({1, 2, 4}));
+
+    auto predicate = std::make_shared<EqualityDeletePredicate>(std::move(delete_block),
+                                                               std::vector<int> {1});
+    auto cast_expr = Cast::create_shared(make_nullable(std::make_shared<DataTypeInt32>()));
+    cast_expr->add_child(std::make_shared<MockSlotRef>(0, data_block.get_by_position(0).type));
+    predicate->add_child(std::move(cast_expr));
+    auto context = VExprContext::create_shared(predicate);
+    MockRuntimeState state;
+
+    int result_column_id = -1;
+    auto status = execute_prepared_equality_delete_predicate(context, &state, &data_block,
+                                                             &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 1}));
+    context->close();
+}
+
+TEST_F(EqualityDeletePredicateTest, ChildCountMismatchReturnsError) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1}));
+    auto predicate = std::make_shared<EqualityDeletePredicate>(std::move(delete_block),
+                                                               std::vector<int> {1});
+    predicate->_open_finished = true;
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1}));
+    VExprContext context(predicate);
+
+    int result_column_id = -1;
+    auto status = predicate->execute(&context, &data_block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("should have 1 child exprs"), std::string::npos);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/json/json_reader_test.cpp b/be/test/format_v2/json/json_reader_test.cpp
new file mode 100644
index 00000000000000..994d71c0e6aefc
--- /dev/null
+++ b/be/test/format_v2/json/json_reader_test.cpp
@@ -0,0 +1,402 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/json/json_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "format_v2/column_data.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::json {
+namespace {
+
+TFileScanRangeParams json_scan_params(bool read_json_by_line = true, bool strip_outer_array = false,
+                                      std::string jsonpaths = "", std::string json_root = "",
+                                      bool ignore_malformed = false) {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_JSON);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    params.__set_compress_type(TFileCompressType::PLAIN);
+    TFileAttributes attributes;
+    TFileTextScanRangeParams text_params;
+    text_params.__set_line_delimiter("\n");
+    attributes.__set_text_params(std::move(text_params));
+    attributes.__set_read_json_by_line(read_json_by_line);
+    attributes.__set_strip_outer_array(strip_outer_array);
+    attributes.__set_num_as_string(false);
+    attributes.__set_fuzzy_parse(false);
+    if (!jsonpaths.empty()) {
+        attributes.__set_jsonpaths(std::move(jsonpaths));
+    }
+    if (!json_root.empty()) {
+        attributes.__set_json_root(std::move(json_root));
+    }
+    if (ignore_malformed) {
+        attributes.__set_openx_json_ignore_malformed(true);
+    }
+    params.__set_file_attributes(std::move(attributes));
+    return params;
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor slot_desc;
+    slot_desc.__set_id(slot_id);
+    slot_desc.__set_parent(0);
+    slot_desc.__set_slotType(type->to_thrift());
+    slot_desc.__set_columnPos(slot_idx);
+    slot_desc.__set_byteOffset(0);
+    if (type->is_nullable()) {
+        slot_desc.__set_nullIndicatorByte(slot_idx / 8);
+        slot_desc.__set_nullIndicatorBit(slot_idx % 8);
+    } else {
+        slot_desc.__set_nullIndicatorByte(0);
+        slot_desc.__set_nullIndicatorBit(-1);
+    }
+    slot_desc.__set_slotIdx(slot_idx);
+    slot_desc.__set_isMaterialized(true);
+    slot_desc.__set_colName(name);
+    return pool->add(new SlotDescriptor(slot_desc));
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, make_nullable(std::make_shared<DataTypeString>()), "name")};
+}
+
+std::vector<SlotDescriptor*> build_slots_with_required_name(ObjectPool* pool) {
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, std::make_shared<DataTypeString>(), "name")};
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = path;
+    desc->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    desc->range_start_offset = 0;
+    desc->range_size = desc->file_size;
+    return desc;
+}
+
+std::filesystem::path write_json_file(const std::string& name, const std::string& content) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_json_reader";
+    std::filesystem::create_directories(test_dir);
+    const auto file_path = test_dir / name;
+    std::ofstream out(file_path);
+    out << content;
+    return file_path;
+}
+
+TFileRangeDesc file_range(const std::filesystem::path& file_path) {
+    TFileRangeDesc range;
+    range.__set_path(file_path.string());
+    range.__set_start_offset(0);
+    range.__set_size(static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    range.__set_file_size(static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    return range;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block;
+    for (const auto local_id : local_ids) {
+        const auto it = std::ranges::find_if(
+                schema, [&](const auto& column) { return column.local_id == local_id; });
+        EXPECT_TRUE(it != schema.end());
+        block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+struct ReadResult {
+    Status status;
+    Status second_status = Status::OK();
+    Block block;
+    size_t rows = 0;
+    bool eof = false;
+    size_t second_rows = 0;
+    bool second_eof = false;
+    std::vector<ColumnDefinition> schema;
+};
+
+ReadResult read_once(const std::string& file_name, const std::string& content,
+                     TFileScanRangeParams params, const std::vector<SlotDescriptor*>& slots,
+                     const std::vector<int32_t>& requested_local_ids, bool read_twice = false) {
+    const auto file_path = write_json_file(file_name, content);
+    auto range = file_range(file_path);
+
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(file_path.string());
+    RuntimeProfile profile("json_v2_reader_test");
+    MockRuntimeState state;
+    JsonReader reader(system_properties, desc, nullptr, &profile, &params, range, slots);
+
+    ReadResult result;
+    result.status = reader.init(&state);
+    if (!result.status.ok()) {
+        return result;
+    }
+    result.status = reader.get_schema(&result.schema);
+    if (!result.status.ok()) {
+        return result;
+    }
+
+    auto request = std::make_shared<FileScanRequest>();
+    for (size_t i = 0; i < requested_local_ids.size(); ++i) {
+        request->local_positions.emplace(LocalColumnId(requested_local_ids[i]), LocalIndex(i));
+    }
+    result.status = reader.open(request);
+    if (!result.status.ok()) {
+        return result;
+    }
+
+    result.block = make_block(result.schema, requested_local_ids);
+    result.status = reader.get_block(&result.block, &result.rows, &result.eof);
+    if (result.status.ok() && read_twice) {
+        auto eof_block = make_block(result.schema, requested_local_ids);
+        result.second_status =
+                reader.get_block(&eof_block, &result.second_rows, &result.second_eof);
+    }
+    return result;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnString&>(nullable.get_nested_column());
+    return nested.get_data_at(row).to_string();
+}
+
+std::string string_at(const IColumn& column, size_t row) {
+    const auto& nested = assert_cast<const ColumnString&>(column);
+    return nested.get_data_at(row).to_string();
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    const auto& nested = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+    return nested.get_data()[row];
+}
+
+bool nullable_is_null_at(const IColumn& column, size_t row) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    return nullable.is_null_at(row);
+}
+
+} // namespace
+
+TEST(JsonReaderTest, ReadsRequestedColumnsInFileScanRequestOrder) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("order.jsonl",
+                            R"({"id":1,"name":"alice"})"
+                            "\n"
+                            R"({"id":2,"name":"bob"})"
+                            "\n",
+                            json_scan_params(), slots, {1, 0}, true);
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.schema.size(), 2);
+    EXPECT_EQ(result.schema[0].name, "id");
+    EXPECT_EQ(result.schema[0].local_id, 0);
+    EXPECT_EQ(result.schema[1].name, "name");
+    EXPECT_EQ(result.schema[1].local_id, 1);
+    ASSERT_EQ(result.rows, 2);
+    ASSERT_EQ(result.block.columns(), 2);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "bob");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 1);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 2);
+    ASSERT_TRUE(result.second_status.ok()) << result.second_status.to_string();
+    EXPECT_EQ(result.second_rows, 0);
+    EXPECT_TRUE(result.second_eof);
+}
+
+TEST(JsonReaderTest, ReadsSingleDocumentOuterArray) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result =
+            read_once("outer_array.json", R"([{"id":3,"name":"carol"},{"id":4,"name":"dave"}])",
+                      json_scan_params(false, true), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 3);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "carol");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 4);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "dave");
+}
+
+TEST(JsonReaderTest, ReadsJsonRootByLine) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("json_root.jsonl",
+                            R"({"payload":{"id":5,"name":"eve"}})"
+                            "\n"
+                            R"({"payload":{"id":6,"name":"frank"}})"
+                            "\n",
+                            json_scan_params(true, false, "", "$.payload"), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 5);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "eve");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 6);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "frank");
+}
+
+TEST(JsonReaderTest, ReadsJsonPathsBySourceSlotAndReturnsRequestedBlockOrder) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("jsonpaths.jsonl",
+                            R"({"payload":{"id":7,"user":"grace"}})"
+                            "\n"
+                            R"({"payload":{"id":8,"user":"heidi"}})"
+                            "\n",
+                            json_scan_params(true, false, R"(["$.payload.id","$.payload.user"])"),
+                            slots, {1, 0});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "grace");
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "heidi");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 7);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 8);
+}
+
+TEST(JsonReaderTest, ReadsJsonPathsFromSingleDocumentOuterArray) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once(
+            "outer_array_jsonpaths.json",
+            R"([{"payload":{"id":12,"user":"kate"}},{"payload":{"id":13,"user":"leo"}}])",
+            json_scan_params(false, true, R"(["$.payload.id","$.payload.user"])"), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 12);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "kate");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 13);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "leo");
+}
+
+TEST(JsonReaderTest, FillsMissingNullableColumnWithNull) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("missing_nullable.jsonl",
+                            R"({"id":9})"
+                            "\n",
+                            json_scan_params(), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 1);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 9);
+    EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(1).column, 0));
+}
+
+TEST(JsonReaderTest, ReturnsErrorForMissingRequiredColumn) {
+    ObjectPool pool;
+    auto slots = build_slots_with_required_name(&pool);
+    auto result = read_once("missing_required.jsonl",
+                            R"({"id":10})"
+                            "\n",
+                            json_scan_params(), slots, {0, 1});
+
+    EXPECT_FALSE(result.status.ok());
+}
+
+TEST(JsonReaderTest, ReadsPresentRequiredColumn) {
+    ObjectPool pool;
+    auto slots = build_slots_with_required_name(&pool);
+    auto result = read_once("present_required.jsonl",
+                            R"({"id":14,"name":"mallory"})"
+                            "\n",
+                            json_scan_params(), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.schema.size(), 2);
+    EXPECT_TRUE(result.schema[0].type->is_nullable());
+    EXPECT_FALSE(result.schema[1].type->is_nullable());
+    ASSERT_EQ(result.rows, 1);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 14);
+    EXPECT_EQ(string_at(*result.block.get_by_position(1).column, 0), "mallory");
+}
+
+TEST(JsonReaderTest, ReturnsErrorForMalformedJsonByDefault) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("malformed_strict.jsonl",
+                            "not-json\n"
+                            R"({"id":11,"name":"judy"})"
+                            "\n",
+                            json_scan_params(), slots, {0, 1});
+
+    EXPECT_FALSE(result.status.ok());
+}
+
+TEST(JsonReaderTest, IgnoresMalformedJsonAsNullRowsWhenConfigured) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("ignore_malformed.jsonl",
+                            "not-json\n"
+                            R"({"id":11,"name":"judy"})"
+                            "\n",
+                            json_scan_params(true, false, "", "", true), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(0).column, 0));
+    EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(1).column, 0));
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 11);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "judy");
+}
+
+TEST(JsonReaderTest, SkipsEmptyJsonLine) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("empty_line.jsonl",
+                            "\n"
+                            R"({"id":15,"name":"nancy"})"
+                            "\n",
+                            json_scan_params(), slots, {0, 1});
+
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 1);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 15);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "nancy");
+}
+
+} // namespace doris::format::json
diff --git a/be/test/format_v2/parquet/parquet_column_reader_test.cpp b/be/test/format_v2/parquet/parquet_column_reader_test.cpp
new file mode 100644
index 00000000000000..91382203c5cea9
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_column_reader_test.cpp
@@ -0,0 +1,3620 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <array>
+#include <filesystem>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_array.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/types.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+
+namespace doris::format::parquet {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5;
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+template <typename ColumnType>
+const ColumnType& get_nullable_nested_column(const IColumn& column) {
+    // File-local schema exposed by the parquet reader follows Doris external-table semantics:
+    // nested STRUCT fields, LIST elements, and MAP keys/values are nullable even when the parquet
+    // field is required.
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+    return assert_cast<const ColumnType&>(nullable_column.get_nested_column());
+}
+
+ParquetColumnSchema mock_column_schema() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "mock";
+    schema.type = std::make_shared<DataTypeInt32>();
+    return schema;
+}
+
+class BaseUnsupportedReader final : public ParquetColumnReader {
+public:
+    BaseUnsupportedReader()
+            : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); }
+};
+
+class DefaultSelectReader final : public ParquetColumnReader {
+public:
+    DefaultSelectReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override {
+        auto& values = assert_cast<ColumnInt32&>(*column);
+        for (int64_t row = 0; row < rows; ++row) {
+            values.insert_value(static_cast<int32_t>(_cursor + row));
+        }
+        _cursor += rows;
+        *rows_read = rows;
+        _read_ranges.push_back(rows);
+        return Status::OK();
+    }
+
+    Status skip(int64_t rows) override {
+        _cursor += rows;
+        _skip_ranges.push_back(rows);
+        return Status::OK();
+    }
+
+    const std::vector<int64_t>& read_ranges() const { return _read_ranges; }
+    const std::vector<int64_t>& skip_ranges() const { return _skip_ranges; }
+
+private:
+    int64_t _cursor = 0;
+    std::vector<int64_t> _read_ranges;
+    std::vector<int64_t> _skip_ranges;
+};
+
+class NestedSkipReader final : public ParquetColumnReader {
+public:
+    NestedSkipReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); }
+
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        auto& values = assert_cast<ColumnInt32&>(*column);
+        for (int64_t row = 0; row < length_upper_bound; ++row) {
+            values.insert_value(static_cast<int32_t>(row));
+        }
+        *values_read = length_upper_bound;
+        return Status::OK();
+    }
+};
+
+class ParquetColumnReaderTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_column_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.parquet").string();
+        write_parquet_file();
+        _file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+        auto metadata = _file_reader->metadata();
+        ASSERT_EQ(metadata->num_row_groups(), 1);
+        _row_group = _file_reader->RowGroup(0);
+        ASSERT_NE(_row_group, nullptr);
+        auto schema_descriptor = _file_reader->metadata()->schema();
+        ASSERT_NE(schema_descriptor, nullptr);
+        auto st = build_parquet_column_schema(*schema_descriptor, &_fields);
+        ASSERT_TRUE(st.ok()) << st;
+        ASSERT_EQ(_fields.size(), _expected_by_field.size());
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    template <typename Builder, typename Value>
+    std::shared_ptr<arrow::Array> build_required_array(const std::vector<Value>& values) {
+        Builder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+        arrow::StringBuilder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int32_array() {
+        arrow::Int32Builder builder;
+        EXPECT_TRUE(builder.Append(1).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(3).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(5).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_all_null_int32_array() {
+        arrow::Int32Builder builder;
+        for (int64_t row = 0; row < ROW_COUNT; ++row) {
+            EXPECT_TRUE(builder.AppendNull().ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_struct_array() {
+        auto struct_type = arrow::struct_({arrow::field("a", arrow::int32(), false),
+                                           arrow::field("b", arrow::utf8(), false)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+        const std::vector<int32_t> a_values = {101, 102, 103, 104, 105};
+        const std::vector<std::string> b_values = {"sa", "sb", "sc", "sd", "se"};
+        for (size_t row = 0; row < a_values.size(); ++row) {
+            EXPECT_TRUE(builder.Append().ok());
+            EXPECT_TRUE(a_builder->Append(a_values[row]).ok());
+            EXPECT_TRUE(b_builder->Append(b_values[row]).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_array() {
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(201).ok());
+        EXPECT_TRUE(b_builder->Append("nsa").ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(203).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(204).ok());
+        EXPECT_TRUE(b_builder->Append("nsd").ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_decimal_array() {
+        auto decimal_type = arrow::decimal128(38, 6);
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("d", decimal_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto d_array_builder = std::make_unique<arrow::Decimal128Builder>(
+                decimal_type, arrow::default_memory_pool());
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(d_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* d_builder = assert_cast<arrow::Decimal128Builder*>(builder.field_builder(1));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(301).ok());
+        EXPECT_TRUE(d_builder->Append(arrow::Decimal128(123456789)).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(303).ok());
+        EXPECT_TRUE(d_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(304).ok());
+        EXPECT_TRUE(d_builder->Append(arrow::Decimal128(-987654321)).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_list_array() {
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("xs", list_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        field_builders.push_back(list_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(301).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(303).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(304).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(305).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(5).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_map_array() {
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("kv", map_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), key_builder, value_builder, map_type);
+        field_builders.push_back(map_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(401).ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append("one").ok());
+        EXPECT_TRUE(key_builder->Append(2).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(403).ok());
+        EXPECT_TRUE(map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(404).ok());
+        EXPECT_TRUE(map_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(405).ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(5).ok());
+        EXPECT_TRUE(value_builder->Append("five").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_nested_struct_list_array() {
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto nested_type = arrow::struct_({arrow::field("xs", list_type, true)});
+        auto struct_type = arrow::struct_({arrow::field("nested", nested_type, true)});
+
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> nested_field_builders;
+        nested_field_builders.push_back(list_builder);
+        auto nested_builder = std::make_shared<arrow::StructBuilder>(
+                nested_type, arrow::default_memory_pool(), std::move(nested_field_builders));
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        field_builders.push_back(nested_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(7).ok());
+        EXPECT_TRUE(value_builder->Append(8).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(nested_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_int_list_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), false)));
+        const std::vector<std::vector<int32_t>> values = {
+                {1, 2}, {3}, {4, 5, 6}, {7}, {8, 9},
+        };
+        for (const auto& row : values) {
+            EXPECT_TRUE(builder.Append().ok());
+            for (const auto value : row) {
+                EXPECT_TRUE(value_builder->Append(value).ok());
+            }
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_list_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), true)));
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->Append(10).ok());
+        EXPECT_TRUE(value_builder->Append(20).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(30).ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->Append(40).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_nullable_int_list_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), true)));
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(110).ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->Append(120).ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->Append(130).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_list_array() {
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        auto struct_builder = std::make_shared<arrow::StructBuilder>(
+                struct_type, arrow::default_memory_pool(), std::move(field_builders));
+        arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder,
+                                   arrow::list(arrow::field("element", struct_type, true)));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(struct_builder->field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(struct_builder->field_builder(1));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(11).ok());
+        EXPECT_TRUE(b_builder->Append("la").ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(12).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->AppendNull().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(13).ok());
+        EXPECT_TRUE(b_builder->Append("ld").ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(14).ok());
+        EXPECT_TRUE(b_builder->Append("le").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_list_list_int_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto inner_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto inner_list_builder = std::make_shared<arrow::ListBuilder>(
+                arrow::default_memory_pool(), value_builder, inner_list_type);
+        arrow::ListBuilder builder(arrow::default_memory_pool(), inner_list_builder,
+                                   arrow::list(arrow::field("element", inner_list_type, true)));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+        EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(inner_list_builder->AppendNull().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(3).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(4).ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(5).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_int_string_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        const std::vector<std::vector<std::pair<int32_t, std::string>>> values = {
+                {{1, "a"}, {2, "b"}}, {{3, "c"}},           {{4, "d"}, {5, "e"}, {6, "f"}},
+                {{7, "g"}},           {{8, "h"}, {9, "i"}},
+        };
+        for (const auto& row : values) {
+            EXPECT_TRUE(builder.Append().ok());
+            for (const auto& [key, value] : row) {
+                EXPECT_TRUE(key_builder->Append(key).ok());
+                EXPECT_TRUE(value_builder->Append(value).ok());
+            }
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_string_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(10).ok());
+        EXPECT_TRUE(value_builder->Append("aa").ok());
+        EXPECT_TRUE(key_builder->Append(20).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(30).ok());
+        EXPECT_TRUE(value_builder->Append("cc").ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(40).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_nullable_string_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(101).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(102).ok());
+        EXPECT_TRUE(value_builder->Append("bb").ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(103).ok());
+        EXPECT_TRUE(value_builder->Append("cc").ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(104).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_struct_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        auto value_builder = std::make_shared<arrow::StructBuilder>(
+                struct_type, arrow::default_memory_pool(), std::move(field_builders));
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", struct_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(value_builder->field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(value_builder->field_builder(1));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(101).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(21).ok());
+        EXPECT_TRUE(b_builder->Append("ma").ok());
+        EXPECT_TRUE(key_builder->Append(102).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(22).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(103).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(104).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(24).ok());
+        EXPECT_TRUE(b_builder->Append("me").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_list_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, list_builder,
+                                  map_type);
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(201).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+        EXPECT_TRUE(key_builder->Append(202).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(203).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(204).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(3).ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(205).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(4).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_map_list_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), key_builder, value_builder, map_type);
+        arrow::ListBuilder builder(arrow::default_memory_pool(), map_builder,
+                                   arrow::list(arrow::field("element", map_type, true)));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append("a").ok());
+        EXPECT_TRUE(key_builder->Append(2).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(map_builder->AppendNull().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(3).ok());
+        EXPECT_TRUE(value_builder->Append("c").ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(4).ok());
+        EXPECT_TRUE(value_builder->Append("d").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_map_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto nested_key_builder = std::make_shared<arrow::Int32Builder>();
+        auto nested_value_builder = std::make_shared<arrow::StringBuilder>();
+        auto nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto nested_map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), nested_key_builder, nested_value_builder,
+                nested_map_type);
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", nested_map_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, nested_map_builder,
+                                  map_type);
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(10).ok());
+        EXPECT_TRUE(nested_map_builder->Append().ok());
+        EXPECT_TRUE(nested_key_builder->Append(101).ok());
+        EXPECT_TRUE(nested_value_builder->Append("aa").ok());
+        EXPECT_TRUE(key_builder->Append(20).ok());
+        EXPECT_TRUE(nested_map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(30).ok());
+        EXPECT_TRUE(nested_map_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(40).ok());
+        EXPECT_TRUE(nested_map_builder->Append().ok());
+        EXPECT_TRUE(nested_key_builder->Append(401).ok());
+        EXPECT_TRUE(nested_value_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_deep_list_struct_map_list_array() {
+        auto element_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 element_builder, list_type);
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        auto map_builder = std::make_shared<arrow::MapBuilder>(arrow::default_memory_pool(),
+                                                               key_builder, list_builder, map_type);
+        auto struct_type = arrow::struct_({arrow::field("kv", map_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> struct_field_builders;
+        struct_field_builders.push_back(map_builder);
+        auto struct_builder = std::make_shared<arrow::StructBuilder>(
+                struct_type, arrow::default_memory_pool(), std::move(struct_field_builders));
+        arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder,
+                                   arrow::list(arrow::field("element", struct_type, true)));
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(1).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(element_builder->Append(10).ok());
+        EXPECT_TRUE(element_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(2).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(struct_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(map_builder->AppendNull().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(3).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(4).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(element_builder->Append(40).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_deep_map_list_map_array() {
+        auto nested_key_builder = std::make_shared<arrow::Int32Builder>();
+        auto nested_value_builder = std::make_shared<arrow::StringBuilder>();
+        auto nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto nested_map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), nested_key_builder, nested_value_builder,
+                nested_map_type);
+        auto list_type = arrow::list(arrow::field("element", nested_map_type, true));
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 nested_map_builder, list_type);
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, list_builder,
+                                  map_type);
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(10).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(nested_map_builder->Append().ok());
+        EXPECT_TRUE(nested_key_builder->Append(1).ok());
+        EXPECT_TRUE(nested_value_builder->Append("a").ok());
+        EXPECT_TRUE(nested_key_builder->Append(2).ok());
+        EXPECT_TRUE(nested_value_builder->AppendNull().ok());
+        EXPECT_TRUE(nested_map_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(nested_map_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(20).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(30).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(40).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(nested_map_builder->Append().ok());
+        EXPECT_TRUE(nested_key_builder->Append(3).ok());
+        EXPECT_TRUE(nested_value_builder->Append("c").ok());
+
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(50).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(nested_map_builder->AppendNull().ok());
+        EXPECT_TRUE(nested_map_builder->Append().ok());
+        EXPECT_TRUE(nested_key_builder->Append(4).ok());
+        EXPECT_TRUE(nested_value_builder->Append("d").ok());
+        return finish_array(&builder);
+    }
+
+    void add_field(const std::shared_ptr<arrow::Field>& field, std::shared_ptr<arrow::Array> array,
+                   std::function<void(const ParquetColumnSchema&, const IColumn&)> validator) {
+        _arrow_fields.push_back(field);
+        _arrays.push_back(std::move(array));
+        _expected_by_field.push_back(std::move(validator));
+    }
+
+    void write_parquet_file() {
+        add_field(arrow::field("int32_col", arrow::int32(), false),
+                  build_required_array<arrow::Int32Builder, int32_t>({10, 20, 30, 40, 50}),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+                      const auto& values = assert_cast<const ColumnInt32&>(column);
+                      EXPECT_EQ(values.get_element(0), 10);
+                      EXPECT_EQ(values.get_element(4), 50);
+                  });
+        add_field(arrow::field("string_col", arrow::utf8(), false),
+                  build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"}),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type_descriptor.is_string_like);
+                      const auto& values = assert_cast<const ColumnString&>(column);
+                      EXPECT_EQ(values.get_data_at(0).to_string(), "alpha");
+                      EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon");
+                  });
+        add_field(arrow::field("nullable_int_col", arrow::int32(), true),
+                  build_nullable_int32_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      const auto& nested_column =
+                              assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_TRUE(nullable_column.is_null_at(3));
+                      EXPECT_EQ(nested_column.get_element(0), 1);
+                      EXPECT_EQ(nested_column.get_element(2), 3);
+                  });
+        add_field(arrow::field("all_null_int_col", arrow::int32(), true),
+                  build_all_null_int32_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      for (size_t row = 0; row < ROW_COUNT; ++row) {
+                          EXPECT_TRUE(nullable_column.is_null_at(row));
+                      }
+                  });
+        add_field(arrow::field("struct_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("b", arrow::utf8(), false),
+                               }),
+                               false),
+                  build_required_struct_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_STRUCT);
+                      const auto& struct_column = assert_cast<const ColumnStruct&>(column);
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              get_nullable_nested_column<ColumnString>(struct_column.get_column(1));
+                      EXPECT_EQ(a_values.get_element(0), 101);
+                      EXPECT_EQ(a_values.get_element(4), 105);
+                      EXPECT_EQ(b_values.get_data_at(1).to_string(), "sb");
+                      EXPECT_EQ(b_values.get_data_at(4).to_string(), "se");
+                  });
+        add_field(arrow::field("nullable_struct_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("b", arrow::utf8(), true),
+                               }),
+                               true),
+                  build_nullable_struct_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_TRUE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& b_nested =
+                              assert_cast<const ColumnString&>(b_values.get_nested_column());
+                      EXPECT_EQ(a_values.get_element(0), 201);
+                      EXPECT_EQ(a_values.get_element(2), 203);
+                      EXPECT_EQ(a_values.get_element(3), 204);
+                      EXPECT_FALSE(b_values.is_null_at(0));
+                      EXPECT_TRUE(b_values.is_null_at(2));
+                      EXPECT_FALSE(b_values.is_null_at(3));
+                      EXPECT_EQ(b_nested.get_data_at(0).to_string(), "nsa");
+                      EXPECT_EQ(b_nested.get_data_at(3).to_string(), "nsd");
+                  });
+        add_field(arrow::field("nullable_struct_decimal_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("d", arrow::decimal128(38, 6), true),
+                               }),
+                               true),
+                  build_nullable_struct_with_decimal_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_TRUE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& d_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& d_nested =
+                              assert_cast<const ColumnDecimal128V3&>(d_values.get_nested_column());
+                      EXPECT_EQ(a_values.get_element(0), 301);
+                      EXPECT_EQ(a_values.get_element(2), 303);
+                      EXPECT_EQ(a_values.get_element(3), 304);
+                      EXPECT_FALSE(d_values.is_null_at(0));
+                      EXPECT_TRUE(d_values.is_null_at(2));
+                      EXPECT_FALSE(d_values.is_null_at(3));
+                      EXPECT_EQ(d_nested.get_element(0), Decimal128V3(123456789));
+                      EXPECT_EQ(d_nested.get_element(3), Decimal128V3(-987654321));
+                  });
+        auto struct_list_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("xs", arrow::list(arrow::field("element", arrow::int32(), true)),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_list_col", struct_list_type, true),
+                  build_nullable_struct_with_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      EXPECT_EQ(a_values.get_element(0), 301);
+                      EXPECT_EQ(a_values.get_element(2), 303);
+                      EXPECT_EQ(a_values.get_element(3), 304);
+                      EXPECT_EQ(a_values.get_element(4), 305);
+
+                      const auto& xs_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      ASSERT_EQ(xs_nullable.size(), ROW_COUNT);
+                      EXPECT_FALSE(xs_nullable.is_null_at(0));
+                      EXPECT_FALSE(xs_nullable.is_null_at(2));
+                      EXPECT_TRUE(xs_nullable.is_null_at(3));
+                      EXPECT_FALSE(xs_nullable.is_null_at(4));
+                      const auto& xs_array =
+                              assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+                      const auto& offsets = xs_array.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 2);
+                      EXPECT_EQ(offsets[4], 4);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(xs_array.get_data());
+                      ASSERT_EQ(elements.size(), 4);
+                      EXPECT_FALSE(elements.is_null_at(0));
+                      EXPECT_FALSE(elements.is_null_at(1));
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_FALSE(elements.is_null_at(3));
+                      const auto& values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      EXPECT_EQ(values.get_element(0), 1);
+                      EXPECT_EQ(values.get_element(1), 2);
+                      EXPECT_EQ(values.get_element(3), 5);
+                  });
+        auto struct_map_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("kv",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_map_col", struct_map_type, true),
+                  build_nullable_struct_with_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      EXPECT_EQ(a_values.get_element(0), 401);
+                      EXPECT_EQ(a_values.get_element(2), 403);
+                      EXPECT_EQ(a_values.get_element(3), 404);
+                      EXPECT_EQ(a_values.get_element(4), 405);
+
+                      const auto& kv_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      ASSERT_EQ(kv_nullable.size(), ROW_COUNT);
+                      EXPECT_FALSE(kv_nullable.is_null_at(0));
+                      EXPECT_FALSE(kv_nullable.is_null_at(2));
+                      EXPECT_TRUE(kv_nullable.is_null_at(3));
+                      EXPECT_FALSE(kv_nullable.is_null_at(4));
+                      const auto& kv_map =
+                              assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+                      const auto& offsets = kv_map.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 2);
+                      EXPECT_EQ(offsets[4], 3);
+                      const auto& keys = get_nullable_nested_column<ColumnInt32>(kv_map.get_keys());
+                      const auto& values = assert_cast<const ColumnNullable&>(kv_map.get_values());
+                      const auto& value_data =
+                              assert_cast<const ColumnString&>(values.get_nested_column());
+                      ASSERT_EQ(keys.size(), 3);
+                      ASSERT_EQ(values.size(), 3);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 5);
+                      EXPECT_EQ(value_data.get_data_at(0).to_string(), "one");
+                      EXPECT_TRUE(values.is_null_at(1));
+                      EXPECT_EQ(value_data.get_data_at(2).to_string(), "five");
+                  });
+        auto nested_struct_list_type = arrow::struct_({
+                arrow::field("nested",
+                             arrow::struct_({
+                                     arrow::field("xs",
+                                                  arrow::list(arrow::field("element",
+                                                                           arrow::int32(), true)),
+                                                  true),
+                             }),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_nested_struct_list_col", nested_struct_list_type,
+                               true),
+                  build_nullable_struct_with_nested_struct_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      const auto& nested_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+                      EXPECT_FALSE(nested_nullable.is_null_at(0));
+                      EXPECT_TRUE(nested_nullable.is_null_at(2));
+                      EXPECT_FALSE(nested_nullable.is_null_at(3));
+                      EXPECT_FALSE(nested_nullable.is_null_at(4));
+                  });
+        add_field(arrow::field("list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), false)), false),
+                  build_required_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_ARRAY);
+                      const auto* array_type =
+                              assert_cast<const DataTypeArray*>(remove_nullable(schema.type).get());
+                      EXPECT_EQ(
+                              remove_nullable(array_type->get_nested_type())->get_primitive_type(),
+                              TYPE_INT);
+                      const auto& array_column = assert_cast<const ColumnArray&>(column);
+                      ASSERT_EQ(array_column.size(), ROW_COUNT);
+                      const auto array_size_at = [&array_column](size_t row_idx) {
+                          return array_column.get_offsets()[row_idx] -
+                                 (row_idx == 0 ? 0 : array_column.get_offsets()[row_idx - 1]);
+                      };
+                      EXPECT_EQ(array_size_at(0), 2);
+                      EXPECT_EQ(array_size_at(1), 1);
+                      EXPECT_EQ(array_size_at(2), 3);
+                      EXPECT_EQ(array_size_at(4), 2);
+                      const auto& values =
+                              get_nullable_nested_column<ColumnInt32>(array_column.get_data());
+                      ASSERT_EQ(values.size(), 9);
+                      EXPECT_EQ(values.get_element(0), 1);
+                      EXPECT_EQ(values.get_element(5), 6);
+                      EXPECT_EQ(values.get_element(8), 9);
+                  });
+        add_field(arrow::field("nullable_list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), true)), true),
+                  build_nullable_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      const auto& array_column =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& offsets = array_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 4);
+                      EXPECT_EQ(offsets[4], 5);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(array_column.get_data());
+                      const auto& values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      ASSERT_EQ(elements.size(), 5);
+                      EXPECT_EQ(values.get_element(0), 10);
+                      EXPECT_EQ(values.get_element(1), 20);
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_EQ(values.get_element(3), 30);
+                      EXPECT_EQ(values.get_element(4), 40);
+                  });
+        add_field(arrow::field("required_nullable_list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), true)), false),
+                  build_required_nullable_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_FALSE(schema.type->is_nullable());
+                      const auto& array_column = assert_cast<const ColumnArray&>(column);
+                      const auto& offsets = array_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 0);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 3);
+                      EXPECT_EQ(offsets[3], 5);
+                      EXPECT_EQ(offsets[4], 5);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(array_column.get_data());
+                      ASSERT_EQ(elements.size(), 5);
+                      EXPECT_TRUE(elements.is_null_at(0));
+                      EXPECT_FALSE(elements.is_null_at(1));
+                      EXPECT_TRUE(elements.is_null_at(4));
+                  });
+        auto list_struct_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("b", arrow::utf8(), true),
+        });
+        add_field(arrow::field("nullable_list_struct_col",
+                               arrow::list(arrow::field("element", list_struct_type, true)), true),
+                  build_nullable_struct_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& array_column =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& offsets = array_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 4);
+                      EXPECT_EQ(offsets[4], 5);
+
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(array_column.get_data());
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(elements.get_nested_column());
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& b_data =
+                              assert_cast<const ColumnString&>(b_values.get_nested_column());
+                      ASSERT_EQ(elements.size(), 5);
+                      EXPECT_FALSE(elements.is_null_at(0));
+                      EXPECT_FALSE(elements.is_null_at(1));
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_FALSE(elements.is_null_at(3));
+                      EXPECT_EQ(a_values.get_element(0), 11);
+                      EXPECT_EQ(a_values.get_element(1), 12);
+                      EXPECT_EQ(a_values.get_element(3), 13);
+                      EXPECT_EQ(a_values.get_element(4), 14);
+                      EXPECT_EQ(b_data.get_data_at(0).to_string(), "la");
+                      EXPECT_TRUE(b_values.is_null_at(1));
+                      EXPECT_EQ(b_data.get_data_at(3).to_string(), "ld");
+                      EXPECT_EQ(b_data.get_data_at(4).to_string(), "le");
+                  });
+        auto nested_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        add_field(arrow::field("nullable_list_list_int_col",
+                               arrow::list(arrow::field("element", nested_list_type, true)), true),
+                  build_nullable_list_list_int_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& outer_array =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& outer_offsets = outer_array.get_offsets();
+                      ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_offsets[0], 4);
+                      EXPECT_EQ(outer_offsets[1], 4);
+                      EXPECT_EQ(outer_offsets[2], 4);
+                      EXPECT_EQ(outer_offsets[3], 5);
+                      EXPECT_EQ(outer_offsets[4], 7);
+
+                      const auto& inner_nullable =
+                              assert_cast<const ColumnNullable&>(outer_array.get_data());
+                      ASSERT_EQ(inner_nullable.size(), 7);
+                      EXPECT_FALSE(inner_nullable.is_null_at(0));
+                      EXPECT_FALSE(inner_nullable.is_null_at(1));
+                      EXPECT_TRUE(inner_nullable.is_null_at(2));
+                      EXPECT_FALSE(inner_nullable.is_null_at(3));
+                      EXPECT_FALSE(inner_nullable.is_null_at(6));
+
+                      const auto& inner_array =
+                              assert_cast<const ColumnArray&>(inner_nullable.get_nested_column());
+                      const auto& inner_offsets = inner_array.get_offsets();
+                      ASSERT_EQ(inner_offsets.size(), 7);
+                      EXPECT_EQ(inner_offsets[0], 2);
+                      EXPECT_EQ(inner_offsets[1], 2);
+                      EXPECT_EQ(inner_offsets[2], 2);
+                      EXPECT_EQ(inner_offsets[3], 4);
+                      EXPECT_EQ(inner_offsets[4], 5);
+                      EXPECT_EQ(inner_offsets[5], 5);
+                      EXPECT_EQ(inner_offsets[6], 7);
+
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(inner_array.get_data());
+                      const auto& values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      ASSERT_EQ(elements.size(), 7);
+                      EXPECT_EQ(values.get_element(0), 1);
+                      EXPECT_EQ(values.get_element(1), 2);
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_EQ(values.get_element(3), 3);
+                      EXPECT_EQ(values.get_element(4), 4);
+                      EXPECT_EQ(values.get_element(5), 5);
+                      EXPECT_TRUE(elements.is_null_at(6));
+                  });
+        add_field(arrow::field(
+                          "map_int_string_col",
+                          arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false)),
+                          false),
+                  build_required_int_string_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_MAP);
+                      const auto* map_type =
+                              assert_cast<const DataTypeMap*>(remove_nullable(schema.type).get());
+                      EXPECT_EQ(remove_nullable(map_type->get_key_type())->get_primitive_type(),
+                                TYPE_INT);
+                      EXPECT_EQ(remove_nullable(map_type->get_value_type())->get_primitive_type(),
+                                TYPE_STRING);
+                      const auto& map_column = assert_cast<const ColumnMap&>(column);
+                      ASSERT_EQ(map_column.size(), ROW_COUNT);
+                      const auto map_size_at = [&map_column](size_t row_idx) {
+                          return map_column.get_offsets()[row_idx] -
+                                 (row_idx == 0 ? 0 : map_column.get_offsets()[row_idx - 1]);
+                      };
+                      EXPECT_EQ(map_size_at(0), 2);
+                      EXPECT_EQ(map_size_at(1), 1);
+                      EXPECT_EQ(map_size_at(2), 3);
+                      EXPECT_EQ(map_size_at(4), 2);
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                      const auto& values =
+                              get_nullable_nested_column<ColumnString>(map_column.get_values());
+                      ASSERT_EQ(keys.size(), 9);
+                      ASSERT_EQ(values.size(), 9);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(5), 6);
+                      EXPECT_EQ(keys.get_element(8), 9);
+                      EXPECT_EQ(values.get_data_at(0).to_string(), "a");
+                      EXPECT_EQ(values.get_data_at(5).to_string(), "f");
+                      EXPECT_EQ(values.get_data_at(8).to_string(), "i");
+                  });
+        add_field(
+                arrow::field("nullable_map_int_string_col",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             true),
+                build_nullable_int_string_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_column.is_null_at(0));
+                    EXPECT_TRUE(nullable_column.is_null_at(1));
+                    EXPECT_FALSE(nullable_column.is_null_at(2));
+                    const auto& map_column =
+                            assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                    const auto& offsets = map_column.get_offsets();
+                    ASSERT_EQ(offsets.size(), ROW_COUNT);
+                    EXPECT_EQ(offsets[0], 2);
+                    EXPECT_EQ(offsets[1], 2);
+                    EXPECT_EQ(offsets[2], 2);
+                    EXPECT_EQ(offsets[3], 3);
+                    EXPECT_EQ(offsets[4], 4);
+                    const auto& keys =
+                            get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                    const auto& values =
+                            assert_cast<const ColumnNullable&>(map_column.get_values());
+                    const auto& value_data =
+                            assert_cast<const ColumnString&>(values.get_nested_column());
+                    ASSERT_EQ(keys.size(), 4);
+                    EXPECT_EQ(keys.get_element(0), 10);
+                    EXPECT_EQ(keys.get_element(1), 20);
+                    EXPECT_EQ(keys.get_element(3), 40);
+                    EXPECT_EQ(value_data.get_data_at(0).to_string(), "aa");
+                    EXPECT_TRUE(values.is_null_at(1));
+                    EXPECT_EQ(value_data.get_data_at(2).to_string(), "cc");
+                    EXPECT_TRUE(values.is_null_at(3));
+                });
+        add_field(
+                arrow::field("required_nullable_map_int_string_col",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             false),
+                build_required_nullable_string_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_FALSE(schema.type->is_nullable());
+                    const auto& map_column = assert_cast<const ColumnMap&>(column);
+                    const auto& offsets = map_column.get_offsets();
+                    ASSERT_EQ(offsets.size(), ROW_COUNT);
+                    EXPECT_EQ(offsets[0], 0);
+                    EXPECT_EQ(offsets[1], 2);
+                    EXPECT_EQ(offsets[2], 3);
+                    EXPECT_EQ(offsets[3], 3);
+                    EXPECT_EQ(offsets[4], 4);
+                    const auto& values =
+                            assert_cast<const ColumnNullable&>(map_column.get_values());
+                    ASSERT_EQ(values.size(), 4);
+                    EXPECT_TRUE(values.is_null_at(0));
+                    EXPECT_FALSE(values.is_null_at(1));
+                    EXPECT_TRUE(values.is_null_at(3));
+                });
+        auto map_struct_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("b", arrow::utf8(), true),
+        });
+        add_field(arrow::field(
+                          "nullable_map_int_struct_col",
+                          arrow::map(arrow::int32(), arrow::field("value", map_struct_type, true)),
+                          true),
+                  build_nullable_int_struct_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& map_column =
+                              assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                      const auto& offsets = map_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 3);
+                      EXPECT_EQ(offsets[4], 4);
+
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                      const auto& values =
+                              assert_cast<const ColumnNullable&>(map_column.get_values());
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(values.get_nested_column());
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& b_data =
+                              assert_cast<const ColumnString&>(b_values.get_nested_column());
+                      ASSERT_EQ(keys.size(), 4);
+                      ASSERT_EQ(values.size(), 4);
+                      EXPECT_EQ(keys.get_element(0), 101);
+                      EXPECT_EQ(keys.get_element(1), 102);
+                      EXPECT_EQ(keys.get_element(3), 104);
+                      EXPECT_FALSE(values.is_null_at(0));
+                      EXPECT_FALSE(values.is_null_at(1));
+                      EXPECT_TRUE(values.is_null_at(2));
+                      EXPECT_FALSE(values.is_null_at(3));
+                      EXPECT_EQ(a_values.get_element(0), 21);
+                      EXPECT_EQ(a_values.get_element(1), 22);
+                      EXPECT_EQ(a_values.get_element(3), 24);
+                      EXPECT_EQ(b_data.get_data_at(0).to_string(), "ma");
+                      EXPECT_TRUE(b_values.is_null_at(1));
+                      EXPECT_EQ(b_data.get_data_at(3).to_string(), "me");
+                  });
+        auto map_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        add_field(
+                arrow::field("nullable_map_int_list_col",
+                             arrow::map(arrow::int32(), arrow::field("value", map_list_type, true)),
+                             true),
+                build_nullable_int_list_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_column.is_null_at(0));
+                    EXPECT_TRUE(nullable_column.is_null_at(1));
+                    EXPECT_FALSE(nullable_column.is_null_at(2));
+                    EXPECT_FALSE(nullable_column.is_null_at(3));
+                    EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                    const auto& map_column =
+                            assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                    const auto& map_offsets = map_column.get_offsets();
+                    ASSERT_EQ(map_offsets.size(), ROW_COUNT);
+                    EXPECT_EQ(map_offsets[0], 2);
+                    EXPECT_EQ(map_offsets[1], 2);
+                    EXPECT_EQ(map_offsets[2], 2);
+                    EXPECT_EQ(map_offsets[3], 4);
+                    EXPECT_EQ(map_offsets[4], 5);
+
+                    const auto& keys =
+                            get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                    ASSERT_EQ(keys.size(), 5);
+                    EXPECT_EQ(keys.get_element(0), 201);
+                    EXPECT_EQ(keys.get_element(1), 202);
+                    EXPECT_EQ(keys.get_element(2), 203);
+                    EXPECT_EQ(keys.get_element(3), 204);
+                    EXPECT_EQ(keys.get_element(4), 205);
+
+                    const auto& values =
+                            assert_cast<const ColumnNullable&>(map_column.get_values());
+                    ASSERT_EQ(values.size(), 5);
+                    EXPECT_FALSE(values.is_null_at(0));
+                    EXPECT_FALSE(values.is_null_at(1));
+                    EXPECT_TRUE(values.is_null_at(2));
+                    EXPECT_FALSE(values.is_null_at(3));
+                    EXPECT_FALSE(values.is_null_at(4));
+
+                    const auto& list_column =
+                            assert_cast<const ColumnArray&>(values.get_nested_column());
+                    const auto& list_offsets = list_column.get_offsets();
+                    ASSERT_EQ(list_offsets.size(), 5);
+                    EXPECT_EQ(list_offsets[0], 2);
+                    EXPECT_EQ(list_offsets[1], 2);
+                    EXPECT_EQ(list_offsets[2], 2);
+                    EXPECT_EQ(list_offsets[3], 4);
+                    EXPECT_EQ(list_offsets[4], 5);
+
+                    const auto& elements =
+                            assert_cast<const ColumnNullable&>(list_column.get_data());
+                    const auto& element_values =
+                            assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                    ASSERT_EQ(elements.size(), 5);
+                    EXPECT_EQ(element_values.get_element(0), 1);
+                    EXPECT_EQ(element_values.get_element(1), 2);
+                    EXPECT_TRUE(elements.is_null_at(2));
+                    EXPECT_EQ(element_values.get_element(3), 3);
+                    EXPECT_EQ(element_values.get_element(4), 4);
+                });
+        auto list_map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        add_field(arrow::field("nullable_list_map_int_string_col",
+                               arrow::list(arrow::field("element", list_map_type, true)), true),
+                  build_nullable_map_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& outer_array =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& outer_offsets = outer_array.get_offsets();
+                      ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_offsets[0], 2);
+                      EXPECT_EQ(outer_offsets[1], 2);
+                      EXPECT_EQ(outer_offsets[2], 2);
+                      EXPECT_EQ(outer_offsets[3], 4);
+                      EXPECT_EQ(outer_offsets[4], 5);
+
+                      const auto& map_values =
+                              assert_cast<const ColumnNullable&>(outer_array.get_data());
+                      ASSERT_EQ(map_values.size(), 5);
+                      EXPECT_FALSE(map_values.is_null_at(0));
+                      EXPECT_FALSE(map_values.is_null_at(1));
+                      EXPECT_TRUE(map_values.is_null_at(2));
+                      EXPECT_FALSE(map_values.is_null_at(3));
+                      EXPECT_FALSE(map_values.is_null_at(4));
+
+                      const auto& map_column =
+                              assert_cast<const ColumnMap&>(map_values.get_nested_column());
+                      const auto& map_offsets = map_column.get_offsets();
+                      ASSERT_EQ(map_offsets.size(), 5);
+                      EXPECT_EQ(map_offsets[0], 2);
+                      EXPECT_EQ(map_offsets[1], 2);
+                      EXPECT_EQ(map_offsets[2], 2);
+                      EXPECT_EQ(map_offsets[3], 3);
+                      EXPECT_EQ(map_offsets[4], 4);
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                      const auto& values =
+                              assert_cast<const ColumnNullable&>(map_column.get_values());
+                      const auto& value_data =
+                              assert_cast<const ColumnString&>(values.get_nested_column());
+                      ASSERT_EQ(keys.size(), 4);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 3);
+                      EXPECT_EQ(keys.get_element(3), 4);
+                      EXPECT_EQ(value_data.get_data_at(0).to_string(), "a");
+                      EXPECT_TRUE(values.is_null_at(1));
+                      EXPECT_EQ(value_data.get_data_at(2).to_string(), "c");
+                      EXPECT_EQ(value_data.get_data_at(3).to_string(), "d");
+                  });
+        auto nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        add_field(arrow::field(
+                          "nullable_map_int_map_int_string_col",
+                          arrow::map(arrow::int32(), arrow::field("value", nested_map_type, true)),
+                          true),
+                  build_nullable_int_map_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& outer_map =
+                              assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                      const auto& outer_offsets = outer_map.get_offsets();
+                      ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_offsets[0], 2);
+                      EXPECT_EQ(outer_offsets[1], 2);
+                      EXPECT_EQ(outer_offsets[2], 2);
+                      EXPECT_EQ(outer_offsets[3], 4);
+                      EXPECT_EQ(outer_offsets[4], 4);
+
+                      const auto& outer_keys =
+                              get_nullable_nested_column<ColumnInt32>(outer_map.get_keys());
+                      ASSERT_EQ(outer_keys.size(), 4);
+                      EXPECT_EQ(outer_keys.get_element(0), 10);
+                      EXPECT_EQ(outer_keys.get_element(1), 20);
+                      EXPECT_EQ(outer_keys.get_element(2), 30);
+                      EXPECT_EQ(outer_keys.get_element(3), 40);
+
+                      const auto& inner_values =
+                              assert_cast<const ColumnNullable&>(outer_map.get_values());
+                      ASSERT_EQ(inner_values.size(), 4);
+                      EXPECT_FALSE(inner_values.is_null_at(0));
+                      EXPECT_FALSE(inner_values.is_null_at(1));
+                      EXPECT_TRUE(inner_values.is_null_at(2));
+                      EXPECT_FALSE(inner_values.is_null_at(3));
+
+                      const auto& inner_map =
+                              assert_cast<const ColumnMap&>(inner_values.get_nested_column());
+                      const auto& inner_offsets = inner_map.get_offsets();
+                      ASSERT_EQ(inner_offsets.size(), 4);
+                      EXPECT_EQ(inner_offsets[0], 1);
+                      EXPECT_EQ(inner_offsets[1], 1);
+                      EXPECT_EQ(inner_offsets[2], 1);
+                      EXPECT_EQ(inner_offsets[3], 2);
+                      const auto& inner_keys =
+                              get_nullable_nested_column<ColumnInt32>(inner_map.get_keys());
+                      const auto& inner_strings =
+                              assert_cast<const ColumnNullable&>(inner_map.get_values());
+                      const auto& inner_string_data =
+                              assert_cast<const ColumnString&>(inner_strings.get_nested_column());
+                      ASSERT_EQ(inner_keys.size(), 2);
+                      EXPECT_EQ(inner_keys.get_element(0), 101);
+                      EXPECT_EQ(inner_keys.get_element(1), 401);
+                      EXPECT_EQ(inner_string_data.get_data_at(0).to_string(), "aa");
+                      EXPECT_TRUE(inner_strings.is_null_at(1));
+                  });
+        auto deep_list_value_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto deep_list_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", deep_list_value_type, true));
+        auto deep_list_struct_type = arrow::struct_({arrow::field("kv", deep_list_map_type, true)});
+        add_field(arrow::field("nullable_list_struct_map_list_col",
+                               arrow::list(arrow::field("element", deep_list_struct_type, true)),
+                               true),
+                  build_deep_list_struct_map_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& outer_array =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& outer_offsets = outer_array.get_offsets();
+                      ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_offsets[0], 2);
+                      EXPECT_EQ(outer_offsets[1], 2);
+                      EXPECT_EQ(outer_offsets[2], 2);
+                      EXPECT_EQ(outer_offsets[3], 4);
+                      EXPECT_EQ(outer_offsets[4], 5);
+
+                      const auto& struct_values =
+                              assert_cast<const ColumnNullable&>(outer_array.get_data());
+                      ASSERT_EQ(struct_values.size(), 5);
+                      EXPECT_FALSE(struct_values.is_null_at(0));
+                      EXPECT_TRUE(struct_values.is_null_at(1));
+                      EXPECT_FALSE(struct_values.is_null_at(2));
+                      EXPECT_FALSE(struct_values.is_null_at(3));
+                      EXPECT_FALSE(struct_values.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(struct_values.get_nested_column());
+                      const auto& map_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+                      ASSERT_EQ(map_values.size(), 5);
+                      EXPECT_FALSE(map_values.is_null_at(0));
+                      EXPECT_TRUE(map_values.is_null_at(1));
+                      EXPECT_TRUE(map_values.is_null_at(2));
+                      EXPECT_FALSE(map_values.is_null_at(3));
+                      EXPECT_FALSE(map_values.is_null_at(4));
+
+                      const auto& map_column =
+                              assert_cast<const ColumnMap&>(map_values.get_nested_column());
+                      const auto& map_offsets = map_column.get_offsets();
+                      ASSERT_EQ(map_offsets.size(), 5);
+                      EXPECT_EQ(map_offsets[0], 2);
+                      EXPECT_EQ(map_offsets[1], 2);
+                      EXPECT_EQ(map_offsets[2], 2);
+                      EXPECT_EQ(map_offsets[3], 2);
+                      EXPECT_EQ(map_offsets[4], 4);
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+                      ASSERT_EQ(keys.size(), 4);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 3);
+                      EXPECT_EQ(keys.get_element(3), 4);
+
+                      const auto& lists =
+                              assert_cast<const ColumnNullable&>(map_column.get_values());
+                      ASSERT_EQ(lists.size(), 4);
+                      EXPECT_FALSE(lists.is_null_at(0));
+                      EXPECT_FALSE(lists.is_null_at(1));
+                      EXPECT_TRUE(lists.is_null_at(2));
+                      EXPECT_FALSE(lists.is_null_at(3));
+                      const auto& list_column =
+                              assert_cast<const ColumnArray&>(lists.get_nested_column());
+                      const auto& list_offsets = list_column.get_offsets();
+                      ASSERT_EQ(list_offsets.size(), 4);
+                      EXPECT_EQ(list_offsets[0], 2);
+                      EXPECT_EQ(list_offsets[1], 2);
+                      EXPECT_EQ(list_offsets[2], 2);
+                      EXPECT_EQ(list_offsets[3], 3);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(list_column.get_data());
+                      const auto& element_values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      ASSERT_EQ(elements.size(), 3);
+                      EXPECT_EQ(element_values.get_element(0), 10);
+                      EXPECT_TRUE(elements.is_null_at(1));
+                      EXPECT_EQ(element_values.get_element(2), 40);
+                  });
+        auto deep_map_nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto deep_map_list_type =
+                arrow::list(arrow::field("element", deep_map_nested_map_type, true));
+        add_field(
+                arrow::field(
+                        "nullable_map_int_list_map_int_string_col",
+                        arrow::map(arrow::int32(), arrow::field("value", deep_map_list_type, true)),
+                        true),
+                build_deep_map_list_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_column.is_null_at(0));
+                    EXPECT_TRUE(nullable_column.is_null_at(1));
+                    EXPECT_FALSE(nullable_column.is_null_at(2));
+                    EXPECT_FALSE(nullable_column.is_null_at(3));
+                    EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                    const auto& outer_map =
+                            assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                    const auto& outer_offsets = outer_map.get_offsets();
+                    ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                    EXPECT_EQ(outer_offsets[0], 2);
+                    EXPECT_EQ(outer_offsets[1], 2);
+                    EXPECT_EQ(outer_offsets[2], 2);
+                    EXPECT_EQ(outer_offsets[3], 4);
+                    EXPECT_EQ(outer_offsets[4], 5);
+                    const auto& outer_keys =
+                            get_nullable_nested_column<ColumnInt32>(outer_map.get_keys());
+                    ASSERT_EQ(outer_keys.size(), 5);
+                    EXPECT_EQ(outer_keys.get_element(0), 10);
+                    EXPECT_EQ(outer_keys.get_element(1), 20);
+                    EXPECT_EQ(outer_keys.get_element(2), 30);
+                    EXPECT_EQ(outer_keys.get_element(3), 40);
+                    EXPECT_EQ(outer_keys.get_element(4), 50);
+
+                    const auto& lists = assert_cast<const ColumnNullable&>(outer_map.get_values());
+                    ASSERT_EQ(lists.size(), 5);
+                    EXPECT_FALSE(lists.is_null_at(0));
+                    EXPECT_FALSE(lists.is_null_at(1));
+                    EXPECT_TRUE(lists.is_null_at(2));
+                    EXPECT_FALSE(lists.is_null_at(3));
+                    EXPECT_FALSE(lists.is_null_at(4));
+                    const auto& list_column =
+                            assert_cast<const ColumnArray&>(lists.get_nested_column());
+                    const auto& list_offsets = list_column.get_offsets();
+                    ASSERT_EQ(list_offsets.size(), 5);
+                    EXPECT_EQ(list_offsets[0], 3);
+                    EXPECT_EQ(list_offsets[1], 3);
+                    EXPECT_EQ(list_offsets[2], 3);
+                    EXPECT_EQ(list_offsets[3], 4);
+                    EXPECT_EQ(list_offsets[4], 6);
+
+                    const auto& inner_maps =
+                            assert_cast<const ColumnNullable&>(list_column.get_data());
+                    ASSERT_EQ(inner_maps.size(), 6);
+                    EXPECT_FALSE(inner_maps.is_null_at(0));
+                    EXPECT_FALSE(inner_maps.is_null_at(1));
+                    EXPECT_TRUE(inner_maps.is_null_at(2));
+                    EXPECT_FALSE(inner_maps.is_null_at(3));
+                    EXPECT_TRUE(inner_maps.is_null_at(4));
+                    EXPECT_FALSE(inner_maps.is_null_at(5));
+                    const auto& inner_map_column =
+                            assert_cast<const ColumnMap&>(inner_maps.get_nested_column());
+                    const auto& inner_offsets = inner_map_column.get_offsets();
+                    ASSERT_EQ(inner_offsets.size(), 6);
+                    EXPECT_EQ(inner_offsets[0], 2);
+                    EXPECT_EQ(inner_offsets[1], 2);
+                    EXPECT_EQ(inner_offsets[2], 2);
+                    EXPECT_EQ(inner_offsets[3], 3);
+                    EXPECT_EQ(inner_offsets[4], 3);
+                    EXPECT_EQ(inner_offsets[5], 4);
+                    const auto& inner_keys =
+                            get_nullable_nested_column<ColumnInt32>(inner_map_column.get_keys());
+                    ASSERT_EQ(inner_keys.size(), 4);
+                    EXPECT_EQ(inner_keys.get_element(0), 1);
+                    EXPECT_EQ(inner_keys.get_element(1), 2);
+                    EXPECT_EQ(inner_keys.get_element(2), 3);
+                    EXPECT_EQ(inner_keys.get_element(3), 4);
+                    const auto& strings =
+                            assert_cast<const ColumnNullable&>(inner_map_column.get_values());
+                    const auto& string_data =
+                            assert_cast<const ColumnString&>(strings.get_nested_column());
+                    ASSERT_EQ(strings.size(), 4);
+                    EXPECT_EQ(string_data.get_data_at(0).to_string(), "a");
+                    EXPECT_TRUE(strings.is_null_at(1));
+                    EXPECT_EQ(string_data.get_data_at(2).to_string(), "c");
+                    EXPECT_EQ(string_data.get_data_at(3).to_string(), "d");
+                });
+
+        auto schema = arrow::schema(_arrow_fields);
+        auto table = arrow::Table::Make(schema, _arrays);
+
+        auto file_result = arrow::io::FileOutputStream::Open(_file_path);
+        ASSERT_TRUE(file_result.ok()) << file_result.status();
+        std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+        ::parquet::WriterProperties::Builder builder;
+        builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+        builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+        builder.compression(::parquet::Compression::UNCOMPRESSED);
+        PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                          ROW_COUNT, builder.build()));
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_reader(size_t field_idx) const {
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(*_fields[field_idx], &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_projected_child_reader(size_t field_idx,
+                                                                       size_t child_idx) const {
+        const auto& struct_schema = *_fields[field_idx];
+        EXPECT_LT(child_idx, struct_schema.children.size());
+
+        format::LocalColumnIndex projection;
+        projection.index = struct_schema.local_id;
+        projection.project_all_children = false;
+        format::LocalColumnIndex child_projection;
+        child_projection.index = struct_schema.children[child_idx]->local_id;
+        projection.children.push_back(std::move(child_projection));
+
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(struct_schema, &projection, &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_projected_grandchild_reader(
+            size_t field_idx, size_t child_idx, size_t grandchild_idx) const {
+        const auto& struct_schema = *_fields[field_idx];
+        EXPECT_LT(child_idx, struct_schema.children.size());
+        const auto& child_schema = *struct_schema.children[child_idx];
+        EXPECT_LT(grandchild_idx, child_schema.children.size());
+
+        format::LocalColumnIndex projection;
+        projection.index = struct_schema.local_id;
+        projection.project_all_children = false;
+        format::LocalColumnIndex child_projection;
+        child_projection.index = child_schema.local_id;
+        child_projection.project_all_children = false;
+        format::LocalColumnIndex grandchild_projection;
+        grandchild_projection.index = child_schema.children[grandchild_idx]->local_id;
+        child_projection.children.push_back(std::move(grandchild_projection));
+        projection.children.push_back(std::move(child_projection));
+
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(struct_schema, &projection, &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;
+    }
+
+    void read_and_validate(size_t field_idx) const {
+        auto reader = create_reader(field_idx);
+        ASSERT_NE(reader, nullptr);
+        MutableColumnPtr column = reader->type()->create_column();
+        int64_t rows_read = 0;
+        auto st = reader->read(ROW_COUNT, column, &rows_read);
+        ASSERT_TRUE(st.ok()) << st;
+        ASSERT_EQ(rows_read, ROW_COUNT);
+        ASSERT_EQ(column->size(), ROW_COUNT);
+        _expected_by_field[field_idx](*_fields[field_idx], *column);
+    }
+
+    size_t find_field_idx(const std::string& name) const {
+        for (size_t field_idx = 0; field_idx < _fields.size(); ++field_idx) {
+            if (_fields[field_idx]->name == name) {
+                return field_idx;
+            }
+        }
+        ADD_FAILURE() << "Cannot find parquet test field " << name;
+        return _fields.size();
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::unique_ptr<::parquet::ParquetFileReader> _file_reader;
+    std::shared_ptr<::parquet::RowGroupReader> _row_group;
+    std::vector<std::unique_ptr<ParquetColumnSchema>> _fields;
+    std::vector<std::shared_ptr<arrow::Field>> _arrow_fields;
+    std::vector<std::shared_ptr<arrow::Array>> _arrays;
+    std::vector<std::function<void(const ParquetColumnSchema&, const IColumn&)>> _expected_by_field;
+};
+
+TEST(ParquetColumnReaderBaseTest, SelectionVectorRangesAndValidation) {
+    SelectionVector identity;
+    ASSERT_TRUE(identity.verify(4, 5).ok());
+    auto ranges = selection_to_ranges(identity, 4);
+    ASSERT_EQ(ranges.size(), 1);
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 4);
+
+    std::array<SelectionVector::Index, 5> selected = {0, 2, 3, 6, 6};
+    SelectionVector external(selected.data(), 4);
+    auto status = external.verify(3, 7);
+    ASSERT_TRUE(status.ok()) << status;
+    ranges = selection_to_ranges(external, 3);
+    ASSERT_EQ(ranges.size(), 2);
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 1);
+    EXPECT_EQ(ranges[1].start, 2);
+    EXPECT_EQ(ranges[1].length, 2);
+
+    EXPECT_FALSE(external.verify(8, 7).ok());
+    EXPECT_FALSE(external.verify(5, 7).ok());
+    EXPECT_FALSE(external.verify(4, 6).ok());
+
+    std::array<SelectionVector::Index, 3> duplicate = {0, 2, 2};
+    SelectionVector non_strict(duplicate.data(), duplicate.size());
+    EXPECT_FALSE(non_strict.verify(3, 5).ok());
+    EXPECT_FALSE(identity.verify(1, -1).ok());
+}
+
+TEST(ParquetColumnReaderBaseTest, DefaultSelectUsesSkipReadRangesAndSkipNestedUsesBuild) {
+    DefaultSelectReader reader;
+    std::array<SelectionVector::Index, 3> selected = {1, 3, 4};
+    SelectionVector selection(selected.data(), selected.size());
+    auto column = ColumnInt32::create();
+    MutableColumnPtr mutable_column = std::move(column);
+    auto status = reader.select(selection, selected.size(), 6, mutable_column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& values = assert_cast<const ColumnInt32&>(*mutable_column);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(values.get_element(0), 1);
+    EXPECT_EQ(values.get_element(1), 3);
+    EXPECT_EQ(values.get_element(2), 4);
+    EXPECT_EQ(reader.skip_ranges(), std::vector<int64_t>({1, 1, 1}));
+    EXPECT_EQ(reader.read_ranges(), std::vector<int64_t>({1, 2}));
+
+    BaseUnsupportedReader unsupported_reader;
+    auto skip_status = unsupported_reader.skip(1);
+    EXPECT_FALSE(skip_status.ok());
+    EXPECT_NE(skip_status.to_string().find("skip is not implemented"), std::string::npos);
+    EXPECT_FALSE(unsupported_reader.load_nested_batch(1).ok());
+    int64_t values_read = 0;
+    EXPECT_FALSE(unsupported_reader.build_nested_column(1, mutable_column, &values_read).ok());
+
+    NestedSkipReader nested_reader;
+    auto nested_status = nested_reader.skip_nested_column(3);
+    ASSERT_TRUE(nested_status.ok()) << nested_status;
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarReadCoversRequiredNullableAllNullAndMultipleBatches) {
+    read_and_validate(find_field_idx("int32_col"));
+    read_and_validate(find_field_idx("string_col"));
+    read_and_validate(find_field_idx("nullable_int_col"));
+    read_and_validate(find_field_idx("all_null_int_col"));
+
+    auto reader = create_reader(find_field_idx("int32_col"));
+    auto column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    ASSERT_TRUE(reader->read(3, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 3);
+    const auto& values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(values.get_element(0), 10);
+    EXPECT_EQ(values.get_element(1), 20);
+    EXPECT_EQ(values.get_element(2), 30);
+    EXPECT_EQ(values.get_element(4), 50);
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarSkipCoversZeroSomeAllAndNulls) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    ASSERT_TRUE(reader->skip(0).ok());
+    auto column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 1);
+    const auto& first_value = assert_cast<const ColumnInt32&>(*column);
+    EXPECT_EQ(first_value.get_element(0), 10);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    ASSERT_TRUE(reader->skip(2).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    const auto& skipped_values = assert_cast<const ColumnInt32&>(*column);
+    EXPECT_EQ(skipped_values.get_element(0), 30);
+    EXPECT_EQ(skipped_values.get_element(1), 40);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    ASSERT_TRUE(reader->skip(ROW_COUNT).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    EXPECT_EQ(rows_read, 0);
+    EXPECT_EQ(column->size(), 0);
+
+    reader = create_reader(find_field_idx("nullable_int_col"));
+    ASSERT_TRUE(reader->skip(1).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 2);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarSelectCoversAllDisjointSingleZeroThenReadAndNulls) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    SelectionVector all_selected(ROW_COUNT);
+    auto column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(all_selected, ROW_COUNT, ROW_COUNT, column).ok());
+    const auto& all_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(all_values.size(), ROW_COUNT);
+    EXPECT_EQ(all_values.get_element(0), 10);
+    EXPECT_EQ(all_values.get_element(4), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    std::array<SelectionVector::Index, 3> disjoint = {0, 2, 4};
+    SelectionVector disjoint_selection(disjoint.data(), disjoint.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(disjoint_selection, disjoint.size(), ROW_COUNT, column).ok());
+    const auto& disjoint_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(disjoint_values.size(), 3);
+    EXPECT_EQ(disjoint_values.get_element(0), 10);
+    EXPECT_EQ(disjoint_values.get_element(1), 30);
+    EXPECT_EQ(disjoint_values.get_element(2), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    std::array<SelectionVector::Index, 1> single = {2};
+    SelectionVector single_selection(single.data(), single.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(single_selection, single.size(), ROW_COUNT, column).ok());
+    const auto& single_value = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(single_value.size(), 1);
+    EXPECT_EQ(single_value.get_element(0), 30);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    std::array<SelectionVector::Index, 2> first_last = {0, 4};
+    SelectionVector first_last_selection(first_last.data(), first_last.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(first_last_selection, first_last.size(), ROW_COUNT, column).ok());
+    const auto& first_last_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(first_last_values.size(), 2);
+    EXPECT_EQ(first_last_values.get_element(0), 10);
+    EXPECT_EQ(first_last_values.get_element(1), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));
+    SelectionVector empty_selection;
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(empty_selection, 0, 2, column).ok());
+    ASSERT_EQ(column->size(), 0);
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 1);
+    const auto& after_empty_select = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(after_empty_select.size(), 1);
+    EXPECT_EQ(after_empty_select.get_element(0), 30);
+
+    reader = create_reader(find_field_idx("nullable_int_col"));
+    std::array<SelectionVector::Index, 3> nullable_rows = {0, 1, 2};
+    SelectionVector nullable_selection(nullable_rows.data(), nullable_rows.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(nullable_selection, nullable_rows.size(), ROW_COUNT, column).ok());
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+}
+
+TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidScalarInputsAndNestedScalarProjection) {
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+
+    const auto& int_schema = *_fields[find_field_idx("int32_col")];
+    ParquetColumnSchema invalid_leaf;
+    invalid_leaf.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    invalid_leaf.name = "invalid_leaf";
+    invalid_leaf.type = int_schema.type;
+    invalid_leaf.type_descriptor = int_schema.type_descriptor;
+    invalid_leaf.descriptor = int_schema.descriptor;
+    invalid_leaf.leaf_column_id = _file_reader->metadata()->num_columns();
+    auto status = factory.create(invalid_leaf, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid parquet leaf column id"), std::string::npos);
+
+    ParquetColumnSchema null_descriptor;
+    null_descriptor.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    null_descriptor.name = "null_descriptor";
+    null_descriptor.type = int_schema.type;
+    null_descriptor.type_descriptor = int_schema.type_descriptor;
+    null_descriptor.leaf_column_id = int_schema.leaf_column_id;
+    status = factory.create(null_descriptor, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("descriptor is null"), std::string::npos);
+
+    const auto& list_element_schema =
+            *_fields[find_field_idx("nullable_list_int_col")]->children[0];
+    status = factory.create(list_element_schema, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("flat primitive columns"), std::string::npos);
+
+    const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")];
+    format::LocalColumnIndex projection =
+            format::LocalColumnIndex::partial_local(list_schema.local_id);
+    format::LocalColumnIndex element_projection =
+            format::LocalColumnIndex::partial_local(list_element_schema.local_id);
+    projection.children.push_back(std::move(element_projection));
+    status = factory.create(list_schema, &projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("scalar projection is invalid"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidComplexProjections) {
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+
+    const auto& struct_schema = *_fields[find_field_idx("struct_col")];
+    format::LocalColumnIndex struct_empty =
+            format::LocalColumnIndex::partial_local(struct_schema.local_id);
+    auto status = factory.create(struct_schema, &struct_empty, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no children"), std::string::npos);
+
+    format::LocalColumnIndex struct_invalid =
+            format::LocalColumnIndex::partial_local(struct_schema.local_id);
+    struct_invalid.children.push_back(format::LocalColumnIndex::local(9999));
+    status = factory.create(struct_schema, &struct_invalid, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains invalid child"), std::string::npos);
+
+    const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")];
+    format::LocalColumnIndex list_empty =
+            format::LocalColumnIndex::partial_local(list_schema.local_id);
+    status = factory.create(list_schema, &list_empty, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no element"), std::string::npos);
+
+    const auto& map_schema = *_fields[find_field_idx("nullable_map_int_struct_col")];
+    const auto& value_schema = *map_schema.children[1];
+    format::LocalColumnIndex map_invalid =
+            format::LocalColumnIndex::partial_local(map_schema.local_id);
+    map_invalid.children.push_back(format::LocalColumnIndex::local(value_schema.local_id));
+    map_invalid.children.push_back(format::LocalColumnIndex::local(9999));
+    status = factory.create(map_schema, &map_invalid, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains invalid child"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadSupportedComplexTypes) {
+    read_and_validate(find_field_idx("struct_col"));
+    read_and_validate(find_field_idx("nullable_struct_col"));
+    read_and_validate(find_field_idx("nullable_struct_decimal_col"));
+    read_and_validate(find_field_idx("list_int_col"));
+    read_and_validate(find_field_idx("nullable_list_int_col"));
+    read_and_validate(find_field_idx("required_nullable_list_int_col"));
+    read_and_validate(find_field_idx("nullable_list_struct_col"));
+    read_and_validate(find_field_idx("nullable_list_list_int_col"));
+    read_and_validate(find_field_idx("map_int_string_col"));
+    read_and_validate(find_field_idx("nullable_map_int_string_col"));
+    read_and_validate(find_field_idx("required_nullable_map_int_string_col"));
+    read_and_validate(find_field_idx("nullable_map_int_struct_col"));
+    read_and_validate(find_field_idx("nullable_map_int_list_col"));
+    read_and_validate(find_field_idx("nullable_list_map_int_string_col"));
+    read_and_validate(find_field_idx("nullable_map_int_map_int_string_col"));
+    read_and_validate(find_field_idx("nullable_list_struct_map_list_col"));
+    read_and_validate(find_field_idx("nullable_map_int_list_map_int_string_col"));
+}
+
+TEST_F(ParquetColumnReaderTest, SkipThenRead) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    auto st = reader->skip(2);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+
+    const auto& int_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(int_values.size(), 2);
+    EXPECT_EQ(int_values.get_element(0), 30);
+    EXPECT_EQ(int_values.get_element(1), 40);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectReadsOnlySelectedRanges) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 2);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& int_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(int_values.size(), 3);
+    EXPECT_EQ(int_values.get_element(0), 10);
+    EXPECT_EQ(int_values.get_element(1), 30);
+    EXPECT_EQ(int_values.get_element(2), 50);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructChildren) {
+    const auto field_idx = find_field_idx("struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& struct_schema = *_fields[field_idx];
+    ASSERT_EQ(struct_schema.name, "struct_col");
+    ASSERT_EQ(struct_schema.children.size(), 2);
+
+    format::LocalColumnIndex projection;
+    projection.index = struct_schema.local_id;
+    projection.project_all_children = false;
+    format::LocalColumnIndex child_projection;
+    child_projection.index = struct_schema.children[1]->local_id;
+    projection.children.push_back(std::move(child_projection));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto st = factory.create(struct_schema, &projection, &reader);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(remove_nullable(reader->type())->get_primitive_type(), TYPE_STRUCT);
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+    const auto& struct_column = assert_cast<const ColumnStruct&>(*column);
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& values = get_nullable_nested_column<ColumnString>(struct_column.get_column(0));
+    EXPECT_EQ(values.get_data_at(0).to_string(), "sa");
+    EXPECT_EQ(values.get_data_at(4).to_string(), "se");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedNullableStructChildren) {
+    const auto field_idx = find_field_idx("nullable_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& struct_schema = *_fields[field_idx];
+    ASSERT_EQ(struct_schema.name, "nullable_struct_col");
+    ASSERT_EQ(struct_schema.children.size(), 2);
+
+    format::LocalColumnIndex projection;
+    projection.index = struct_schema.local_id;
+    projection.project_all_children = false;
+    format::LocalColumnIndex child_projection;
+    child_projection.index = struct_schema.children[1]->local_id;
+    projection.children.push_back(std::move(child_projection));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto st = factory.create(struct_schema, &projection, &reader);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_TRUE(reader->type()->is_nullable());
+    ASSERT_EQ(remove_nullable(reader->type())->get_primitive_type(), TYPE_STRUCT);
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_TRUE(nullable_column.is_null_at(4));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& values = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    const auto& nested_values = assert_cast<const ColumnString&>(values.get_nested_column());
+    EXPECT_FALSE(values.is_null_at(0));
+    EXPECT_TRUE(values.is_null_at(2));
+    EXPECT_FALSE(values.is_null_at(3));
+    EXPECT_EQ(nested_values.get_data_at(0).to_string(), "nsa");
+    EXPECT_EQ(nested_values.get_data_at(3).to_string(), "nsd");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedListStructElementChildren) {
+    const auto field_idx = find_field_idx("nullable_list_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& list_schema = *_fields[field_idx];
+    ASSERT_EQ(list_schema.name, "nullable_list_struct_col");
+    ASSERT_EQ(list_schema.children.size(), 1);
+    const auto& element_schema = *list_schema.children[0];
+    ASSERT_EQ(element_schema.children.size(), 2);
+
+    format::LocalColumnIndex projection;
+    projection.index = list_schema.local_id;
+    projection.project_all_children = false;
+    format::LocalColumnIndex element_projection;
+    element_projection.index = element_schema.local_id;
+    element_projection.project_all_children = false;
+    format::LocalColumnIndex child_projection;
+    child_projection.index = element_schema.children[1]->local_id;
+    element_projection.children.push_back(std::move(child_projection));
+    projection.children.push_back(std::move(element_projection));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto st = factory.create(list_schema, &projection, &reader);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* array_type =
+            assert_cast<const DataTypeArray*>(remove_nullable(reader->type()).get());
+    const auto* element_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(array_type->get_nested_type()).get());
+    ASSERT_EQ(element_type->get_elements().size(), 1);
+    EXPECT_EQ(element_type->get_element_name(0), "b");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& elements = assert_cast<const ColumnNullable&>(array_column.get_data());
+    const auto& struct_column = assert_cast<const ColumnStruct&>(elements.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& b_values = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    const auto& b_data = assert_cast<const ColumnString&>(b_values.get_nested_column());
+    ASSERT_EQ(elements.size(), 5);
+    EXPECT_EQ(b_data.get_data_at(0).to_string(), "la");
+    EXPECT_TRUE(b_values.is_null_at(1));
+    EXPECT_TRUE(elements.is_null_at(2));
+    EXPECT_EQ(b_data.get_data_at(3).to_string(), "ld");
+    EXPECT_EQ(b_data.get_data_at(4).to_string(), "le");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedMapStructValueChildren) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& map_schema = *_fields[field_idx];
+    ASSERT_EQ(map_schema.name, "nullable_map_int_struct_col");
+    ASSERT_EQ(map_schema.children.size(), 2);
+    const auto& value_schema = *map_schema.children[1];
+    ASSERT_EQ(value_schema.children.size(), 2);
+
+    format::LocalColumnIndex projection;
+    projection.index = map_schema.local_id;
+    projection.project_all_children = false;
+    format::LocalColumnIndex value_projection;
+    value_projection.index = value_schema.local_id;
+    value_projection.project_all_children = false;
+    format::LocalColumnIndex child_projection;
+    child_projection.index = value_schema.children[1]->local_id;
+    value_projection.children.push_back(std::move(child_projection));
+    projection.children.push_back(std::move(value_projection));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto st = factory.create(map_schema, &projection, &reader);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(reader->type()).get());
+    EXPECT_EQ(remove_nullable(map_type->get_key_type())->get_primitive_type(), TYPE_INT);
+    const auto* value_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(map_type->get_value_type()).get());
+    ASSERT_EQ(value_type->get_elements().size(), 1);
+    EXPECT_EQ(value_type->get_element_name(0), "b");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& keys = get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    const auto& struct_column = assert_cast<const ColumnStruct&>(values.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& b_values = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    const auto& b_data = assert_cast<const ColumnString&>(b_values.get_nested_column());
+    ASSERT_EQ(keys.size(), 4);
+    ASSERT_EQ(values.size(), 4);
+    EXPECT_EQ(keys.get_element(0), 101);
+    EXPECT_EQ(keys.get_element(1), 102);
+    EXPECT_EQ(keys.get_element(3), 104);
+    EXPECT_EQ(b_data.get_data_at(0).to_string(), "ma");
+    EXPECT_TRUE(b_values.is_null_at(1));
+    EXPECT_TRUE(values.is_null_at(2));
+    EXPECT_EQ(b_data.get_data_at(3).to_string(), "me");
+}
+
+TEST_F(ParquetColumnReaderTest, AllowsMapKeyWithValueProjection) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& map_schema = *_fields[field_idx];
+    ASSERT_EQ(map_schema.children.size(), 2);
+    const auto& key_schema = *map_schema.children[0];
+    const auto& value_schema = *map_schema.children[1];
+
+    auto projection = format::LocalColumnIndex::partial_local(map_schema.local_id);
+    projection.children.push_back(format::LocalColumnIndex::local(key_schema.local_id));
+    projection.children.push_back(format::LocalColumnIndex::local(value_schema.local_id));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    const auto st = factory.create(map_schema, &projection, &reader);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_NE(reader, nullptr);
+}
+
+TEST_F(ParquetColumnReaderTest, RejectMapKeyOnlyProjection) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& map_schema = *_fields[field_idx];
+    ASSERT_EQ(map_schema.children.size(), 2);
+    const auto& key_schema = *map_schema.children[0];
+
+    auto projection = format::LocalColumnIndex::partial_local(map_schema.local_id);
+    projection.children.push_back(format::LocalColumnIndex::local(key_schema.local_id));
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    const auto st = factory.create(map_schema, &projection, &reader);
+    ASSERT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find("contains no value"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructListChildOnly) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& struct_schema = *_fields[field_idx];
+    ASSERT_EQ(struct_schema.name, "nullable_struct_list_col");
+    ASSERT_EQ(struct_schema.children.size(), 2);
+
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "xs");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& xs_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(xs_nullable.size(), ROW_COUNT);
+    EXPECT_FALSE(xs_nullable.is_null_at(0));
+    EXPECT_FALSE(xs_nullable.is_null_at(2));
+    EXPECT_TRUE(xs_nullable.is_null_at(3));
+    EXPECT_FALSE(xs_nullable.is_null_at(4));
+    const auto& xs_array = assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+    const auto& offsets = xs_array.get_offsets();
+    ASSERT_EQ(offsets.size(), ROW_COUNT);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 2);
+    EXPECT_EQ(offsets[3], 2);
+    EXPECT_EQ(offsets[4], 4);
+    const auto& elements = assert_cast<const ColumnNullable&>(xs_array.get_data());
+    const auto& values = assert_cast<const ColumnInt32&>(elements.get_nested_column());
+    ASSERT_EQ(elements.size(), 4);
+    EXPECT_EQ(values.get_element(0), 1);
+    EXPECT_EQ(values.get_element(1), 2);
+    EXPECT_TRUE(elements.is_null_at(2));
+    EXPECT_EQ(values.get_element(3), 5);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipProjectedStructListChildOnlyThenRead) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& xs_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(xs_nullable.size(), 3);
+    EXPECT_FALSE(xs_nullable.is_null_at(1));
+    EXPECT_TRUE(xs_nullable.is_null_at(2));
+    const auto& xs_array = assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+    const auto& offsets = xs_array.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectProjectedStructListChildOnly) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& xs_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(xs_nullable.size(), 3);
+    EXPECT_FALSE(xs_nullable.is_null_at(0));
+    EXPECT_TRUE(xs_nullable.is_null_at(1));
+    EXPECT_FALSE(xs_nullable.is_null_at(2));
+    const auto& xs_array = assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+    const auto& offsets = xs_array.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructMapChildOnly) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    ASSERT_LT(field_idx, _fields.size());
+    const auto& struct_schema = *_fields[field_idx];
+    ASSERT_EQ(struct_schema.name, "nullable_struct_map_col");
+    ASSERT_EQ(struct_schema.children.size(), 2);
+
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "kv");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& kv_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(kv_nullable.size(), ROW_COUNT);
+    EXPECT_FALSE(kv_nullable.is_null_at(0));
+    EXPECT_FALSE(kv_nullable.is_null_at(2));
+    EXPECT_TRUE(kv_nullable.is_null_at(3));
+    EXPECT_FALSE(kv_nullable.is_null_at(4));
+    const auto& kv_map = assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+    const auto& offsets = kv_map.get_offsets();
+    ASSERT_EQ(offsets.size(), ROW_COUNT);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 2);
+    EXPECT_EQ(offsets[3], 2);
+    EXPECT_EQ(offsets[4], 3);
+    const auto& keys = get_nullable_nested_column<ColumnInt32>(kv_map.get_keys());
+    const auto& values = assert_cast<const ColumnNullable&>(kv_map.get_values());
+    const auto& value_data = assert_cast<const ColumnString&>(values.get_nested_column());
+    ASSERT_EQ(keys.size(), 3);
+    EXPECT_EQ(keys.get_element(0), 1);
+    EXPECT_EQ(keys.get_element(1), 2);
+    EXPECT_EQ(keys.get_element(2), 5);
+    EXPECT_EQ(value_data.get_data_at(0).to_string(), "one");
+    EXPECT_TRUE(values.is_null_at(1));
+    EXPECT_EQ(value_data.get_data_at(2).to_string(), "five");
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesListChildAsShapeSource) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesMapChildAsShapeSource) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesNestedStructComplexChildAsShapeSource) {
+    const auto field_idx = find_field_idx("nullable_struct_nested_struct_list_col");
+    auto reader = create_projected_grandchild_reader(field_idx, 0, 0);
+    ASSERT_NE(reader, nullptr);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_FALSE(nullable_column.is_null_at(4));
+
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& nested_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    EXPECT_FALSE(nested_nullable.is_null_at(0));
+    EXPECT_TRUE(nested_nullable.is_null_at(2));
+    EXPECT_FALSE(nested_nullable.is_null_at(3));
+    EXPECT_FALSE(nested_nullable.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, SkipProjectedStructMapChildOnlyThenRead) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& kv_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(kv_nullable.size(), 3);
+    EXPECT_FALSE(kv_nullable.is_null_at(1));
+    EXPECT_TRUE(kv_nullable.is_null_at(2));
+    const auto& kv_map = assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+    const auto& offsets = kv_map.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectProjectedStructMapChildOnly) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(field_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& kv_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(kv_nullable.size(), 3);
+    EXPECT_FALSE(kv_nullable.is_null_at(0));
+    EXPECT_TRUE(kv_nullable.is_null_at(1));
+    EXPECT_FALSE(kv_nullable.is_null_at(2));
+    const auto& kv_map = assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+    const auto& offsets = kv_map.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 3);
+    const auto& keys = get_nullable_nested_column<ColumnInt32>(kv_map.get_keys());
+    ASSERT_EQ(keys.size(), 3);
+    EXPECT_EQ(keys.get_element(0), 1);
+    EXPECT_EQ(keys.get_element(1), 2);
+    EXPECT_EQ(keys.get_element(2), 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_list_int_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_list_int_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& offsets = array_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_list_int_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& offsets = array_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 4);
+    EXPECT_EQ(offsets[2], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadStructListWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipStructListWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& xs_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+    ASSERT_EQ(xs_nullable.size(), 3);
+    EXPECT_FALSE(xs_nullable.is_null_at(1));
+    EXPECT_TRUE(xs_nullable.is_null_at(2));
+    const auto& xs_array = assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+    const auto& offsets = xs_array.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectStructListWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& a_values = get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+    EXPECT_EQ(a_values.get_element(0), 301);
+    EXPECT_EQ(a_values.get_element(1), 304);
+    EXPECT_EQ(a_values.get_element(2), 305);
+    const auto& xs_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+    ASSERT_EQ(xs_nullable.size(), 3);
+    EXPECT_FALSE(xs_nullable.is_null_at(0));
+    EXPECT_TRUE(xs_nullable.is_null_at(1));
+    EXPECT_FALSE(xs_nullable.is_null_at(2));
+    const auto& xs_array = assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+    const auto& offsets = xs_array.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadStructMapWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipStructMapWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& kv_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+    ASSERT_EQ(kv_nullable.size(), 3);
+    EXPECT_FALSE(kv_nullable.is_null_at(1));
+    EXPECT_TRUE(kv_nullable.is_null_at(2));
+    const auto& kv_map = assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+    const auto& offsets = kv_map.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectStructMapWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& a_values = get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+    EXPECT_EQ(a_values.get_element(0), 401);
+    EXPECT_EQ(a_values.get_element(1), 404);
+    EXPECT_EQ(a_values.get_element(2), 405);
+    const auto& kv_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+    ASSERT_EQ(kv_nullable.size(), 3);
+    EXPECT_FALSE(kv_nullable.is_null_at(0));
+    EXPECT_TRUE(kv_nullable.is_null_at(1));
+    EXPECT_FALSE(kv_nullable.is_null_at(2));
+    const auto& kv_map = assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+    const auto& offsets = kv_map.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 2);
+    EXPECT_EQ(offsets[2], 3);
+    const auto& keys = get_nullable_nested_column<ColumnInt32>(kv_map.get_keys());
+    const auto& values = assert_cast<const ColumnNullable&>(kv_map.get_values());
+    const auto& value_data = assert_cast<const ColumnString&>(values.get_nested_column());
+    ASSERT_EQ(keys.size(), 3);
+    EXPECT_EQ(keys.get_element(0), 1);
+    EXPECT_EQ(keys.get_element(1), 2);
+    EXPECT_EQ(keys.get_element(2), 5);
+    EXPECT_EQ(value_data.get_data_at(0).to_string(), "one");
+    EXPECT_TRUE(values.is_null_at(1));
+    EXPECT_EQ(value_data.get_data_at(2).to_string(), "five");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListStructWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_list_struct_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListStructWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_list_struct_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& offsets = array_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListStructWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_list_struct_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& offsets = array_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 4);
+    EXPECT_EQ(offsets[2], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListListWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_list_list_int_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListListWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_list_list_int_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 3);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 0);
+    EXPECT_EQ(outer_offsets[2], 1);
+
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(inner_nullable.size(), 1);
+    EXPECT_FALSE(inner_nullable.is_null_at(0));
+    const auto& inner_array = assert_cast<const ColumnArray&>(inner_nullable.get_nested_column());
+    const auto& inner_offsets = inner_array.get_offsets();
+    ASSERT_EQ(inner_offsets.size(), 1);
+    EXPECT_EQ(inner_offsets[0], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListListWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_list_list_int_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 3);
+    EXPECT_EQ(outer_offsets[0], 4);
+    EXPECT_EQ(outer_offsets[1], 5);
+    EXPECT_EQ(outer_offsets[2], 7);
+
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(inner_nullable.size(), 7);
+    EXPECT_TRUE(inner_nullable.is_null_at(2));
+    const auto& inner_array = assert_cast<const ColumnArray&>(inner_nullable.get_nested_column());
+    const auto& inner_offsets = inner_array.get_offsets();
+    ASSERT_EQ(inner_offsets.size(), 7);
+    EXPECT_EQ(inner_offsets[0], 2);
+    EXPECT_EQ(inner_offsets[3], 4);
+    EXPECT_EQ(inner_offsets[4], 5);
+    EXPECT_EQ(inner_offsets[6], 7);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& offsets = map_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& offsets = map_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 3);
+    EXPECT_EQ(offsets[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapStructWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapStructWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& offsets = map_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 0);
+    EXPECT_EQ(offsets[1], 0);
+    EXPECT_EQ(offsets[2], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapStructWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_map_int_struct_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& offsets = map_column.get_offsets();
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 2);
+    EXPECT_EQ(offsets[1], 3);
+    EXPECT_EQ(offsets[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapListWithOverflowAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapListWithOverflowThenRead) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& map_offsets = map_column.get_offsets();
+    ASSERT_EQ(map_offsets.size(), 3);
+    EXPECT_EQ(map_offsets[0], 0);
+    EXPECT_EQ(map_offsets[1], 0);
+    EXPECT_EQ(map_offsets[2], 2);
+
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    ASSERT_EQ(values.size(), 2);
+    EXPECT_TRUE(values.is_null_at(0));
+    EXPECT_FALSE(values.is_null_at(1));
+    const auto& list_column = assert_cast<const ColumnArray&>(values.get_nested_column());
+    const auto& list_offsets = list_column.get_offsets();
+    ASSERT_EQ(list_offsets.size(), 2);
+    EXPECT_EQ(list_offsets[0], 0);
+    EXPECT_EQ(list_offsets[1], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapListWithOverflow) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& map_offsets = map_column.get_offsets();
+    ASSERT_EQ(map_offsets.size(), 3);
+    EXPECT_EQ(map_offsets[0], 2);
+    EXPECT_EQ(map_offsets[1], 4);
+    EXPECT_EQ(map_offsets[2], 5);
+
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    ASSERT_EQ(values.size(), 5);
+    EXPECT_FALSE(values.is_null_at(0));
+    EXPECT_TRUE(values.is_null_at(2));
+    EXPECT_FALSE(values.is_null_at(4));
+    const auto& list_column = assert_cast<const ColumnArray&>(values.get_nested_column());
+    const auto& list_offsets = list_column.get_offsets();
+    ASSERT_EQ(list_offsets.size(), 5);
+    EXPECT_EQ(list_offsets[0], 2);
+    EXPECT_EQ(list_offsets[1], 2);
+    EXPECT_EQ(list_offsets[2], 2);
+    EXPECT_EQ(list_offsets[3], 4);
+    EXPECT_EQ(list_offsets[4], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadDeepListStructMapListAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(1, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 1);
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipDeepListStructMapListThenRead) {
+    const auto field_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(4, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 4);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 4);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 0);
+    EXPECT_EQ(outer_offsets[2], 2);
+    EXPECT_EQ(outer_offsets[3], 3);
+
+    const auto& struct_values = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(struct_values.size(), 3);
+    EXPECT_FALSE(struct_values.is_null_at(0));
+    EXPECT_FALSE(struct_values.is_null_at(1));
+    EXPECT_FALSE(struct_values.is_null_at(2));
+    const auto& struct_column = assert_cast<const ColumnStruct&>(struct_values.get_nested_column());
+    const auto& map_values = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(map_values.size(), 3);
+    EXPECT_TRUE(map_values.is_null_at(0));
+    EXPECT_FALSE(map_values.is_null_at(1));
+    EXPECT_FALSE(map_values.is_null_at(2));
+
+    const auto& map_column = assert_cast<const ColumnMap&>(map_values.get_nested_column());
+    const auto& map_offsets = map_column.get_offsets();
+    ASSERT_EQ(map_offsets.size(), 3);
+    EXPECT_EQ(map_offsets[0], 0);
+    EXPECT_EQ(map_offsets[1], 0);
+    EXPECT_EQ(map_offsets[2], 2);
+    const auto& keys = get_nullable_nested_column<ColumnInt32>(map_column.get_keys());
+    ASSERT_EQ(keys.size(), 2);
+    EXPECT_EQ(keys.get_element(0), 3);
+    EXPECT_EQ(keys.get_element(1), 4);
+    const auto& lists = assert_cast<const ColumnNullable&>(map_column.get_values());
+    ASSERT_EQ(lists.size(), 2);
+    EXPECT_TRUE(lists.is_null_at(0));
+    EXPECT_FALSE(lists.is_null_at(1));
+    const auto& list_column = assert_cast<const ColumnArray&>(lists.get_nested_column());
+    const auto& list_offsets = list_column.get_offsets();
+    ASSERT_EQ(list_offsets.size(), 2);
+    EXPECT_EQ(list_offsets[0], 0);
+    EXPECT_EQ(list_offsets[1], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectDeepListStructMapList) {
+    const auto field_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 3);
+    EXPECT_EQ(outer_offsets[0], 2);
+    EXPECT_EQ(outer_offsets[1], 4);
+    EXPECT_EQ(outer_offsets[2], 5);
+
+    const auto& struct_values = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(struct_values.size(), 5);
+    EXPECT_FALSE(struct_values.is_null_at(0));
+    EXPECT_TRUE(struct_values.is_null_at(1));
+    EXPECT_FALSE(struct_values.is_null_at(2));
+    EXPECT_FALSE(struct_values.is_null_at(3));
+    EXPECT_FALSE(struct_values.is_null_at(4));
+    const auto& struct_column = assert_cast<const ColumnStruct&>(struct_values.get_nested_column());
+    const auto& map_values = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(map_values.size(), 5);
+    EXPECT_FALSE(map_values.is_null_at(0));
+    EXPECT_TRUE(map_values.is_null_at(1));
+    EXPECT_TRUE(map_values.is_null_at(2));
+    EXPECT_FALSE(map_values.is_null_at(3));
+    EXPECT_FALSE(map_values.is_null_at(4));
+    const auto& map_column = assert_cast<const ColumnMap&>(map_values.get_nested_column());
+    const auto& map_offsets = map_column.get_offsets();
+    ASSERT_EQ(map_offsets.size(), 5);
+    EXPECT_EQ(map_offsets[0], 2);
+    EXPECT_EQ(map_offsets[1], 2);
+    EXPECT_EQ(map_offsets[2], 2);
+    EXPECT_EQ(map_offsets[3], 2);
+    EXPECT_EQ(map_offsets[4], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadDeepMapListMapAcrossChunks) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+
+    int64_t rows_read = 0;
+    auto st = reader->read(1, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 1);
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipDeepMapListMapThenRead) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    auto st = reader->skip(1);
+    ASSERT_TRUE(st.ok()) << st;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    st = reader->read(4, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 4);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_TRUE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    const auto& outer_map = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_map.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 4);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 0);
+    EXPECT_EQ(outer_offsets[2], 2);
+    EXPECT_EQ(outer_offsets[3], 3);
+    const auto& outer_keys = get_nullable_nested_column<ColumnInt32>(outer_map.get_keys());
+    ASSERT_EQ(outer_keys.size(), 3);
+    EXPECT_EQ(outer_keys.get_element(0), 30);
+    EXPECT_EQ(outer_keys.get_element(1), 40);
+    EXPECT_EQ(outer_keys.get_element(2), 50);
+
+    const auto& lists = assert_cast<const ColumnNullable&>(outer_map.get_values());
+    ASSERT_EQ(lists.size(), 3);
+    EXPECT_TRUE(lists.is_null_at(0));
+    EXPECT_FALSE(lists.is_null_at(1));
+    EXPECT_FALSE(lists.is_null_at(2));
+    const auto& list_column = assert_cast<const ColumnArray&>(lists.get_nested_column());
+    const auto& list_offsets = list_column.get_offsets();
+    ASSERT_EQ(list_offsets.size(), 3);
+    EXPECT_EQ(list_offsets[0], 0);
+    EXPECT_EQ(list_offsets[1], 1);
+    EXPECT_EQ(list_offsets[2], 3);
+    const auto& inner_maps = assert_cast<const ColumnNullable&>(list_column.get_data());
+    ASSERT_EQ(inner_maps.size(), 3);
+    EXPECT_FALSE(inner_maps.is_null_at(0));
+    EXPECT_TRUE(inner_maps.is_null_at(1));
+    EXPECT_FALSE(inner_maps.is_null_at(2));
+}
+
+TEST_F(ParquetColumnReaderTest, SelectDeepMapListMap) {
+    const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto reader = create_reader(field_idx);
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    selection.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 3);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    const auto& outer_map = assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_map.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 3);
+    EXPECT_EQ(outer_offsets[0], 2);
+    EXPECT_EQ(outer_offsets[1], 4);
+    EXPECT_EQ(outer_offsets[2], 5);
+    const auto& outer_keys = get_nullable_nested_column<ColumnInt32>(outer_map.get_keys());
+    ASSERT_EQ(outer_keys.size(), 5);
+    EXPECT_EQ(outer_keys.get_element(0), 10);
+    EXPECT_EQ(outer_keys.get_element(1), 20);
+    EXPECT_EQ(outer_keys.get_element(2), 30);
+    EXPECT_EQ(outer_keys.get_element(3), 40);
+    EXPECT_EQ(outer_keys.get_element(4), 50);
+
+    const auto& lists = assert_cast<const ColumnNullable&>(outer_map.get_values());
+    ASSERT_EQ(lists.size(), 5);
+    EXPECT_FALSE(lists.is_null_at(0));
+    EXPECT_FALSE(lists.is_null_at(1));
+    EXPECT_TRUE(lists.is_null_at(2));
+    EXPECT_FALSE(lists.is_null_at(3));
+    EXPECT_FALSE(lists.is_null_at(4));
+    const auto& list_column = assert_cast<const ColumnArray&>(lists.get_nested_column());
+    const auto& list_offsets = list_column.get_offsets();
+    ASSERT_EQ(list_offsets.size(), 5);
+    EXPECT_EQ(list_offsets[0], 3);
+    EXPECT_EQ(list_offsets[1], 3);
+    EXPECT_EQ(list_offsets[2], 3);
+    EXPECT_EQ(list_offsets[3], 4);
+    EXPECT_EQ(list_offsets[4], 6);
+}
+
+} // namespace
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp
new file mode 100644
index 00000000000000..0d0f9a2f8567cc
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp
@@ -0,0 +1,506 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+#include <arrow/array/builder_binary.h>
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+
+namespace doris::format::parquet {
+namespace {
+
+std::shared_ptr<arrow::Array> fixed_binary_array(const std::vector<std::string>& values,
+                                                 int byte_width) {
+    auto type = arrow::fixed_size_binary(byte_width);
+    arrow::FixedSizeBinaryBuilder builder(type, arrow::default_memory_pool());
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(value.data())).ok());
+    }
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder.Finish(&array).ok());
+    return array;
+}
+
+ParquetLeafReader make_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type) {
+    return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr);
+}
+
+struct CapturedDecodedView {
+    DecodedValueKind value_kind = DecodedValueKind::INT32;
+    DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN;
+    int64_t row_count = 0;
+    int decimal_precision = -1;
+    int decimal_scale = -1;
+    int fixed_length = -1;
+    bool timestamp_is_adjusted_to_utc = false;
+    bool enable_strict_mode = false;
+    const cctz::time_zone* timezone = nullptr;
+    bool null_map_is_null = true;
+    std::vector<uint8_t> null_map;
+    std::vector<uint8_t> fixed_values;
+    std::vector<StringRef> binary_values;
+    std::vector<std::string> owned_binary_values;
+};
+
+ParquetLeafReader make_spy_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type,
+                                       CapturedDecodedView* captured,
+                                       const cctz::time_zone* timezone = nullptr,
+                                       bool enable_strict_mode = false) {
+    auto appender = [captured](MutableColumnPtr&, const DecodedColumnView& view) {
+        captured->value_kind = view.value_kind;
+        captured->time_unit = view.time_unit;
+        captured->row_count = view.row_count;
+        captured->decimal_precision = view.decimal_precision;
+        captured->decimal_scale = view.decimal_scale;
+        captured->fixed_length = view.fixed_length;
+        captured->timestamp_is_adjusted_to_utc = view.timestamp_is_adjusted_to_utc;
+        captured->enable_strict_mode = view.enable_strict_mode;
+        captured->timezone = view.timezone;
+        captured->null_map_is_null = view.null_map == nullptr;
+        captured->null_map.clear();
+        if (view.null_map != nullptr) {
+            captured->null_map.assign(view.null_map, view.null_map + view.row_count);
+        }
+        captured->fixed_values.clear();
+        if (view.values != nullptr && view.value_kind == DecodedValueKind::INT64) {
+            captured->fixed_values.assign(view.values, view.values + view.row_count * 8);
+        } else if (view.values != nullptr && view.value_kind == DecodedValueKind::FLOAT) {
+            captured->fixed_values.assign(view.values, view.values + view.row_count * 4);
+        } else if (view.values != nullptr && view.value_kind == DecodedValueKind::INT32) {
+            captured->fixed_values.assign(view.values, view.values + view.row_count * 4);
+        }
+        captured->binary_values.clear();
+        captured->owned_binary_values.clear();
+        if (view.binary_values != nullptr) {
+            captured->owned_binary_values.reserve(view.binary_values->size());
+            for (const auto& value : *view.binary_values) {
+                captured->owned_binary_values.emplace_back(
+                        value.data == nullptr ? std::string()
+                                              : std::string(value.data, value.size));
+            }
+            captured->binary_values.reserve(captured->owned_binary_values.size());
+            for (const auto& value : captured->owned_binary_values) {
+                captured->binary_values.emplace_back(value.data(), value.size());
+            }
+        }
+        return Status::OK();
+    };
+    return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr, {}, timezone,
+                             enable_strict_mode, std::move(appender));
+}
+
+} // namespace
+
+struct ParquetLeafReaderTestAccess {
+    static ParquetLeafBatch make_fixed_batch(const std::vector<int16_t>& def_levels,
+                                             const std::vector<int16_t>& rep_levels,
+                                             const std::vector<int32_t>& values,
+                                             bool read_dense_for_nullable = false) {
+        ParquetLeafBatch batch;
+        batch._value_kind = DecodedValueKind::INT32;
+        batch._consumed_level_count = static_cast<int64_t>(def_levels.size());
+        batch._decoded_level_count = static_cast<int64_t>(def_levels.size());
+        batch._values_written = static_cast<int64_t>(values.size());
+        batch._def_levels = def_levels.data();
+        batch._rep_levels = rep_levels.data();
+        batch._fixed_values = reinterpret_cast<const uint8_t*>(values.data());
+        batch._read_dense_for_nullable = read_dense_for_nullable;
+        return batch;
+    }
+
+    static Status build_nested_batch(const ParquetLeafReader& reader,
+                                     const ParquetLeafBatch& leaf_batch, int64_t records_read,
+                                     int16_t value_slot_definition_level,
+                                     int16_t value_slot_repetition_level,
+                                     ParquetNestedScalarBatch* nested_batch) {
+        return reader.build_nested_batch_from_leaf_batch(leaf_batch, records_read,
+                                                         value_slot_definition_level, nested_batch,
+                                                         value_slot_repetition_level);
+    }
+};
+
+std::shared_ptr<::parquet::ColumnDescriptor> int32_column_descriptor(int16_t max_definition_level,
+                                                                     int16_t max_repetition_level) {
+    auto node = ::parquet::schema::PrimitiveNode::Make("leaf", ::parquet::Repetition::OPTIONAL,
+                                                       ::parquet::Type::INT32);
+    return std::make_shared<::parquet::ColumnDescriptor>(node, max_definition_level,
+                                                         max_repetition_level);
+}
+
+ParquetLeafReader make_nested_leaf_reader(
+        const std::shared_ptr<::parquet::ColumnDescriptor>& descriptor, DataTypePtr type) {
+    ParquetTypeDescriptor type_descriptor;
+    type_descriptor.physical_type = ::parquet::Type::INT32;
+    type_descriptor.doris_type = type;
+    return ParquetLeafReader(descriptor.get(), type_descriptor, std::move(type), "nested_leaf",
+                             nullptr);
+}
+
+TEST(ParquetLeafReaderTest, DenseNullableFixedValuesAreSpacedBeforeSerde) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::INT32;
+    auto type = make_nullable(std::make_shared<DataTypeInt32>());
+    auto reader = make_leaf_reader(descriptor, type);
+
+    const std::vector<int32_t> compact_values = {10, 30, 50};
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::INT32;
+    batch._fixed_values = reinterpret_cast<const uint8_t*>(compact_values.data());
+    batch._values_written = compact_values.size();
+    batch._read_dense_for_nullable = true;
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = type->create_column();
+    auto status = reader.append_values(batch, 5, &null_map, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& nullable = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable.size(), 5);
+    EXPECT_FALSE(nullable.is_null_at(0));
+    EXPECT_TRUE(nullable.is_null_at(1));
+    EXPECT_FALSE(nullable.is_null_at(2));
+    EXPECT_TRUE(nullable.is_null_at(3));
+    EXPECT_FALSE(nullable.is_null_at(4));
+    const auto& nested = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+    EXPECT_EQ(nested.get_element(0), 10);
+    EXPECT_EQ(nested.get_element(2), 30);
+    EXPECT_EQ(nested.get_element(4), 50);
+}
+
+TEST(ParquetLeafReaderTest, DenseNullableFixedValuesRejectCountMismatch) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::INT32;
+    auto type = make_nullable(std::make_shared<DataTypeInt32>());
+    auto reader = make_leaf_reader(descriptor, type);
+
+    const std::vector<int32_t> compact_values = {10, 30};
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::INT32;
+    batch._fixed_values = reinterpret_cast<const uint8_t*>(compact_values.data());
+    batch._values_written = compact_values.size();
+    batch._read_dense_for_nullable = true;
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = type->create_column();
+    auto status = reader.append_values(batch, 5, &null_map, column);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid dense nullable parquet values"), std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, Float16BinaryValuesAreConvertedToFloat) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+    descriptor.fixed_length = 2;
+    auto type = std::make_shared<DataTypeFloat32>();
+    auto reader = make_leaf_reader(descriptor, type);
+
+    auto half = [](uint16_t value) {
+        std::string bytes(sizeof(value), '\0');
+        memcpy(bytes.data(), &value, sizeof(value));
+        return bytes;
+    };
+
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    batch._binary_chunks = {fixed_binary_array(
+            {half(0x0000), half(0x8000), half(0x3E00), half(0x0001), half(0x7E00)}, 2)};
+    batch._values_written = 5;
+
+    auto column = type->create_column();
+    auto status = reader.append_values(batch, 5, nullptr, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& floats = assert_cast<const ColumnFloat32&>(*column);
+    ASSERT_EQ(floats.size(), 5);
+    EXPECT_FLOAT_EQ(floats.get_element(0), 0.0F);
+    EXPECT_TRUE(std::signbit(floats.get_element(1)));
+    EXPECT_FLOAT_EQ(floats.get_element(2), 1.5F);
+    EXPECT_NEAR(floats.get_element(3), 5.9604645e-8F, 1e-12F);
+    EXPECT_TRUE(std::isnan(floats.get_element(4)));
+}
+
+TEST(ParquetLeafReaderTest, BinaryDenseNullableValuesAreSpacedWithNullRefs) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::BYTE_ARRAY;
+    auto type = make_nullable(std::make_shared<DataTypeString>());
+    auto reader = make_leaf_reader(descriptor, type);
+
+    arrow::BinaryBuilder builder;
+    ASSERT_TRUE(builder.Append("aa").ok());
+    ASSERT_TRUE(builder.Append("cc").ok());
+    ASSERT_TRUE(builder.Append("ee").ok());
+    std::shared_ptr<arrow::Array> array;
+    ASSERT_TRUE(builder.Finish(&array).ok());
+
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::BINARY;
+    batch._binary_chunks = {array};
+    batch._values_written = 3;
+    batch._read_dense_for_nullable = true;
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = type->create_column();
+    auto status = reader.append_values(batch, 5, &null_map, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& nullable = assert_cast<const ColumnNullable&>(*column);
+    const auto& strings = assert_cast<const ColumnString&>(nullable.get_nested_column());
+    ASSERT_EQ(nullable.size(), 5);
+    EXPECT_EQ(strings.get_data_at(0).to_string(), "aa");
+    EXPECT_TRUE(nullable.is_null_at(1));
+    EXPECT_EQ(strings.get_data_at(2).to_string(), "cc");
+    EXPECT_TRUE(nullable.is_null_at(3));
+    EXPECT_EQ(strings.get_data_at(4).to_string(), "ee");
+}
+
+TEST(ParquetLeafReaderTest, BinaryDenseNullableRejectsCountMismatch) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::BYTE_ARRAY;
+    auto type = make_nullable(std::make_shared<DataTypeString>());
+    auto reader = make_leaf_reader(descriptor, type);
+
+    arrow::BinaryBuilder builder;
+    ASSERT_TRUE(builder.Append("only_one").ok());
+    std::shared_ptr<arrow::Array> array;
+    ASSERT_TRUE(builder.Finish(&array).ok());
+
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::BINARY;
+    batch._binary_chunks = {array};
+    batch._values_written = 1;
+    batch._read_dense_for_nullable = true;
+
+    const NullMap null_map = {0, 1, 0};
+    auto column = type->create_column();
+    auto status = reader.append_values(batch, 3, &null_map, column);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid dense nullable parquet binary values"),
+              std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, DecodedColumnViewCarriesDescriptorSessionAndNullMapFields) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::INT64;
+    descriptor.time_unit = ParquetTimeUnit::NANOS;
+    descriptor.decimal_precision = 18;
+    descriptor.decimal_scale = 4;
+    descriptor.fixed_length = 12;
+    descriptor.timestamp_is_adjusted_to_utc = true;
+    auto type = make_nullable(std::make_shared<DataTypeInt64>());
+    cctz::time_zone shanghai;
+    ASSERT_TRUE(cctz::load_time_zone("Asia/Shanghai", &shanghai));
+
+    CapturedDecodedView captured;
+    auto reader = make_spy_leaf_reader(descriptor, type, &captured, &shanghai, true);
+    const std::vector<int64_t> values = {100, 200, 300};
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::INT64;
+    batch._fixed_values = reinterpret_cast<const uint8_t*>(values.data());
+    batch._values_written = values.size();
+
+    const NullMap null_map = {0, 1, 0};
+    auto column = type->create_column();
+    ASSERT_TRUE(reader.append_values(batch, 3, &null_map, column).ok());
+    EXPECT_EQ(captured.value_kind, DecodedValueKind::INT64);
+    EXPECT_EQ(captured.time_unit, DecodedTimeUnit::NANOS);
+    EXPECT_EQ(captured.row_count, 3);
+    EXPECT_EQ(captured.decimal_precision, 18);
+    EXPECT_EQ(captured.decimal_scale, 4);
+    EXPECT_EQ(captured.fixed_length, 12);
+    EXPECT_TRUE(captured.timestamp_is_adjusted_to_utc);
+    EXPECT_TRUE(captured.enable_strict_mode);
+    EXPECT_EQ(captured.timezone, &shanghai);
+    EXPECT_FALSE(captured.null_map_is_null);
+    EXPECT_EQ(captured.null_map, std::vector<uint8_t>({0, 1, 0}));
+
+    auto required_column = type->create_column();
+    ASSERT_TRUE(reader.append_values(batch, 3, nullptr, required_column).ok());
+    EXPECT_TRUE(captured.null_map_is_null);
+
+    const NullMap empty_null_map;
+    ASSERT_TRUE(reader.append_values(batch, 3, &empty_null_map, required_column).ok());
+    EXPECT_TRUE(captured.null_map_is_null);
+}
+
+TEST(ParquetLeafReaderTest, DecodedColumnViewCapturesBinaryFixedLengthAndFloat16Override) {
+    ParquetTypeDescriptor binary_descriptor;
+    binary_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    binary_descriptor.fixed_length = 4;
+    auto type = std::make_shared<DataTypeString>();
+
+    CapturedDecodedView binary_view;
+    auto binary_reader = make_spy_leaf_reader(binary_descriptor, type, &binary_view);
+    ParquetLeafBatch binary_batch;
+    binary_batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    binary_batch._binary_chunks = {fixed_binary_array({"abcd", "wxyz"}, 4)};
+    binary_batch._values_written = 2;
+    auto binary_column = type->create_column();
+    ASSERT_TRUE(binary_reader.append_values(binary_batch, 2, nullptr, binary_column).ok());
+    EXPECT_EQ(binary_view.value_kind, DecodedValueKind::FIXED_BINARY);
+    EXPECT_EQ(binary_view.fixed_length, 4);
+    ASSERT_EQ(binary_view.owned_binary_values.size(), 2);
+    EXPECT_EQ(binary_view.owned_binary_values[0], "abcd");
+    EXPECT_EQ(binary_view.owned_binary_values[1], "wxyz");
+
+    ParquetTypeDescriptor float16_descriptor;
+    float16_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    float16_descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+    float16_descriptor.fixed_length = 2;
+    CapturedDecodedView float16_view;
+    auto float16_reader = make_spy_leaf_reader(float16_descriptor,
+                                               std::make_shared<DataTypeFloat32>(), &float16_view);
+    auto half = [](uint16_t value) {
+        std::string bytes(sizeof(value), '\0');
+        memcpy(bytes.data(), &value, sizeof(value));
+        return bytes;
+    };
+    ParquetLeafBatch float16_batch;
+    float16_batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    float16_batch._binary_chunks = {fixed_binary_array({half(0x3E00), half(0x4000)}, 2)};
+    float16_batch._values_written = 2;
+    auto float16_column = std::make_shared<DataTypeFloat32>()->create_column();
+    ASSERT_TRUE(float16_reader.append_values(float16_batch, 2, nullptr, float16_column).ok());
+    EXPECT_EQ(float16_view.value_kind, DecodedValueKind::FLOAT);
+    ASSERT_EQ(float16_view.fixed_values.size(), sizeof(float) * 2);
+    const auto* floats = reinterpret_cast<const float*>(float16_view.fixed_values.data());
+    EXPECT_FLOAT_EQ(floats[0], 1.5F);
+    EXPECT_FLOAT_EQ(floats[1], 2.0F);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLevels) {
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> def_levels = {2, 2, 2};
+    const std::vector<int16_t> rep_levels = {0, 1, 0};
+    const std::vector<int32_t> values = {10, 20, 30};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values);
+
+    ParquetNestedScalarBatch nested_batch;
+    auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 2, 2, 1,
+                                                                  &nested_batch);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(nested_batch.records_read, 2);
+    EXPECT_EQ(nested_batch.levels_written, 3);
+    EXPECT_EQ(nested_batch.value_indices, std::vector<int64_t>({0, 1, 2}));
+    const auto& nested_values = assert_cast<const ColumnInt32&>(*nested_batch.values_column);
+    ASSERT_EQ(nested_values.size(), 3);
+    EXPECT_EQ(nested_values.get_element(0), 10);
+    EXPECT_EQ(nested_values.get_element(2), 30);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutValueSlots) {
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> def_levels = {2, 1, 2, 0};
+    const std::vector<int16_t> rep_levels = {0, 1, 0, 0};
+    const std::vector<int32_t> values = {10, 777, 30};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values);
+
+    ParquetNestedScalarBatch nested_batch;
+    auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 3, 1, 1,
+                                                                  &nested_batch);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(nested_batch.value_indices, std::vector<int64_t>({0, -1, 2, -1}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLeafValues) {
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> def_levels = {2, 1, 2, 0};
+    const std::vector<int16_t> rep_levels = {0, 1, 0, 0};
+    const std::vector<int32_t> values = {10, 30};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values);
+
+    ParquetNestedScalarBatch nested_batch;
+    auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 3, 1, 1,
+                                                                  &nested_batch);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(nested_batch.value_indices, std::vector<int64_t>({0, -1, 1, -1}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutPayloadSlots) {
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> def_levels = {1, 2, 0, 2};
+    const std::vector<int16_t> rep_levels = {0, 0, 0, 0};
+    const std::vector<int32_t> values = {777, 10, 30};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values);
+
+    ParquetNestedScalarBatch nested_batch;
+    auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 4, 2, 1,
+                                                                  &nested_batch);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(nested_batch.value_indices, std::vector<int64_t>({-1, 1, -1, 2}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchRejectsMismatchedValueLayout) {
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> def_levels = {2, 0, 2, 0};
+    const std::vector<int16_t> rep_levels = {0, 0, 0, 0};
+    const std::vector<int32_t> values = {10, 20, 30};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values);
+
+    ParquetNestedScalarBatch nested_batch;
+    const auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 4, 2, 1,
+                                                                        &nested_batch);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("inconsistent value count"), std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchRejectsDenseNullable) {
+    auto descriptor = int32_column_descriptor(1, 0);
+    auto reader =
+            make_nested_leaf_reader(descriptor, make_nullable(std::make_shared<DataTypeInt32>()));
+    const std::vector<int16_t> def_levels = {1};
+    const std::vector<int16_t> rep_levels = {0};
+    const std::vector<int32_t> values = {10};
+    const auto leaf_batch =
+            ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values, true);
+
+    ParquetNestedScalarBatch nested_batch;
+    const auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 1, 0, 0,
+                                                                        &nested_batch);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Dense nullable parquet nested reader is not supported"),
+              std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_reader_control_test.cpp b/be/test/format_v2/parquet/parquet_reader_control_test.cpp
new file mode 100644
index 00000000000000..c7d430350d1b26
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_reader_control_test.cpp
@@ -0,0 +1,1034 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+#include "format_v2/parquet/reader/list_column_reader.h"
+#include "format_v2/parquet/reader/map_column_reader.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+#include "format_v2/parquet/reader/struct_column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "storage/utils.h"
+
+namespace doris::format::parquet {
+namespace {
+
+ParquetColumnSchema int64_schema(std::string name = "mock") {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = std::move(name);
+    schema.type = std::make_shared<DataTypeInt64>();
+    return schema;
+}
+
+ParquetColumnSchema nested_int64_schema(std::string name, int16_t nullable_definition_level,
+                                        int16_t definition_level, int16_t repetition_level = 0,
+                                        int16_t repeated_ancestor_definition_level = 0) {
+    ParquetColumnSchema schema = int64_schema(std::move(name));
+    schema.type = make_nullable(std::make_shared<DataTypeInt64>());
+    schema.nullable_definition_level = nullable_definition_level;
+    schema.definition_level = definition_level;
+    schema.repetition_level = repetition_level;
+    schema.repeated_repetition_level = repetition_level;
+    schema.repeated_ancestor_definition_level = repeated_ancestor_definition_level;
+    return schema;
+}
+
+ParquetColumnSchema nested_struct_schema() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "struct";
+    schema.kind = ParquetColumnSchemaKind::STRUCT;
+    schema.nullable_definition_level = 1;
+    schema.definition_level = 2;
+    schema.type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {make_nullable(std::make_shared<DataTypeInt64>()),
+                       make_nullable(std::make_shared<DataTypeInt64>())},
+            Strings {"a", "b"}));
+    return schema;
+}
+
+ParquetColumnSchema nested_list_schema(std::string name, DataTypePtr element_type,
+                                       int16_t nullable_definition_level, int16_t definition_level,
+                                       int16_t repetition_level,
+                                       int16_t repeated_ancestor_definition_level) {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = std::move(name);
+    schema.kind = ParquetColumnSchemaKind::LIST;
+    schema.nullable_definition_level = nullable_definition_level;
+    schema.definition_level = definition_level;
+    schema.repetition_level = repetition_level;
+    schema.repeated_repetition_level = repetition_level;
+    schema.repeated_ancestor_definition_level = repeated_ancestor_definition_level;
+    schema.type = make_nullable(std::make_shared<DataTypeArray>(std::move(element_type)));
+    return schema;
+}
+
+ParquetColumnSchema nested_map_schema(
+        DataTypePtr value_type = make_nullable(std::make_shared<DataTypeInt64>())) {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "map";
+    schema.kind = ParquetColumnSchemaKind::MAP;
+    schema.nullable_definition_level = 1;
+    schema.definition_level = 2;
+    schema.repetition_level = 1;
+    schema.repeated_ancestor_definition_level = 2;
+    schema.type = make_nullable(std::make_shared<DataTypeMap>(
+            make_nullable(std::make_shared<DataTypeInt64>()), std::move(value_type)));
+    return schema;
+}
+
+ParquetColumnSchema bare_repeated_int64_list_schema() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "repeated";
+    schema.kind = ParquetColumnSchemaKind::LIST;
+    schema.definition_level = 1;
+    schema.repetition_level = 1;
+    schema.repeated_repetition_level = 1;
+    schema.repeated_ancestor_definition_level = 1;
+    schema.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());
+    return schema;
+}
+
+std::unique_ptr<ParquetColumnSchema> primitive_child(int local_id, std::string name,
+                                                     DataTypePtr type) {
+    auto child = std::make_unique<ParquetColumnSchema>();
+    child->local_id = local_id;
+    child->name = std::move(name);
+    child->kind = ParquetColumnSchemaKind::PRIMITIVE;
+    child->leaf_column_id = local_id;
+    child->type = std::move(type);
+    child->type_descriptor.physical_type = ::parquet::Type::INT32;
+    child->type_descriptor.doris_type = child->type;
+    return child;
+}
+
+ParquetColumnSchema struct_schema_for_projection() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "s";
+    schema.kind = ParquetColumnSchemaKind::STRUCT;
+    schema.children.push_back(primitive_child(0, "a", std::make_shared<DataTypeInt32>()));
+    schema.children.push_back(primitive_child(1, "b", std::make_shared<DataTypeInt32>()));
+    DataTypes types = {make_nullable(schema.children[0]->type),
+                       make_nullable(schema.children[1]->type)};
+    Strings names = {"a", "b"};
+    schema.type = std::make_shared<DataTypeStruct>(types, names);
+    return schema;
+}
+
+ParquetColumnSchema list_schema_for_projection() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "xs";
+    schema.kind = ParquetColumnSchemaKind::LIST;
+    schema.children.push_back(primitive_child(0, "element", std::make_shared<DataTypeInt32>()));
+    schema.type = std::make_shared<DataTypeArray>(schema.children[0]->type);
+    return schema;
+}
+
+ParquetColumnSchema map_schema_for_projection() {
+    ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "m";
+    schema.kind = ParquetColumnSchemaKind::MAP;
+    schema.children.push_back(primitive_child(0, "key", std::make_shared<DataTypeString>()));
+    schema.children.push_back(primitive_child(1, "value", std::make_shared<DataTypeInt32>()));
+    schema.type = std::make_shared<DataTypeMap>(make_nullable(schema.children[0]->type),
+                                                make_nullable(schema.children[1]->type));
+    return schema;
+}
+
+class CursorColumnReader final : public ParquetColumnReader {
+public:
+    CursorColumnReader() : ParquetColumnReader(int64_schema(), std::make_shared<DataTypeInt64>()) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override {
+        if (column.get() == nullptr || rows_read == nullptr) {
+            return Status::InvalidArgument("invalid mock read arguments");
+        }
+        auto* values = assert_cast<ColumnInt64*>(column.get());
+        for (int64_t row = 0; row < rows; ++row) {
+            values->insert_value(_cursor + row);
+        }
+        _read_lengths.push_back(rows);
+        _cursor += rows;
+        *rows_read = rows;
+        return Status::OK();
+    }
+
+    Status skip(int64_t rows) override {
+        _skip_lengths.push_back(rows);
+        _cursor += rows;
+        return Status::OK();
+    }
+
+    int64_t cursor() const { return _cursor; }
+    const std::vector<int64_t>& skip_lengths() const { return _skip_lengths; }
+    const std::vector<int64_t>& read_lengths() const { return _read_lengths; }
+
+private:
+    int64_t _cursor = 0;
+    std::vector<int64_t> _skip_lengths;
+    std::vector<int64_t> _read_lengths;
+};
+
+class NestedBuildReader final : public ParquetColumnReader {
+public:
+    explicit NestedBuildReader(int64_t values_to_build)
+            : ParquetColumnReader(int64_schema("nested"), std::make_shared<DataTypeInt64>()),
+              _values_to_build(values_to_build) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        if (column.get() == nullptr || values_read == nullptr) {
+            return Status::InvalidArgument("invalid mock nested build arguments");
+        }
+        _last_length_upper_bound = length_upper_bound;
+        auto* values = assert_cast<ColumnInt64*>(column.get());
+        for (int64_t value = 0; value < _values_to_build; ++value) {
+            values->insert_value(value);
+        }
+        *values_read = _values_to_build;
+        return Status::OK();
+    }
+
+    int64_t last_length_upper_bound() const { return _last_length_upper_bound; }
+
+private:
+    int64_t _values_to_build = 0;
+    int64_t _last_length_upper_bound = 0;
+};
+
+class ScriptedNestedReader final : public ParquetColumnReader {
+public:
+    ScriptedNestedReader(ParquetColumnSchema schema, DataTypePtr type,
+                         std::vector<int16_t> def_levels, std::vector<int16_t> rep_levels,
+                         bool has_repeated_child = false, bool build_nulls = false)
+            : ParquetColumnReader(schema, std::move(type)),
+              _def_levels(std::move(def_levels)),
+              _rep_levels(std::move(rep_levels)),
+              _has_repeated_child(has_repeated_child),
+              _build_nulls(build_nulls) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+
+    Status load_nested_batch(int64_t rows) override {
+        _load_lengths.push_back(rows);
+        return Status::OK();
+    }
+
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        _build_lengths.push_back(length_upper_bound);
+        if (column.get() == nullptr || values_read == nullptr) {
+            return Status::InvalidArgument("invalid scripted nested build arguments");
+        }
+        for (int64_t row = 0; row < length_upper_bound; ++row) {
+            insert_value(column, _next_value++, _build_nulls);
+        }
+        *values_read = length_upper_bound;
+        return Status::OK();
+    }
+
+    const std::vector<int16_t>& nested_definition_levels() const override { return _def_levels; }
+    const std::vector<int16_t>& nested_repetition_levels() const override { return _rep_levels; }
+    int64_t nested_levels_written() const override {
+        return static_cast<int64_t>(_def_levels.size());
+    }
+    bool is_or_has_repeated_child() const override { return _has_repeated_child; }
+
+    const std::vector<int64_t>& build_lengths() const { return _build_lengths; }
+
+private:
+    static void insert_value(MutableColumnPtr& column, int64_t value, bool is_null) {
+        if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column);
+            nullable_column != nullptr) {
+            if (is_null) {
+                nullable_column->insert_default();
+                return;
+            }
+            assert_cast<ColumnInt64&>(nullable_column->get_nested_column()).insert_value(value);
+            nullable_column->get_null_map_data().push_back(0);
+            return;
+        }
+        assert_cast<ColumnInt64&>(*column).insert_value(value);
+    }
+
+    std::vector<int16_t> _def_levels;
+    std::vector<int16_t> _rep_levels;
+    bool _has_repeated_child = false;
+    bool _build_nulls = false;
+    int64_t _next_value = 0;
+    std::vector<int64_t> _load_lengths;
+    std::vector<int64_t> _build_lengths;
+};
+
+} // namespace
+
+struct ScalarColumnReaderTestAccess {
+    static void set_nested_batch(ScalarColumnReader* reader,
+                                 std::unique_ptr<ParquetNestedScalarBatch> batch) {
+        reader->_nested_batch = std::move(batch);
+    }
+
+    static int64_t page_filtered_rows_to_skip(const ScalarColumnReader& reader, int64_t rows) {
+        return reader.page_filtered_rows_to_skip(rows);
+    }
+
+    static void set_row_group_rows_read(ScalarColumnReader* reader, int64_t rows) {
+        reader->_row_group_rows_read = rows;
+    }
+};
+
+namespace {
+
+std::unique_ptr<ScalarColumnReader> make_scripted_scalar_reader(
+        ParquetColumnSchema schema, std::unique_ptr<ParquetNestedScalarBatch> batch) {
+    auto reader = std::make_unique<ScalarColumnReader>(schema, nullptr);
+    ScalarColumnReaderTestAccess::set_nested_batch(reader.get(), std::move(batch));
+    return reader;
+}
+
+std::unique_ptr<ParquetNestedScalarBatch> scalar_batch(std::vector<int16_t> def_levels,
+                                                       std::vector<int16_t> rep_levels,
+                                                       std::vector<int64_t> value_indices,
+                                                       std::vector<int64_t> values) {
+    auto batch = std::make_unique<ParquetNestedScalarBatch>();
+    batch->levels_written = static_cast<int64_t>(def_levels.size());
+    batch->def_levels = std::move(def_levels);
+    batch->rep_levels = std::move(rep_levels);
+    batch->value_indices = std::move(value_indices);
+    auto column = ColumnInt64::create();
+    for (const auto value : values) {
+        column->insert_value(value);
+    }
+    batch->values_column = std::move(column);
+    return batch;
+}
+
+class DefaultOnlyReader final : public ParquetColumnReader {
+public:
+    DefaultOnlyReader()
+            : ParquetColumnReader(int64_schema("default_only"), std::make_shared<DataTypeInt64>()) {
+    }
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+};
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row);
+    EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2));
+    GlobalRowLoacationV2 location(0, 0, 0, 0);
+    std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2));
+    return location;
+}
+
+} // namespace
+
+TEST(SelectionVectorTest, IdentitySelectionToRanges) {
+    SelectionVector selection;
+    const auto ranges = selection_to_ranges(selection, 5);
+    ASSERT_EQ(ranges.size(), 1);
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 5);
+    EXPECT_TRUE(selection.verify(5, 5).ok());
+}
+
+TEST(SelectionVectorTest, ExternalBufferSelectionToRanges) {
+    SelectionVector::Index indices[] = {0, 1, 4, 6, 7};
+    SelectionVector selection(indices, std::size(indices));
+    const auto ranges = selection_to_ranges(selection, std::size(indices));
+    ASSERT_EQ(ranges.size(), 3);
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 2);
+    EXPECT_EQ(ranges[1].start, 4);
+    EXPECT_EQ(ranges[1].length, 1);
+    EXPECT_EQ(ranges[2].start, 6);
+    EXPECT_EQ(ranges[2].length, 2);
+    EXPECT_TRUE(selection.verify(std::size(indices), 8).ok());
+}
+
+TEST(SelectionVectorTest, VerifyRejectsInvalidSelection) {
+    SelectionVector selection(2);
+    EXPECT_FALSE(selection.verify(3, 3).ok());
+    EXPECT_FALSE(selection.verify(1, -1).ok());
+
+    selection.set_index(0, 2);
+    selection.set_index(1, 1);
+    EXPECT_FALSE(selection.verify(2, 3).ok());
+
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    EXPECT_FALSE(selection.verify(2, 3).ok());
+}
+
+TEST(ParquetColumnReaderControlTest, BaseSelectUsesSkipReadRanges) {
+    CursorColumnReader reader;
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 2);
+    selection.set_index(2, 4);
+
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    ASSERT_TRUE(reader.select(selection, 3, 6, column).ok());
+
+    const auto& values = assert_cast<const ColumnInt64&>(*column);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(values.get_element(0), 0);
+    EXPECT_EQ(values.get_element(1), 2);
+    EXPECT_EQ(values.get_element(2), 4);
+    EXPECT_EQ(reader.cursor(), 6);
+    EXPECT_EQ(reader.read_lengths(), std::vector<int64_t>({1, 1, 1}));
+    EXPECT_EQ(reader.skip_lengths(), std::vector<int64_t>({0, 1, 1, 1}));
+}
+
+TEST(ParquetColumnReaderControlTest, BaseSelectZeroRowsConsumesBatch) {
+    CursorColumnReader reader;
+    SelectionVector selection;
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    ASSERT_TRUE(reader.select(selection, 0, 4, column).ok());
+    EXPECT_EQ(column->size(), 0);
+    EXPECT_EQ(reader.cursor(), 4);
+    EXPECT_TRUE(reader.read_lengths().empty());
+    EXPECT_EQ(reader.skip_lengths(), std::vector<int64_t>({4}));
+}
+
+TEST(ParquetColumnReaderControlTest, BaseNestedDefaultsAndSkipNested) {
+    DefaultOnlyReader base_reader;
+    EXPECT_FALSE(base_reader.skip(1).ok());
+    EXPECT_FALSE(base_reader.load_nested_batch(1).ok());
+
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    int64_t values_read = 0;
+    EXPECT_FALSE(base_reader.build_nested_column(1, column, &values_read).ok());
+
+    NestedBuildReader ok_reader(3);
+    ASSERT_TRUE(ok_reader.skip_nested_column(3).ok());
+    EXPECT_EQ(ok_reader.last_length_upper_bound(), 3);
+
+    NestedBuildReader short_reader(2);
+    EXPECT_FALSE(short_reader.skip_nested_column(3).ok());
+}
+
+TEST(ParquetColumnReaderControlTest, NestedMaterializerHelpersAppendOffsetsAndParentNulls) {
+    ColumnArray::Offsets64 offsets;
+    append_offsets(offsets, {3, 0, 2});
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 3);
+    EXPECT_EQ(offsets[1], 3);
+    EXPECT_EQ(offsets[2], 5);
+    append_offsets(offsets, {1, 4});
+    ASSERT_EQ(offsets.size(), 5);
+    EXPECT_EQ(offsets[3], 6);
+    EXPECT_EQ(offsets[4], 10);
+
+    const NullMap parent_nulls = {0, 1, 0};
+    append_parent_nulls(nullptr, parent_nulls);
+    NullMap dst = {1};
+    append_parent_nulls(&dst, parent_nulls);
+    EXPECT_EQ(dst, NullMap({1, 0, 1, 0}));
+}
+
+TEST(ParquetColumnReaderControlTest, PageFilteredRowsToSkipUsesOnlyFullSkippedRanges) {
+    ParquetPageSkipPlan page_skip_plan;
+    page_skip_plan.skipped_ranges = {RowRange {0, 3}, RowRange {5, 2}, RowRange {10, 4}};
+
+    auto schema = nested_int64_schema("page_filtered", 0, 0);
+    ScalarColumnReader reader(schema, nullptr, &page_skip_plan);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 3), 3);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 3);
+
+    ScalarColumnReaderTestAccess::set_row_group_rows_read(&reader, 5);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 2), 2);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 2);
+}
+
+TEST(ParquetColumnReaderControlTest, StructSkipsNullParentForRepeatedChildAndBatchesPresentRows) {
+    auto repeated_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("repeated_shape", 1, 2, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {2, 2, 2, 2},
+            std::vector<int16_t> {0, 0, 0, 0}, true);
+    auto* repeated_child_ptr = repeated_child.get();
+    auto scalar_child = make_scripted_scalar_reader(
+            nested_int64_schema("scalar_child", 1, 2),
+            scalar_batch({2, 0, 2, 2}, {0, 0, 0, 0}, {0, -1, 1, 2}, {10, 20, 30}));
+    auto* scalar_child_ptr = scalar_child.get();
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(repeated_child));
+    children.push_back(std::move(scalar_child));
+    StructColumnReader reader(nested_struct_schema(),
+                              make_nullable(std::make_shared<DataTypeStruct>(
+                                      DataTypes {make_nullable(std::make_shared<DataTypeInt64>()),
+                                                 make_nullable(std::make_shared<DataTypeInt64>())},
+                                      Strings {"a", "b"})),
+                              std::move(children), {0, 1});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(4, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 4);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_EQ(repeated_child_ptr->build_lengths(), std::vector<int64_t>({1, 2}));
+    EXPECT_EQ(scalar_child_ptr->nested_build_level_cursor(), 4);
+}
+
+TEST(ParquetColumnReaderControlTest, StructFallsBackToFirstChildWhenAllChildrenAreRepeated) {
+    auto first_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("first", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 0}, std::vector<int16_t> {0, 0}, true);
+    auto second_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("second", 1, 2, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {2, 2},
+            std::vector<int16_t> {0, 0}, true);
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(first_child));
+    children.push_back(std::move(second_child));
+    StructColumnReader reader(nested_struct_schema(), nested_struct_schema().type,
+                              std::move(children), {0, 1});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(2, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+}
+
+TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesComplexChildShapeOnly) {
+    auto shape_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2, 0, 0, 2, 2}, std::vector<int16_t> {0, 0, 0, 0, 0, 0});
+
+    ParquetColumnSchema map_schema = nested_map_schema();
+    map_schema.nullable_definition_level = 2;
+    map_schema.definition_level = 3;
+    map_schema.repeated_ancestor_definition_level = 0;
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 3, 3, 1, 0),
+            make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {3, 3, 0, 0, 3, 3}, std::vector<int16_t> {0, 0, 0, 0, 0, 0});
+    auto value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 4, 4, 1, 0),
+                                        scalar_batch({4, 4, 0, 0, 4, 4}, {0, 0, 0, 0, 0, 0},
+                                                     {0, 1, -1, -1, 2, 3}, {10, 20, 30, 40}));
+    auto map_reader = std::make_unique<MapColumnReader>(
+            map_schema, map_schema.type, std::move(key_reader), std::move(value_reader));
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(shape_child));
+    children.push_back(std::move(map_reader));
+    auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(DataTypes {map_schema.type},
+                                                                      Strings {"partitionValues"}));
+    StructColumnReader reader(nested_struct_schema(), struct_type, std::move(children), {-1, 0});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(6, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 6);
+
+    const auto& nullable_struct = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_struct.size(), 6);
+    EXPECT_FALSE(nullable_struct.is_null_at(0));
+    EXPECT_FALSE(nullable_struct.is_null_at(1));
+    EXPECT_TRUE(nullable_struct.is_null_at(2));
+    EXPECT_TRUE(nullable_struct.is_null_at(3));
+    EXPECT_FALSE(nullable_struct.is_null_at(4));
+    EXPECT_FALSE(nullable_struct.is_null_at(5));
+
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_struct.get_nested_column());
+    const auto& map_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(map_nullable.size(), 6);
+    EXPECT_FALSE(map_nullable.is_null_at(0));
+    EXPECT_FALSE(map_nullable.is_null_at(1));
+    EXPECT_TRUE(map_nullable.is_null_at(2));
+    EXPECT_TRUE(map_nullable.is_null_at(3));
+    EXPECT_FALSE(map_nullable.is_null_at(4));
+    EXPECT_FALSE(map_nullable.is_null_at(5));
+    const auto& map_column = assert_cast<const ColumnMap&>(map_nullable.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 6);
+    EXPECT_EQ(map_column.get_offsets()[0], 1);
+    EXPECT_EQ(map_column.get_offsets()[1], 2);
+    EXPECT_EQ(map_column.get_offsets()[2], 2);
+    EXPECT_EQ(map_column.get_offsets()[3], 2);
+    EXPECT_EQ(map_column.get_offsets()[4], 3);
+    EXPECT_EQ(map_column.get_offsets()[5], 4);
+}
+
+TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesNestedStructDescendants) {
+    auto shape_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 0, 2}, std::vector<int16_t> {0, 0, 0});
+
+    auto id_batch = scalar_batch({4, 3, 4}, {0, 0, 0}, {0, -1, 1}, {10, 20});
+    id_batch->value_slot_definition_level = 3;
+    auto id_reader =
+            make_scripted_scalar_reader(nested_int64_schema("id", 3, 4), std::move(id_batch));
+
+    ParquetColumnSchema inner_schema;
+    inner_schema.local_id = 0;
+    inner_schema.name = "stats_parsed";
+    inner_schema.kind = ParquetColumnSchemaKind::STRUCT;
+    inner_schema.nullable_definition_level = 2;
+    inner_schema.definition_level = 3;
+    inner_schema.type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {make_nullable(std::make_shared<DataTypeInt64>())}, Strings {"id"}));
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> inner_children;
+    inner_children.push_back(std::move(id_reader));
+    auto inner_reader = std::make_unique<StructColumnReader>(
+            inner_schema, inner_schema.type, std::move(inner_children), std::vector<int> {0});
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> outer_children;
+    outer_children.push_back(std::move(shape_child));
+    outer_children.push_back(std::move(inner_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeStruct>(DataTypes {inner_schema.type},
+                                                                     Strings {"stats_parsed"}));
+    StructColumnReader reader(nested_struct_schema(), outer_type, std::move(outer_children),
+                              {-1, 0});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(3, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer_nullable = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer_nullable.size(), 3);
+    EXPECT_FALSE(outer_nullable.is_null_at(0));
+    EXPECT_TRUE(outer_nullable.is_null_at(1));
+    EXPECT_FALSE(outer_nullable.is_null_at(2));
+
+    const auto& outer_struct = assert_cast<const ColumnStruct&>(outer_nullable.get_nested_column());
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_struct.get_column(0));
+    ASSERT_EQ(inner_nullable.size(), 3);
+    EXPECT_FALSE(inner_nullable.is_null_at(0));
+    EXPECT_TRUE(inner_nullable.is_null_at(1));
+    EXPECT_FALSE(inner_nullable.is_null_at(2));
+
+    const auto& inner_struct = assert_cast<const ColumnStruct&>(inner_nullable.get_nested_column());
+    const auto& id_nullable = assert_cast<const ColumnNullable&>(inner_struct.get_column(0));
+    const auto& id_values = assert_cast<const ColumnInt64&>(id_nullable.get_nested_column());
+    EXPECT_EQ(id_values.get_element(0), 10);
+    EXPECT_EQ(id_values.get_element(2), 20);
+}
+
+TEST(ParquetColumnReaderControlTest, ListKeepsEmptyBareRepeatedPrimitiveRows) {
+    auto element_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("element", 0, 1, 1, 1), std::make_shared<DataTypeInt64>(),
+            std::vector<int16_t> {0, 1, 1, 0}, std::vector<int16_t> {0, 0, 1, 0});
+    auto* element_reader_ptr = element_reader.get();
+    ListColumnReader reader(bare_repeated_int64_list_schema(),
+                            bare_repeated_int64_list_schema().type, std::move(element_reader));
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(3, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& array_column = assert_cast<const ColumnArray&>(*column);
+    ASSERT_EQ(array_column.get_offsets().size(), 3);
+    EXPECT_EQ(array_column.get_offsets()[0], 0);
+    EXPECT_EQ(array_column.get_offsets()[1], 2);
+    EXPECT_EQ(array_column.get_offsets()[2], 2);
+    EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector<int64_t>({2}));
+}
+
+TEST(ParquetColumnReaderControlTest, NestedListSkipsAncestorEmptyRowsButKeepsNullElements) {
+    auto element_reader =
+            std::make_unique<ScriptedNestedReader>(nested_int64_schema("element", 5, 5, 2, 4),
+                                                   make_nullable(std::make_shared<DataTypeInt64>()),
+                                                   std::vector<int16_t> {1, 5, 5, 5, 2, 5, 2, 0},
+                                                   std::vector<int16_t> {0, 0, 2, 1, 0, 1, 1, 0});
+    auto* element_reader_ptr = element_reader.get();
+
+    const auto inner_type = make_nullable(
+            std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt64>())));
+    auto inner_reader = std::make_unique<ListColumnReader>(
+            nested_list_schema("inner", make_nullable(std::make_shared<DataTypeInt64>()), 3, 4, 2,
+                               2),
+            inner_type, std::move(element_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeArray>(inner_type));
+    ListColumnReader reader(nested_list_schema("outer", inner_type, 1, 2, 1, 2), outer_type,
+                            std::move(inner_reader));
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(4, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 4);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_TRUE(nullable_column.is_null_at(3));
+
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 4);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 2);
+    EXPECT_EQ(outer_offsets[2], 5);
+    EXPECT_EQ(outer_offsets[3], 5);
+
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(inner_nullable.size(), 5);
+    EXPECT_FALSE(inner_nullable.is_null_at(0));
+    EXPECT_FALSE(inner_nullable.is_null_at(1));
+    EXPECT_TRUE(inner_nullable.is_null_at(2));
+    EXPECT_FALSE(inner_nullable.is_null_at(3));
+    EXPECT_TRUE(inner_nullable.is_null_at(4));
+
+    const auto& inner_array = assert_cast<const ColumnArray&>(inner_nullable.get_nested_column());
+    const auto& inner_offsets = inner_array.get_offsets();
+    ASSERT_EQ(inner_offsets.size(), 5);
+    EXPECT_EQ(inner_offsets[0], 2);
+    EXPECT_EQ(inner_offsets[1], 3);
+    EXPECT_EQ(inner_offsets[2], 3);
+    EXPECT_EQ(inner_offsets[3], 4);
+    EXPECT_EQ(inner_offsets[4], 4);
+    EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector<int64_t>({4}));
+}
+
+TEST(ParquetColumnReaderControlTest, MapKeepsEmptyMapRows) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1, 2),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1},
+            std::vector<int16_t> {0});
+    auto value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("value", 2, 3, 1, 2),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1},
+            std::vector<int16_t> {0});
+    auto* value_reader_ptr = value_reader.get();
+    MapColumnReader reader(nested_map_schema(), nested_map_schema().type, std::move(key_reader),
+                           std::move(value_reader));
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 1);
+
+    const auto& nullable_map = assert_cast<const ColumnNullable&>(*column);
+    EXPECT_FALSE(nullable_map.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_map.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 0);
+    EXPECT_EQ(value_reader_ptr->build_lengths(), std::vector<int64_t>({0}));
+}
+
+TEST(ParquetColumnReaderControlTest, ListMapSkipsAncestorEmptyRowsBeforeScalarValues) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 4, 4, 2, 4),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1, 4},
+            std::vector<int16_t> {0, 0});
+    auto value_reader = make_scripted_scalar_reader(nested_int64_schema("value", 5, 5, 2, 4),
+                                                    scalar_batch({1, 5}, {0, 0}, {-1, 0}, {100}));
+
+    const auto map_type = make_nullable(
+            std::make_shared<DataTypeMap>(make_nullable(std::make_shared<DataTypeInt64>()),
+                                          make_nullable(std::make_shared<DataTypeInt64>())));
+    auto map_reader = std::make_unique<MapColumnReader>(
+            nested_map_schema(make_nullable(std::make_shared<DataTypeInt64>())), map_type,
+            std::move(key_reader), std::move(value_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeArray>(map_type));
+    ListColumnReader reader(nested_list_schema("outer", map_type, 1, 2, 1, 2), outer_type,
+                            std::move(map_reader));
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(2, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 2);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 2);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 2);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 1);
+
+    const auto& map_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(map_nullable.size(), 1);
+    EXPECT_FALSE(map_nullable.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(map_nullable.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 1);
+
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    const auto& value_data = assert_cast<const ColumnInt64&>(values.get_nested_column());
+    ASSERT_EQ(values.size(), 1);
+    EXPECT_FALSE(values.is_null_at(0));
+    EXPECT_EQ(value_data.get_element(0), 100);
+}
+
+TEST(ParquetColumnReaderControlTest, MapRejectsNullKeysAndMisalignedScalarValueRepLevels) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2}, std::vector<int16_t> {0}, false, true);
+    auto value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("value", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2}, std::vector<int16_t> {0});
+    MapColumnReader null_key_reader(nested_map_schema(), nested_map_schema().type,
+                                    std::move(key_reader), std::move(value_reader));
+    auto column = null_key_reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = null_key_reader.build_nested_column(1, column, &rows_read);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains null key"), std::string::npos);
+
+    auto aligned_key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto misaligned_value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1),
+                                        scalar_batch({3, 3}, {0, 0}, {0, 1}, {100, 200}));
+    MapColumnReader misaligned_reader(nested_map_schema(), nested_map_schema().type,
+                                      std::move(aligned_key_reader),
+                                      std::move(misaligned_value_reader));
+    column = misaligned_reader.type()->create_column();
+    status = misaligned_reader.build_nested_column(1, column, &rows_read);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("value repetition level is not aligned"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderControlTest, MapBuildsScalarAndComplexValuePaths) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto scalar_value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1),
+                                        scalar_batch({3, 3}, {0, 1}, {0, 1}, {100, 200}));
+    MapColumnReader scalar_reader(nested_map_schema(), nested_map_schema().type,
+                                  std::move(key_reader), std::move(scalar_value_reader));
+    auto column = scalar_reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = scalar_reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    const auto& nullable_map = assert_cast<const ColumnNullable&>(*column);
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_map.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 2);
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    const auto& value_data = assert_cast<const ColumnInt64&>(values.get_nested_column());
+    ASSERT_EQ(values.size(), 2);
+    EXPECT_EQ(value_data.get_element(0), 100);
+    EXPECT_EQ(value_data.get_element(1), 200);
+
+    auto complex_key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto complex_value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("complex_value", 2, 3, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {3, 3},
+            std::vector<int16_t> {0, 1});
+    auto* complex_value_reader_ptr = complex_value_reader.get();
+    MapColumnReader complex_reader(nested_map_schema(), nested_map_schema().type,
+                                   std::move(complex_key_reader), std::move(complex_value_reader));
+    column = complex_reader.type()->create_column();
+    status = complex_reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(complex_value_reader_ptr->build_lengths(), std::vector<int64_t>({2}));
+}
+
+TEST(ParquetVirtualColumnReaderTest, RowPositionReadSkipAndInvalidArgs) {
+    RowPositionColumnReader reader(100);
+    EXPECT_EQ(reader.file_column_id(), format::ROW_POSITION_COLUMN_ID);
+    EXPECT_EQ(reader.parquet_leaf_column_id(), -1);
+    EXPECT_EQ(reader.name(), format::ROW_POSITION_COLUMN_NAME);
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader.read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    ASSERT_TRUE(reader.skip(3).ok());
+    ASSERT_TRUE(reader.read(2, column, &rows_read).ok());
+
+    const auto& values = assert_cast<const ColumnInt64&>(*column);
+    ASSERT_EQ(values.size(), 4);
+    EXPECT_EQ(values.get_element(0), 100);
+    EXPECT_EQ(values.get_element(1), 101);
+    EXPECT_EQ(values.get_element(2), 105);
+    EXPECT_EQ(values.get_element(3), 106);
+
+    MutableColumnPtr null_column;
+    EXPECT_FALSE(reader.read(1, null_column, &rows_read).ok());
+    EXPECT_FALSE(reader.read(-1, column, &rows_read).ok());
+    EXPECT_FALSE(reader.read(1, column, nullptr).ok());
+}
+
+TEST(ParquetVirtualColumnReaderTest, GlobalRowIdReadSkipSelectAndInvalidArgs) {
+    format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42};
+    GlobalRowIdColumnReader reader(context, 10);
+    EXPECT_EQ(reader.file_column_id(), format::GLOBAL_ROWID_COLUMN_ID);
+    EXPECT_EQ(reader.parquet_leaf_column_id(), -1);
+    EXPECT_EQ(reader.name(), BeConsts::GLOBAL_ROWID_COL);
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader.read(2, column, &rows_read).ok());
+    ASSERT_TRUE(reader.skip(2).ok());
+    ASSERT_TRUE(reader.read(1, column, &rows_read).ok());
+
+    const auto& strings = assert_cast<const ColumnString&>(*column);
+    ASSERT_EQ(strings.size(), 3);
+    const auto first = decode_rowid(strings, 0);
+    EXPECT_EQ(first.version, context.version);
+    EXPECT_EQ(first.backend_id, context.backend_id);
+    EXPECT_EQ(first.file_id, context.file_id);
+    EXPECT_EQ(first.row_id, 10);
+    EXPECT_EQ(decode_rowid(strings, 1).row_id, 11);
+    EXPECT_EQ(decode_rowid(strings, 2).row_id, 14);
+
+    GlobalRowIdColumnReader select_reader(context, 20);
+    SelectionVector selection(2);
+    selection.set_index(0, 1);
+    selection.set_index(1, 3);
+    auto selected_column = select_reader.type()->create_column();
+    ASSERT_TRUE(select_reader.select(selection, 2, 5, selected_column).ok());
+    const auto& selected_strings = assert_cast<const ColumnString&>(*selected_column);
+    ASSERT_EQ(selected_strings.size(), 2);
+    EXPECT_EQ(decode_rowid(selected_strings, 0).row_id, 21);
+    EXPECT_EQ(decode_rowid(selected_strings, 1).row_id, 23);
+
+    MutableColumnPtr null_column;
+    EXPECT_FALSE(reader.read(1, null_column, &rows_read).ok());
+    EXPECT_FALSE(reader.read(-1, column, &rows_read).ok());
+    EXPECT_FALSE(reader.read(1, column, nullptr).ok());
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsInvalidLeafIdBeforeCreatingRecordReader) {
+    ParquetColumnSchema schema = int64_schema("bad_leaf");
+    schema.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    schema.leaf_column_id = 3;
+    schema.type_descriptor.physical_type = ::parquet::Type::INT64;
+    schema.type_descriptor.doris_type = schema.type;
+
+    ParquetColumnReaderFactory factory(nullptr, 1);
+    std::unique_ptr<ParquetColumnReader> reader;
+    const auto status = factory.create(schema, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid parquet leaf column id"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsStructInvalidAndEmptyProjection) {
+    auto schema = struct_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> reader;
+
+    auto invalid_projection = format::LocalColumnIndex::partial_local(0);
+    invalid_projection.children.push_back(format::LocalColumnIndex::local(9));
+    auto status = factory.create(schema, &invalid_projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid child"), std::string::npos);
+
+    auto empty_projection = format::LocalColumnIndex::partial_local(0);
+    status = factory.create(schema, &empty_projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no children"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsListProjectionWithoutElement) {
+    auto schema = list_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> reader;
+
+    auto projection = format::LocalColumnIndex::partial_local(0);
+    const auto status = factory.create(schema, &projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no element"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsMapInvalidAndKeyOnlyProjection) {
+    auto schema = map_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> reader;
+
+    auto invalid_projection = format::LocalColumnIndex::partial_local(0);
+    invalid_projection.children.push_back(format::LocalColumnIndex::local(1));
+    invalid_projection.children.push_back(format::LocalColumnIndex::local(9));
+    auto status = factory.create(schema, &invalid_projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid child"), std::string::npos);
+
+    auto key_only_projection = format::LocalColumnIndex::partial_local(0);
+    key_only_projection.children.push_back(format::LocalColumnIndex::local(0));
+    status = factory.create(schema, &key_only_projection, &reader);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no value"), std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_reader_test.cpp b/be/test/format_v2/parquet/parquet_reader_test.cpp
new file mode 100644
index 00000000000000..db8f826fd44f66
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_reader_test.cpp
@@ -0,0 +1,1982 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/page_index.h>
+
+#include <cstring>
+#include <filesystem>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "core/field.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/expr/delete_predicate.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_scan.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Types_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+#include "storage/utils.h"
+
+namespace doris {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5;
+
+format::LocalColumnIndex field_projection(int32_t column_id) {
+    return format::LocalColumnIndex {.index = column_id};
+}
+
+template <typename ColumnType>
+const ColumnType& nullable_nested_column(const Block& block, size_t position) {
+    const IColumn* column = block.get_by_position(position).column.get();
+    int nullable_depth = 0;
+    while (const auto* nullable = check_and_get_column<ColumnNullable>(*column)) {
+        const auto& null_map = nullable->get_null_map_data();
+        for (size_t row = 0; row < null_map.size(); ++row) {
+            EXPECT_EQ(null_map[row], 0) << "Unexpected null at row " << row << ", column position "
+                                        << position << ", nullable depth " << nullable_depth;
+        }
+        column = &nullable->get_nested_column();
+        ++nullable_depth;
+    }
+    EXPECT_GT(nullable_depth, 0) << "Expected a nullable file-local column at position "
+                                 << position;
+    return assert_cast<const ColumnType&>(*column);
+}
+
+class Int32GreaterThanExpr final : public VExpr {
+public:
+    Int32GreaterThanExpr(int column_id, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _column_id(column_id),
+              _value(value) {}
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& input = nullable_nested_column<ColumnInt32>(*block, _column_id);
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const size_t input_row = selector == nullptr ? row : (*selector)[row];
+            result_data[row] = input.get_element(input_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _column_id;
+    const int32_t _value;
+    const std::string _expr_name = "Int32GreaterThanExpr";
+};
+
+class Int32SumGreaterThanExpr final : public VExpr {
+public:
+    Int32SumGreaterThanExpr(int left_column_id, int right_column_id, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _left_column_id(left_column_id),
+              _right_column_id(right_column_id),
+              _value(value) {}
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& left_input = nullable_nested_column<ColumnInt32>(*block, _left_column_id);
+        const auto& right_input = nullable_nested_column<ColumnInt32>(*block, _right_column_id);
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const size_t input_row = selector == nullptr ? row : (*selector)[row];
+            result_data[row] =
+                    left_input.get_element(input_row) + right_input.get_element(input_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _left_column_id;
+    const int _right_column_id;
+    const int32_t _value;
+    const std::string _expr_name = "Int32SumGreaterThanExpr";
+};
+
+class StringInExpr final : public VExpr {
+public:
+    StringInExpr(int column_id, std::vector<std::string> values)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _column_id(column_id),
+              _values(std::move(values)) {}
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& input = nullable_nested_column<ColumnString>(*block, _column_id);
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const size_t input_row = selector == nullptr ? row : (*selector)[row];
+            const auto value = input.get_data_at(input_row).to_string();
+            result_data[row] = std::find(_values.begin(), _values.end(), value) != _values.end();
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _column_id;
+    const std::vector<std::string> _values;
+    const std::string _expr_name = "StringInExpr";
+};
+
+VExprContextSPtr create_int32_greater_than_conjunct(int column_id, int32_t value) {
+    auto ctx =
+            VExprContext::create_shared(std::make_shared<Int32GreaterThanExpr>(column_id, value));
+    ctx->_prepared = true;
+    ctx->_opened = true;
+    return ctx;
+}
+
+VExprContextSPtr create_int32_sum_greater_than_conjunct(int left_column_id, int right_column_id,
+                                                        int32_t value) {
+    auto ctx = VExprContext::create_shared(
+            std::make_shared<Int32SumGreaterThanExpr>(left_column_id, right_column_id, value));
+    ctx->_prepared = true;
+    ctx->_opened = true;
+    return ctx;
+}
+
+VExprContextSPtr create_string_in_conjunct(int column_id, std::vector<std::string> values) {
+    auto ctx = VExprContext::create_shared(
+            std::make_shared<StringInExpr>(column_id, std::move(values)));
+    ctx->_prepared = true;
+    ctx->_opened = true;
+    return ctx;
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder builder;
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_timestamp_array(const std::shared_ptr<arrow::DataType>& type,
+                                                    const std::vector<int64_t>& values) {
+    arrow::TimestampBuilder builder(type, arrow::default_memory_pool());
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_struct_array(const std::vector<int32_t>& ids,
+                                                 const std::vector<std::string>& names) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                       arrow::field("name", arrow::utf8(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    auto id_builder = std::make_unique<arrow::Int32Builder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(id_builder)));
+    auto name_builder = std::make_unique<arrow::StringBuilder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(name_builder)));
+    arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                 std::move(field_builders));
+    auto* struct_id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    auto* struct_name_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+    for (size_t row = 0; row < ids.size(); ++row) {
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_id_builder->Append(ids[row]).ok());
+        EXPECT_TRUE(struct_name_builder->Append(names[row]).ok());
+    }
+    return finish_array(&builder);
+}
+
+void write_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(schema,
+                                    {build_int32_array({1, 2, 3, 4, 5}),
+                                     build_string_array({"one", "two", "three", "four", "five"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      row_group_size, builder.build()));
+}
+
+void write_int96_timestamp_parquet_file(const std::string& file_path) {
+    auto field = arrow::field("ts_tz", arrow::timestamp(arrow::TimeUnit::MICRO), true);
+    auto array =
+            build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                  {1735660800000000LL, 1735660800123456LL, 1735689600000000LL});
+    auto table = arrow::Table::Make(arrow::schema({field}), {array});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    ::parquet::ArrowWriterProperties::Builder arrow_builder;
+    arrow_builder.enable_force_write_int96_timestamps();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ROW_COUNT, writer_builder.build(),
+                                                      arrow_builder.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_int32_array({1, 2, 3, 4, 5}), build_int32_array({1, 2, 3, 4, 5}),
+                     build_string_array({"one", "two", "three", "four", "five"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      row_group_size, builder.build()));
+}
+
+void write_condition_cache_parquet_file(const std::string& file_path) {
+    constexpr int64_t row_count = ConditionCacheContext::GRANULE_SIZE * 2;
+    std::vector<int32_t> ids(row_count);
+    std::iota(ids.begin(), ids.end(), 0);
+
+    auto schema = arrow::schema({arrow::field("id", arrow::int32(), false)});
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      row_count, builder.build()));
+}
+
+void write_struct_filter_parquet_file(const std::string& file_path) {
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto name_field = arrow::field("name", arrow::utf8(), false);
+    auto struct_type = arrow::struct_({id_field, name_field});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2,
+                                                      builder.build()));
+}
+
+void write_dictionary_filter_parquet_file(const std::string& file_path) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table =
+            arrow::Table::Make(schema, {build_int32_array({1, 2, 3, 4, 5, 6}),
+                                        build_string_array({"aa", "az", "lm", "lz", "za", "zz"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.enable_dictionary("value");
+    builder.disable_dictionary("id");
+    builder.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_nested_dictionary_filter_parquet_file(const std::string& file_path) {
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto name_field = arrow::field("name", arrow::utf8(), false);
+    auto struct_type = arrow::struct_({id_field, name_field});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_struct_array({1, 2, 3, 4, 5, 6}, {"aa", "az", "lm", "lz", "za", "zz"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.enable_dictionary("s.name");
+    builder.disable_dictionary("s.identifier.field_id");
+    builder.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_dictionary_edge_parquet_file(const std::string& file_path) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(
+            schema,
+            {build_int32_array({1, 2, 3, 4, 5, 6, 7, 8}),
+             build_string_array({"", "same", "other", "long-value", "", "tail", "same", "last"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.enable_dictionary("value");
+    builder.disable_dictionary("id");
+    builder.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2,
+                                                      builder.build()));
+}
+
+void write_nested_page_index_filter_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    std::vector<std::string> names;
+    names.reserve(ids.size());
+    for (const auto id : ids) {
+        names.push_back("name-" + std::to_string(id));
+    }
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto name_field = arrow::field("name", arrow::utf8(), false);
+    auto struct_type = arrow::struct_({id_field, name_field});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {build_struct_array(ids, names)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+void write_page_index_filter_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+void write_page_index_filter_pair_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    std::vector<int32_t> payloads;
+    payloads.reserve(ids.size());
+    for (const auto id : ids) {
+        payloads.push_back(id + 1000);
+    }
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("payload", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(payloads)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+Block build_file_block(const std::vector<format::ColumnDefinition>& schema) {
+    Block block;
+    for (const auto& field : schema) {
+        block.insert({field.type->create_column(), field.type, field.name});
+    }
+    return block;
+}
+
+Block build_file_block_with_row_position(const std::vector<format::ColumnDefinition>& schema) {
+    auto block = build_file_block(schema);
+    const auto row_position_field = format::row_position_column_definition();
+    block.insert({row_position_field.type->create_column(), row_position_field.type,
+                  row_position_field.name});
+    return block;
+}
+
+void use_schema_order_positions(format::FileScanRequest* request,
+                                const std::vector<format::ColumnDefinition>& schema) {
+    DORIS_CHECK(request != nullptr);
+    for (size_t idx = 0; idx < schema.size(); ++idx) {
+        request->local_positions.emplace(format::LocalColumnId(schema[idx].local_id),
+                                         format::LocalIndex(idx));
+    }
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    return column_metadata.has_dictionary_page()
+                   ? static_cast<int64_t>(column_metadata.dictionary_page_offset())
+                   : static_cast<int64_t>(column_metadata.data_page_offset());
+}
+
+std::pair<int64_t, int64_t> row_group_mid_range(const std::string& file_path, int row_group_idx) {
+    auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    auto metadata = reader->metadata();
+    auto row_group_metadata = metadata->RowGroup(row_group_idx);
+    auto first_column = row_group_metadata->ColumnChunk(0);
+    auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1);
+    const int64_t row_group_start_offset = parquet_column_start_offset(*first_column);
+    const int64_t row_group_end_offset =
+            parquet_column_start_offset(*last_column) + last_column->total_compressed_size();
+    const int64_t row_group_mid_offset =
+            row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2;
+    return {row_group_mid_offset, 1};
+}
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row);
+    EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2));
+    GlobalRowLoacationV2 location(0, 0, 0, 0);
+    std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2));
+    return location;
+}
+
+class TestFileReader final : public format::FileReader {
+public:
+    TestFileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                   std::unique_ptr<io::FileDescription>& file_description,
+                   std::shared_ptr<io::IOContext> io_ctx)
+            : format::FileReader(system_properties, file_description, io_ctx, nullptr) {}
+
+    Status get_schema(std::vector<format::ColumnDefinition>* file_schema) const override {
+        file_schema->clear();
+        format::ColumnDefinition field;
+        field.identifier = Field::create_field<TYPE_INT>(0);
+        field.name = "id";
+        field.type = std::make_shared<DataTypeInt32>();
+        file_schema->push_back(std::move(field));
+        return Status::OK();
+    }
+
+    bool has_request() const { return _request != nullptr; }
+
+    bool eof() const { return _eof; }
+
+    bool has_io_context() const { return _io_ctx != nullptr; }
+
+    long io_context_use_count() const { return _io_ctx.use_count(); }
+};
+
+TEST(FileReaderTest, OpenStoresRequestAndCloseKeepsRequest) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto file_description = std::make_unique<io::FileDescription>();
+    auto io_ctx = std::make_shared<io::IOContext>();
+    TestFileReader reader(system_properties, file_description, io_ctx);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns.push_back(field_projection(0));
+    ASSERT_TRUE(reader.open(request).ok());
+    EXPECT_NE(request, nullptr);
+    EXPECT_TRUE(reader.has_request());
+
+    ASSERT_TRUE(reader.close().ok());
+    EXPECT_TRUE(reader.has_request());
+    EXPECT_TRUE(reader.eof());
+}
+
+TEST(FileReaderTest, CloseReleasesSharedIOContext) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto file_description = std::make_unique<io::FileDescription>();
+    auto io_ctx = std::make_shared<io::IOContext>();
+    std::weak_ptr<io::IOContext> weak_io_ctx = io_ctx;
+    TestFileReader reader(system_properties, file_description, io_ctx);
+
+    EXPECT_TRUE(reader.has_io_context());
+    EXPECT_EQ(reader.io_context_use_count(), 2);
+    io_ctx.reset();
+    EXPECT_FALSE(weak_io_ctx.expired());
+    EXPECT_EQ(reader.io_context_use_count(), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    EXPECT_FALSE(reader.has_io_context());
+    EXPECT_TRUE(weak_io_ctx.expired());
+}
+
+class NewParquetReaderTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.parquet").string();
+        write_parquet_file(_file_path);
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    std::unique_ptr<format::parquet::ParquetReader> create_reader(
+            int64_t range_start_offset = 0, int64_t range_size = -1,
+            RuntimeProfile* profile = nullptr, bool enable_mapping_timestamp_tz = false,
+            std::shared_ptr<io::IOContext> io_ctx = nullptr,
+            std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt) const {
+        auto system_properties = std::make_shared<io::FileSystemProperties>();
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        auto file_description = std::make_unique<io::FileDescription>();
+        file_description->path = _file_path;
+        file_description->file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+        file_description->range_start_offset = range_start_offset;
+        file_description->range_size = range_size;
+        return std::make_unique<format::parquet::ParquetReader>(
+                system_properties, file_description, std::move(io_ctx), profile,
+                global_rowid_context, enable_mapping_timestamp_tz);
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+};
+
+TEST_F(NewParquetReaderTest, GetSchemaReturnsFileLocalColumns) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_EQ(schema[0].name, "id");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_INT);
+    EXPECT_EQ(schema[1].local_id, 1);
+    EXPECT_EQ(schema[1].name, "value");
+    ASSERT_TRUE(schema[1].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: Parquet is columnar and supports predicate/non-predicate split, nested projection and
+// file-layer pruning hints. The reader declares those scan-request capabilities by choosing
+// ParquetColumnMapper itself.
+TEST_F(NewParquetReaderTest, CreatesParquetColumnMapper) {
+    auto reader = create_reader();
+    auto mapper =
+            reader->create_column_mapper({.mode = format::TableColumnMappingMode::BY_FIELD_ID});
+
+    ASSERT_NE(dynamic_cast<format::ParquetColumnMapper*>(mapper.get()), nullptr);
+}
+
+TEST_F(NewParquetReaderTest, GetSchemaReturnsNullableNestedChildren) {
+    write_struct_filter_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+    EXPECT_EQ(schema[0].name, "s");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    ASSERT_EQ(schema[0].children.size(), 2);
+    EXPECT_EQ(schema[0].children[0].name, "id");
+    ASSERT_TRUE(schema[0].children[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].children[0].type)->get_primitive_type(), TYPE_INT);
+    EXPECT_EQ(schema[0].children[1].name, "name");
+    ASSERT_TRUE(schema[0].children[1].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].children[1].type)->get_primitive_type(), TYPE_STRING);
+
+    const auto* struct_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(schema[0].type).get());
+    ASSERT_EQ(struct_type->get_elements().size(), 2);
+    EXPECT_TRUE(struct_type->get_element(0)->is_nullable());
+    EXPECT_TRUE(struct_type->get_element(1)->is_nullable());
+}
+
+TEST_F(NewParquetReaderTest, GetSchemaMapsInt96ToTimestampTzWhenTimestampTzMappingEnabled) {
+    write_int96_timestamp_parquet_file(_file_path);
+    auto reader = create_reader(0, -1, nullptr, true);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+    EXPECT_EQ(schema[0].name, "ts_tz");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_TIMESTAMPTZ);
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_scale(), 6);
+}
+
+TEST_F(NewParquetReaderTest, ReadSingleRowGroupThenEof) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ROW_COUNT);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), ROW_COUNT);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(4), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(values.get_data_at(4).to_string(), "five");
+
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, ConditionCacheMissMarksSurvivingGranules) {
+    write_condition_cache_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(
+            create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto ctx = std::make_shared<ConditionCacheContext>();
+    ctx->is_hit = false;
+    ctx->filter_result = std::make_shared<std::vector<bool>>(3, false);
+    reader->set_condition_cache_context(ctx);
+
+    std::vector<int32_t> ids;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+        }
+    }
+
+    ASSERT_EQ(ids.size(), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.front(), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.back(), ConditionCacheContext::GRANULE_SIZE * 2 - 1);
+    EXPECT_FALSE((*ctx->filter_result)[0]);
+    EXPECT_TRUE((*ctx->filter_result)[1]);
+    EXPECT_FALSE((*ctx->filter_result)[2]);
+}
+
+TEST_F(NewParquetReaderTest, ConditionCacheHitSkipsFalseGranulesBeforeColumnRead) {
+    write_condition_cache_parquet_file(_file_path);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(0, -1, nullptr, false, io_ctx);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(
+            create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto ctx = std::make_shared<ConditionCacheContext>();
+    ctx->is_hit = true;
+    ctx->filter_result =
+            std::make_shared<std::vector<bool>>(std::vector<bool> {false, true, false});
+    reader->set_condition_cache_context(ctx);
+
+    Block block = build_file_block(schema);
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(io_ctx->condition_cache_filtered_rows, ConditionCacheContext::GRANULE_SIZE);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    EXPECT_EQ(ids.get_element(0), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.get_element(rows - 1), ConditionCacheContext::GRANULE_SIZE * 2 - 1);
+
+    block = build_file_block(schema);
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, ReadMultipleRowGroups) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"one", "two", "three", "four", "five"}));
+}
+
+TEST_F(NewParquetReaderTest, ReadPredicateAndNonPredicateColumnsWithSelection) {
+    RuntimeProfile profile("new_parquet_reader_filter_profile");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), 3);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(ids.get_element(0), 3);
+    EXPECT_EQ(ids.get_element(1), 4);
+    EXPECT_EQ(ids.get_element(2), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(values.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(values.get_data_at(2).to_string(), "five");
+
+    ASSERT_NE(profile.get_counter("FileReaderCreateTime"), nullptr);
+    ASSERT_NE(profile.get_counter("FileNum"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RowsFilteredByConjunct"), nullptr);
+    ASSERT_NE(profile.get_counter("TotalBatches"), nullptr);
+    ASSERT_NE(profile.get_counter("EmptySelectionBatches"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderReadRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSelectRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ArrowReadRecordsTime"), nullptr);
+    ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr);
+    ASSERT_GT(profile.get_counter("FileReaderCreateTime")->value(), 0);
+    EXPECT_EQ(profile.get_counter("FileNum")->value(), 1);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), ROW_COUNT);
+    EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 3);
+    EXPECT_EQ(profile.get_counter("RowsFilteredByConjunct")->value(), 2);
+    EXPECT_EQ(profile.get_counter("TotalBatches")->value(), 1);
+    EXPECT_EQ(profile.get_counter("EmptySelectionBatches")->value(), 0);
+    EXPECT_EQ(profile.get_counter("ReaderReadRows")->value(), ROW_COUNT + 3);
+    EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 2);
+    EXPECT_EQ(profile.get_counter("ReaderSelectRows")->value(), 3);
+    EXPECT_GT(profile.get_counter("ArrowReadRecordsTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("MaterializationTime")->value(), 0);
+
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, GlobalRowIdSchemaAndSelectionUseFileRowPosition) {
+    format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42};
+    auto reader = create_reader(0, -1, nullptr, false, nullptr, context);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    EXPECT_EQ(schema[2].local_id, format::GLOBAL_ROWID_COLUMN_ID);
+    EXPECT_EQ(schema[2].column_type, format::GLOBAL_ROWID);
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1),
+                                      field_projection(format::GLOBAL_ROWID_COLUMN_ID)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    const auto& rowids = assert_cast<const ColumnString&>(*block.get_by_position(2).column);
+    ASSERT_EQ(ids.size(), 3);
+    ASSERT_EQ(values.size(), 3);
+    ASSERT_EQ(rowids.size(), 3);
+    EXPECT_EQ(ids.get_element(0), 3);
+    EXPECT_EQ(ids.get_element(1), 4);
+    EXPECT_EQ(ids.get_element(2), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(values.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(values.get_data_at(2).to_string(), "five");
+
+    for (size_t row = 0; row < rows; ++row) {
+        const auto location = decode_rowid(rowids, row);
+        EXPECT_EQ(location.version, context.version);
+        EXPECT_EQ(location.backend_id, context.backend_id);
+        EXPECT_EQ(location.file_id, context.file_id);
+        EXPECT_EQ(location.row_id, static_cast<uint32_t>(row + 2));
+    }
+}
+
+TEST_F(NewParquetReaderTest, ColumnPredicateOnlyPrunesAndDoesNotFilterRowsInsideRowGroup) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ROW_COUNT);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), ROW_COUNT);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(4), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(values.get_data_at(4).to_string(), "five");
+}
+
+TEST_F(NewParquetReaderTest, EmptySelectionUpdatesProfileCounters) {
+    RuntimeProfile profile("new_parquet_reader_empty_selection_profile");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 10));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RowsFilteredByConjunct"), nullptr);
+    ASSERT_NE(profile.get_counter("TotalBatches"), nullptr);
+    ASSERT_NE(profile.get_counter("EmptySelectionBatches"), nullptr);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), ROW_COUNT);
+    EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 0);
+    EXPECT_EQ(profile.get_counter("RowsFilteredByConjunct")->value(), ROW_COUNT);
+    EXPECT_EQ(profile.get_counter("TotalBatches")->value(), 1);
+    EXPECT_EQ(profile.get_counter("EmptySelectionBatches")->value(), 1);
+}
+
+TEST_F(NewParquetReaderTest, ReadMultiPredicateColumnsBeforeExpressionFilter) {
+    write_int_pair_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0), field_projection(1)};
+    request->non_predicate_columns = {};
+    request->conjuncts.push_back(create_int32_sum_greater_than_conjunct(0, 1, 7));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 2);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& scores = nullable_nested_column<ColumnInt32>(block, 1);
+    ASSERT_EQ(ids.size(), 2);
+    ASSERT_EQ(scores.size(), 2);
+    EXPECT_EQ(ids.get_element(0), 4);
+    EXPECT_EQ(ids.get_element(1), 5);
+    EXPECT_EQ(scores.get_element(0), 4);
+    EXPECT_EQ(scores.get_element(1), 5);
+}
+
+TEST_F(NewParquetReaderTest, PredicateColumnFiltersBeforeNonPredicateRead) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), 3);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(ids.get_element(0), 3);
+    EXPECT_EQ(ids.get_element(1), 4);
+    EXPECT_EQ(ids.get_element(2), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(values.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(values.get_data_at(2).to_string(), "five");
+}
+
+TEST_F(NewParquetReaderTest, NonPredicateColumnKeepsSelectionFromPredicateColumn) {
+    write_int_pair_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& scores = nullable_nested_column<ColumnInt32>(block, 1);
+    ASSERT_EQ(ids.size(), 3);
+    ASSERT_EQ(scores.size(), 3);
+    EXPECT_EQ(ids.get_element(0), 3);
+    EXPECT_EQ(ids.get_element(1), 4);
+    EXPECT_EQ(ids.get_element(2), 5);
+    EXPECT_EQ(scores.get_element(0), 3);
+    EXPECT_EQ(scores.get_element(1), 4);
+    EXPECT_EQ(scores.get_element(2), 5);
+}
+
+TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByStatistics) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"three", "four", "five"}));
+}
+
+TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByDictionary) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6);
+    for (int row_group_idx = 0; row_group_idx < 6; ++row_group_idx) {
+        auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx);
+        ASSERT_NE(row_group, nullptr);
+        auto value_chunk = row_group->ColumnChunk(1);
+        ASSERT_NE(value_chunk, nullptr);
+        ASSERT_TRUE(value_chunk->has_dictionary_page());
+        ASSERT_TRUE(value_chunk->statistics() == nullptr ||
+                    !value_chunk->statistics()->HasMinMax());
+    }
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 2);
+
+    format::FileScanRequest plan_request;
+    format::FileColumnPredicateFilter plan_column_filter;
+    plan_column_filter.file_column_id = format::LocalColumnId(1);
+    auto value_type = std::make_shared<DataTypeString>();
+    plan_column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", value_type, Field::create_field<TYPE_STRING>("lm"), false));
+    plan_request.column_predicate_filters.push_back(std::move(plan_column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         plan_request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, 1);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"lm"}));
+    use_schema_order_positions(request.get(), schema);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", schema[1].type, Field::create_field<TYPE_STRING>("lm"), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({3}));
+    EXPECT_EQ(values, std::vector<std::string>({"lm"}));
+}
+
+TEST_F(NewParquetReaderTest, ScanRangeFiltersRowGroupsBeforeDictionaryPruning) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6);
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+
+    format::FileScanRequest request;
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    auto value_type = std::make_shared<DataTypeString>();
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", value_type, Field::create_field<TYPE_STRING>("lm"), false));
+    request.column_predicate_filters.push_back(std::move(column_filter));
+
+    const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 2);
+    format::parquet::ParquetScanRange scan_range;
+    scan_range.start_offset = range_start_offset;
+    scan_range.size = range_size;
+    scan_range.file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+
+    format::parquet::RowGroupScanPlan plan;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByStatistics) {
+    write_struct_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 2);
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 1);
+    ASSERT_EQ(file_schema[0]->children.size(), 2);
+    ASSERT_EQ(file_schema[0]->children[0]->name, "id");
+
+    format::FileScanRequest request;
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.file_child_id_path = {0};
+    auto id_type = std::make_shared<DataTypeInt32>();
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(5), false));
+    request.column_predicate_filters.push_back(std::move(column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 1);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 2);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2);
+}
+
+TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByDictionary) {
+    write_nested_dictionary_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6);
+    for (int row_group_idx = 0; row_group_idx < 6; ++row_group_idx) {
+        auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx);
+        ASSERT_NE(row_group, nullptr);
+        auto name_chunk = row_group->ColumnChunk(1);
+        ASSERT_NE(name_chunk, nullptr);
+        ASSERT_TRUE(name_chunk->has_dictionary_page());
+        ASSERT_TRUE(name_chunk->statistics() == nullptr || !name_chunk->statistics()->HasMinMax());
+    }
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 1);
+    ASSERT_EQ(file_schema[0]->children.size(), 2);
+    ASSERT_EQ(file_schema[0]->children[1]->name, "name");
+
+    format::FileScanRequest request;
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.file_child_id_path = {1};
+    auto name_type = std::make_shared<DataTypeString>();
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "name", name_type, Field::create_field<TYPE_STRING>("lm"), false));
+    request.column_predicate_filters.push_back(std::move(column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5);
+}
+
+TEST_F(NewParquetReaderTest, PlannerNarrowsRowRangesByPageIndex) {
+    write_page_index_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto page_index_reader = parquet_file_reader->GetPageIndexReader();
+    ASSERT_NE(page_index_reader, nullptr);
+    auto row_group_index_reader = page_index_reader->RowGroup(0);
+    ASSERT_NE(row_group_index_reader, nullptr);
+    auto offset_index = row_group_index_reader->GetOffsetIndex(0);
+    ASSERT_NE(offset_index, nullptr);
+    ASSERT_GT(offset_index->page_locations().size(), 1);
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 1);
+
+    format::FileScanRequest request;
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    auto id_type = std::make_shared<DataTypeInt32>();
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(63), false));
+    request.column_predicate_filters.push_back(std::move(column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty());
+    EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0);
+    EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128);
+    auto skip_plan_it = plan.row_groups[0].page_skip_plans.find(0);
+    ASSERT_NE(skip_plan_it, plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(skip_plan_it->second.leaf_column_id, 0);
+    EXPECT_GT(skip_plan_it->second.skipped_ranges.size(), 0);
+    EXPECT_GT(skip_plan_it->second.skipped_pages.size(), 1);
+    ASSERT_EQ(skip_plan_it->second.skipped_pages.size(),
+              skip_plan_it->second.skipped_page_compressed_sizes.size());
+    int64_t skipped_compressed_bytes = 0;
+    for (size_t page_idx = 0; page_idx < skip_plan_it->second.skipped_pages.size(); ++page_idx) {
+        if (skip_plan_it->second.should_skip_page(page_idx)) {
+            skipped_compressed_bytes += skip_plan_it->second.skipped_page_compressed_size(page_idx);
+        }
+    }
+    EXPECT_GT(skipped_compressed_bytes, 0);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0);
+    EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size());
+}
+
+TEST_F(NewParquetReaderTest, NestedStructPredicateNarrowsRowRangesByPageIndex) {
+    write_nested_page_index_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto page_index_reader = parquet_file_reader->GetPageIndexReader();
+    ASSERT_NE(page_index_reader, nullptr);
+    auto row_group_index_reader = page_index_reader->RowGroup(0);
+    ASSERT_NE(row_group_index_reader, nullptr);
+    auto offset_index = row_group_index_reader->GetOffsetIndex(0);
+    ASSERT_NE(offset_index, nullptr);
+    ASSERT_GT(offset_index->page_locations().size(), 1);
+
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 1);
+    ASSERT_EQ(file_schema[0]->children.size(), 2);
+    ASSERT_EQ(file_schema[0]->children[0]->name, "id");
+
+    format::FileScanRequest request;
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.file_child_id_path = {0};
+    auto id_type = std::make_shared<DataTypeInt32>();
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(63), false));
+    request.column_predicate_filters.push_back(std::move(column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty());
+    EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0);
+    EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128);
+    auto skip_plan_it = plan.row_groups[0].page_skip_plans.find(0);
+    ASSERT_NE(skip_plan_it, plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(skip_plan_it->second.leaf_column_id, 0);
+    EXPECT_GT(skip_plan_it->second.skipped_ranges.size(), 0);
+    EXPECT_GT(skip_plan_it->second.skipped_pages.size(), 1);
+    ASSERT_EQ(skip_plan_it->second.skipped_pages.size(),
+              skip_plan_it->second.skipped_page_compressed_sizes.size());
+    int64_t skipped_compressed_bytes = 0;
+    for (size_t page_idx = 0; page_idx < skip_plan_it->second.skipped_pages.size(); ++page_idx) {
+        if (skip_plan_it->second.should_skip_page(page_idx)) {
+            skipped_compressed_bytes += skip_plan_it->second.skipped_page_compressed_size(page_idx);
+        }
+    }
+    EXPECT_GT(skipped_compressed_bytes, 0);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0);
+    EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size());
+}
+
+TEST_F(NewParquetReaderTest, PageIndexFilteredPagesDoNotDoubleSkipOutputColumns) {
+    write_page_index_filter_pair_parquet_file(_file_path);
+    RuntimeProfile profile("new_parquet_reader_page_skip");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 63));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<int32_t> payloads;
+    bool eof = false;
+    while (!eof) {
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& payload_column = nullable_nested_column<ColumnInt32>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            payloads.push_back(payload_column.get_element(row));
+        }
+    }
+
+    ASSERT_NE(profile.get_counter("PagesSkippedByDataPageFilter"), nullptr);
+    ASSERT_NE(profile.get_counter("DataPageFilterSkipBytes"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RowGroupFilterTime"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexFilterTime"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexReadTime"), nullptr);
+    EXPECT_GT(profile.get_counter("PagesSkippedByDataPageFilter")->value(), 0);
+    EXPECT_GT(profile.get_counter("DataPageFilterSkipBytes")->value(), 0);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64);
+    EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 64);
+    EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0);
+    EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 0);
+    EXPECT_GT(profile.get_counter("RowGroupFilterTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexFilterTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexReadTime")->value(), 0);
+
+    ASSERT_EQ(ids.size(), 64);
+    ASSERT_EQ(payloads.size(), ids.size());
+    for (size_t row = 0; row < ids.size(); ++row) {
+        EXPECT_EQ(ids[row], static_cast<int32_t>(row + 64));
+        EXPECT_EQ(payloads[row], ids[row] + 1000);
+    }
+}
+
+TEST_F(NewParquetReaderTest, InPredicateFiltersRowGroupsByDictionary) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"az", "za"}));
+    use_schema_order_positions(request.get(), schema);
+    auto set = build_set<TYPE_STRING>();
+    set->insert(const_cast<char*>("az"), 2);
+    set->insert(const_cast<char*>("za"), 2);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_in_list_predicate<PredicateType::IN_LIST>(
+            1, "value", schema[1].type, set, false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({2, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"az", "za"}));
+}
+
+TEST_F(NewParquetReaderTest, DictionaryPageV2StringEdgesSurviveSelection) {
+    write_dictionary_edge_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 4);
+    for (int row_group_idx = 0; row_group_idx < 4; ++row_group_idx) {
+        auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx);
+        ASSERT_NE(row_group, nullptr);
+        ASSERT_TRUE(row_group->ColumnChunk(1)->has_dictionary_page());
+    }
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"", "same"}));
+    use_schema_order_positions(request.get(), schema);
+    auto set = build_set<TYPE_STRING>();
+    set->insert(const_cast<char*>(""), 0);
+    set->insert(const_cast<char*>("same"), 4);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_in_list_predicate<PredicateType::IN_LIST>(
+            1, "value", schema[1].type, set, false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 5, 7}));
+    EXPECT_EQ(values, std::vector<std::string>({"", "same", "", "same"}));
+}
+
+TEST_F(NewParquetReaderTest, StatisticsPruningSkipsPrefixRowGroupsAndReadsLaterGroups) {
+    write_parquet_file(_file_path, 1);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 5);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 3));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(4), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"four", "five"}));
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderReturnsFileLocalPositions) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID),
+                                      field_projection(0)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int64_t> row_positions;
+    std::vector<int32_t> ids;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block_with_row_position(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& row_position_column =
+                assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            row_positions.push_back(row_position_column.get_element(row));
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    EXPECT_EQ(row_positions, std::vector<int64_t>({0, 1, 2, 3, 4}));
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderKeepsPositionsAfterSelection) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(id_column.get_element(1), 4);
+    EXPECT_EQ(id_column.get_element(2), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 2);
+    EXPECT_EQ(row_position_column.get_element(1), 3);
+    EXPECT_EQ(row_position_column.get_element(2), 4);
+}
+
+TEST_F(NewParquetReaderTest, DeletePredicateFiltersRowPositions) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+
+    static const std::vector<int64_t> deleted_rows {1, 3};
+    auto delete_predicate = std::make_shared<format::DeletePredicate>(deleted_rows);
+    delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared<DataTypeInt64>(),
+                                                        format::ROW_POSITION_COLUMN_NAME));
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate)));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    EXPECT_EQ(id_column.get_element(2), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 0);
+    EXPECT_EQ(row_position_column.get_element(1), 2);
+    EXPECT_EQ(row_position_column.get_element(2), 4);
+}
+
+TEST_F(NewParquetReaderTest, QueryPredicateAndDeletePredicateFilterRowPositions) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+
+    static const std::vector<int64_t> deleted_rows {3};
+    auto delete_predicate = std::make_shared<format::DeletePredicate>(deleted_rows);
+    delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared<DataTypeInt64>(),
+                                                        format::ROW_POSITION_COLUMN_NAME));
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0),
+                                  field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->non_predicate_columns = {};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate)));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 2);
+
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(id_column.get_element(1), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 2);
+    EXPECT_EQ(row_position_column.get_element(1), 4);
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderUsesFileLocalPositionsForScanRange) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+
+    const std::vector<std::vector<int32_t>> expected_ids = {{1, 2}, {3, 4}, {5}};
+    const std::vector<std::vector<int64_t>> expected_row_positions = {{0, 1}, {2, 3}, {4}};
+    for (int row_group_idx = 0; row_group_idx < 3; ++row_group_idx) {
+        const auto [range_start_offset, range_size] =
+                row_group_mid_range(_file_path, row_group_idx);
+        auto reader = create_reader(range_start_offset, range_size);
+        RuntimeState state {TQueryOptions(), TQueryGlobals()};
+        ASSERT_TRUE(reader->init(&state).ok());
+
+        std::vector<format::ColumnDefinition> schema;
+        ASSERT_TRUE(reader->get_schema(&schema).ok());
+        auto request = std::make_shared<format::FileScanRequest>();
+        request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID),
+                                          field_projection(0)};
+        request->local_positions = {
+                {format::LocalColumnId(0), format::LocalIndex(0)},
+                {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+        };
+        ASSERT_TRUE(reader->open(request).ok());
+
+        std::vector<int32_t> ids;
+        std::vector<int64_t> row_positions;
+        bool eof = false;
+        while (!eof) {
+            Block block = build_file_block_with_row_position(schema);
+            size_t rows = 0;
+            ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+            if (rows == 0) {
+                continue;
+            }
+            const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+            const auto& row_position_column =
+                    assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+            for (size_t row = 0; row < rows; ++row) {
+                ids.push_back(id_column.get_element(row));
+                row_positions.push_back(row_position_column.get_element(row));
+            }
+        }
+
+        EXPECT_EQ(ids, expected_ids[row_group_idx]);
+        EXPECT_EQ(row_positions, expected_row_positions[row_group_idx]);
+    }
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_scan_test.cpp b/be/test/format_v2/parquet/parquet_scan_test.cpp
new file mode 100644
index 00000000000000..3b381c3158fd45
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_scan_test.cpp
@@ -0,0 +1,804 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_scan.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cstring>
+#include <filesystem>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/config.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "gen_cpp/Types_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/utils.h"
+
+namespace doris {
+namespace {
+
+format::LocalColumnIndex field_projection(int32_t column_id) {
+    return format::LocalColumnIndex {.index = column_id};
+}
+
+const ColumnInt32& int32_data_column(const IColumn& column) {
+    if (const auto* nullable_column = check_and_get_column<ColumnNullable>(&column)) {
+        return assert_cast<const ColumnInt32&>(nullable_column->get_nested_column());
+    }
+    return assert_cast<const ColumnInt32&>(column);
+}
+
+const ColumnString& string_data_column(const IColumn& column) {
+    if (const auto* nullable_column = check_and_get_column<ColumnNullable>(&column)) {
+        return assert_cast<const ColumnString&>(nullable_column->get_nested_column());
+    }
+    return assert_cast<const ColumnString&>(column);
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_struct_array(const std::vector<int32_t>& ids,
+                                                 const std::vector<std::string>& names) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                       arrow::field("name", arrow::utf8(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(
+            std::make_unique<arrow::Int32Builder>().release()));
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(
+            std::make_unique<arrow::StringBuilder>().release()));
+    arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                 std::move(field_builders));
+    auto* id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    auto* name_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+    for (size_t row = 0; row < ids.size(); ++row) {
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(id_builder->Append(ids[row]).ok());
+        EXPECT_TRUE(name_builder->Append(names[row]).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_list_array() {
+    auto value_builder = std::make_unique<arrow::Int32Builder>();
+    arrow::ListBuilder builder(arrow::default_memory_pool(), std::move(value_builder));
+    auto* int_builder = assert_cast<arrow::Int32Builder*>(builder.value_builder());
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(int_builder->Append(1).ok());
+    EXPECT_TRUE(int_builder->Append(2).ok());
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(int_builder->Append(3).ok());
+    EXPECT_TRUE(builder.Append().ok());
+    return finish_array(&builder);
+}
+
+void write_table(const std::string& file_path, const std::shared_ptr<arrow::Table>& table,
+                 int64_t row_group_size, bool enable_dictionary = false,
+                 bool enable_page_index = false, bool enable_statistics = true) {
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    if (enable_dictionary) {
+        builder.enable_dictionary();
+    } else {
+        builder.disable_dictionary();
+    }
+    if (enable_page_index) {
+        builder.enable_write_page_index();
+        builder.write_batch_size(8);
+        builder.data_pagesize(10);
+    }
+    if (!enable_statistics) {
+        builder.disable_statistics();
+    }
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      row_group_size, builder.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = 2,
+                                 bool enable_statistics = true) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array({1, 2, 3, 4, 5, 6}),
+                                             build_int32_array({10, 20, 30, 40, 50, 60})});
+    write_table(file_path, table, row_group_size, false, false, enable_statistics);
+}
+
+void write_struct_parquet_file(const std::string& file_path) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                       arrow::field("name", arrow::utf8(), false)});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"})});
+    write_table(file_path, table, 2);
+}
+
+void write_list_parquet_file(const std::string& file_path) {
+    auto schema = arrow::schema({
+            arrow::field("xs", arrow::list(arrow::int32()), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_list_array()});
+    write_table(file_path, table, 2);
+}
+
+void write_page_index_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids)});
+    write_table(file_path, table, ids.size(), false, true);
+}
+
+void write_page_index_pair_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(ids)});
+    write_table(file_path, table, ids.size(), false, true);
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    return column_metadata.has_dictionary_page()
+                   ? static_cast<int64_t>(column_metadata.dictionary_page_offset())
+                   : static_cast<int64_t>(column_metadata.data_page_offset());
+}
+
+std::pair<int64_t, int64_t> row_group_mid_range(const std::string& file_path, int row_group_idx) {
+    auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    auto metadata = reader->metadata();
+    auto row_group_metadata = metadata->RowGroup(row_group_idx);
+    auto first_column = row_group_metadata->ColumnChunk(0);
+    auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1);
+    const int64_t row_group_start_offset = parquet_column_start_offset(*first_column);
+    const int64_t row_group_end_offset =
+            parquet_column_start_offset(*last_column) + last_column->total_compressed_size();
+    const int64_t row_group_mid_offset =
+            row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2;
+    return {row_group_mid_offset, 1};
+}
+
+Block build_file_block(const std::vector<format::ColumnDefinition>& schema) {
+    Block block;
+    for (const auto& field : schema) {
+        block.insert({field.type->create_column(), field.type, field.name});
+    }
+    return block;
+}
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row);
+    EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2));
+    GlobalRowLoacationV2 location(0, 0, 0, 0);
+    std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2));
+    return location;
+}
+
+void use_schema_order_positions(format::FileScanRequest* request,
+                                const std::vector<format::ColumnDefinition>& schema) {
+    DORIS_CHECK(request != nullptr);
+    for (size_t idx = 0; idx < schema.size(); ++idx) {
+        request->local_positions.emplace(format::LocalColumnId(schema[idx].local_id),
+                                         format::LocalIndex(idx));
+    }
+}
+
+std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> build_file_schema(
+        const ::parquet::ParquetFileReader& reader) {
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = reader.metadata()->schema();
+    EXPECT_NE(schema_descriptor, nullptr);
+    EXPECT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    return file_schema;
+}
+
+format::FileColumnPredicateFilter int32_filter(int32_t column_id, std::string column_name,
+                                               const DataTypePtr& type,
+                                               PredicateType predicate_type, int32_t value) {
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(column_id);
+    switch (predicate_type) {
+    case PredicateType::GE:
+        column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+        break;
+    case PredicateType::GT:
+        column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+        break;
+    case PredicateType::LT:
+        column_filter.predicates.push_back(create_comparison_predicate<PredicateType::LT>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+        break;
+    default:
+        DORIS_CHECK(false);
+    }
+    return column_filter;
+}
+
+int64_t count_range_rows(const std::vector<format::parquet::RowRange>& ranges) {
+    int64_t rows = 0;
+    for (const auto& range : ranges) {
+        rows += range.length;
+    }
+    return rows;
+}
+
+class ParquetScanTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_scan_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "scan.parquet").string();
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    std::unique_ptr<format::parquet::ParquetReader> create_reader(
+            int64_t range_start_offset = 0, int64_t range_size = -1,
+            RuntimeProfile* profile = nullptr,
+            std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt) const {
+        auto system_properties = std::make_shared<io::FileSystemProperties>();
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        auto file_description = std::make_unique<io::FileDescription>();
+        file_description->path = _file_path;
+        file_description->file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+        file_description->range_start_offset = range_start_offset;
+        file_description->range_size = range_size;
+        return std::make_unique<format::parquet::ParquetReader>(
+                system_properties, file_description, nullptr, profile, global_rowid_context);
+    }
+
+    std::shared_ptr<format::FileScanRequest> open_all_row_groups(
+            format::parquet::ParquetReader* reader) {
+        auto request = std::make_shared<format::FileScanRequest>();
+        EXPECT_TRUE(reader->open(request).ok());
+        return request;
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+};
+
+TEST_F(ParquetScanTest, PlanRowGroupsAppliesScanRangeBeforeStatistics) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5));
+
+    const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1);
+    format::parquet::ParquetScanRange scan_range;
+    scan_range.start_offset = range_start_offset;
+    scan_range.size = range_size;
+    scan_range.file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+
+    format::parquet::RowGroupScanPlan plan;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_TRUE(plan.row_groups.empty());
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 3);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2);
+}
+
+TEST_F(ParquetScanTest, PlanRowGroupsPreservesFirstFileRowAcrossPrunedRowGroups) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2);
+    EXPECT_EQ(plan.row_groups[0].first_file_row, 4);
+    EXPECT_EQ(plan.row_groups[0].row_group_rows, 2);
+    ASSERT_EQ(plan.row_groups[0].selected_ranges.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].selected_ranges[0].start, 0);
+    EXPECT_EQ(plan.row_groups[0].selected_ranges[0].length, 2);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 2);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 4);
+}
+
+TEST_F(ParquetScanTest, PlanRowGroupsSelectsAllRowGroupsWithoutFilters) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+
+    ASSERT_EQ(plan.row_groups.size(), 3);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 3);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 3);
+    for (size_t row_group_idx = 0; row_group_idx < plan.row_groups.size(); ++row_group_idx) {
+        EXPECT_EQ(plan.row_groups[row_group_idx].row_group_id, row_group_idx);
+        EXPECT_EQ(plan.row_groups[row_group_idx].first_file_row,
+                  static_cast<int64_t>(row_group_idx * 2));
+        ASSERT_EQ(plan.row_groups[row_group_idx].selected_ranges.size(), 1);
+        EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].start, 0);
+        EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].length, 2);
+        EXPECT_TRUE(plan.row_groups[row_group_idx].page_skip_plans.empty());
+    }
+}
+
+TEST_F(ParquetScanTest, PageIndexIntersectsMultipleFiltersAndBuildsSkipPlan) {
+    write_page_index_pair_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest single_filter_request;
+    single_filter_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32));
+    format::parquet::RowGroupScanPlan single_filter_plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(
+                        *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema,
+                        single_filter_request, scan_range, false, &single_filter_plan)
+                        .ok());
+    ASSERT_EQ(single_filter_plan.row_groups.size(), 1);
+    const int64_t single_filter_rows =
+            count_range_rows(single_filter_plan.row_groups[0].selected_ranges);
+
+    format::FileScanRequest intersect_request;
+    intersect_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32));
+    intersect_request.column_predicate_filters.push_back(
+            int32_filter(1, "score", file_schema[1]->type, PredicateType::LT, 96));
+    format::parquet::RowGroupScanPlan intersect_plan;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(
+                        *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema,
+                        intersect_request, scan_range, false, &intersect_plan)
+                        .ok());
+    ASSERT_EQ(intersect_plan.row_groups.size(), 1);
+    ASSERT_FALSE(intersect_plan.row_groups[0].selected_ranges.empty());
+    const int64_t intersect_rows = count_range_rows(intersect_plan.row_groups[0].selected_ranges);
+    EXPECT_GT(single_filter_rows, intersect_rows);
+    EXPECT_GT(intersect_plan.row_groups[0].selected_ranges.front().start, 0);
+    const auto& last_range = intersect_plan.row_groups[0].selected_ranges.back();
+    EXPECT_LT(last_range.start + last_range.length, 128);
+    EXPECT_GT(intersect_plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(intersect_plan.pruning_stats.selected_row_ranges,
+              intersect_plan.row_groups[0].selected_ranges.size());
+
+    auto id_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(0);
+    ASSERT_NE(id_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(id_skip_plan->second.leaf_column_id, 0);
+    EXPECT_FALSE(id_skip_plan->second.empty());
+    auto score_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(1);
+    ASSERT_NE(score_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(score_skip_plan->second.leaf_column_id, 1);
+    EXPECT_FALSE(score_skip_plan->second.empty());
+}
+
+TEST_F(ParquetScanTest, PageIndexCanFullyFilterRowGroupAfterRangeIntersection) {
+    write_page_index_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32));
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::LT, 32));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_TRUE(plan.row_groups.empty());
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_page_rows, 128);
+}
+
+TEST_F(ParquetScanTest, PageIndexFullRangeWhenDisabledOrUnavailable) {
+    write_page_index_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GT, 63));
+
+    const bool old_enable_page_index = config::enable_parquet_page_index;
+    config::enable_parquet_page_index = false;
+    std::vector<format::parquet::RowRange> selected_ranges;
+    std::map<int, format::parquet::ParquetPageSkipPlan> page_skip_plans;
+    format::parquet::ParquetPruningStats pruning_stats;
+    ASSERT_TRUE(format::parquet::select_row_group_ranges_by_page_index(
+                        parquet_file_reader.get(), file_schema, request, 0, 128, &selected_ranges,
+                        &page_skip_plans, &pruning_stats)
+                        .ok());
+    config::enable_parquet_page_index = old_enable_page_index;
+    ASSERT_EQ(selected_ranges.size(), 1);
+    EXPECT_EQ(selected_ranges[0].start, 0);
+    EXPECT_EQ(selected_ranges[0].length, 128);
+    EXPECT_TRUE(page_skip_plans.empty());
+    EXPECT_EQ(pruning_stats.page_index_read_calls, 0);
+
+    write_int_pair_parquet_file(_file_path, 6);
+    auto no_index_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    auto no_index_schema = build_file_schema(*no_index_reader);
+    format::FileScanRequest no_index_request;
+    no_index_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", no_index_schema[0]->type, PredicateType::GT, 3));
+    selected_ranges.clear();
+    page_skip_plans.clear();
+    pruning_stats = {};
+    ASSERT_TRUE(format::parquet::select_row_group_ranges_by_page_index(
+                        no_index_reader.get(), no_index_schema, no_index_request, 0, 6,
+                        &selected_ranges, &page_skip_plans, &pruning_stats)
+                        .ok());
+    ASSERT_EQ(selected_ranges.size(), 1);
+    EXPECT_EQ(selected_ranges[0].start, 0);
+    EXPECT_EQ(selected_ranges[0].length, 6);
+    EXPECT_TRUE(page_skip_plans.empty());
+}
+
+TEST_F(ParquetScanTest, AggregateCountAndMinMaxUseAllSelectedRowGroups) {
+    write_int_pair_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    open_all_row_groups(reader.get());
+
+    format::FileAggregateResult count_result;
+    format::FileAggregateRequest count_request;
+    count_request.agg_type = TPushAggOp::COUNT;
+    ASSERT_TRUE(reader->get_aggregate_result(count_request, &count_result).ok());
+    EXPECT_EQ(count_result.count, 6);
+    EXPECT_TRUE(count_result.columns.empty());
+
+    format::FileAggregateResult minmax_result;
+    format::FileAggregateRequest minmax_request;
+    minmax_request.agg_type = TPushAggOp::MINMAX;
+    minmax_request.columns.push_back({.projection = field_projection(0)});
+    minmax_request.columns.push_back({.projection = field_projection(1)});
+    ASSERT_TRUE(reader->get_aggregate_result(minmax_request, &minmax_result).ok());
+    EXPECT_EQ(minmax_result.count, 6);
+    ASSERT_EQ(minmax_result.columns.size(), 2);
+    EXPECT_TRUE(minmax_result.columns[0].has_min);
+    EXPECT_TRUE(minmax_result.columns[0].has_max);
+    EXPECT_EQ(minmax_result.columns[0].min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(minmax_result.columns[0].max_value.get<TYPE_INT>(), 6);
+    EXPECT_EQ(minmax_result.columns[1].min_value.get<TYPE_INT>(), 10);
+    EXPECT_EQ(minmax_result.columns[1].max_value.get<TYPE_INT>(), 60);
+}
+
+TEST_F(ParquetScanTest, AggregateRespectsStatisticsPrunedRowGroups) {
+    write_int_pair_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(5), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::MINMAX;
+    aggregate_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 2);
+    ASSERT_EQ(result.columns.size(), 1);
+    EXPECT_EQ(result.columns[0].min_value.get<TYPE_INT>(), 5);
+    EXPECT_EQ(result.columns[0].max_value.get<TYPE_INT>(), 6);
+}
+
+TEST_F(ParquetScanTest, AggregateCountKeepsRowGroupRowsAfterPageIndexPruning) {
+    write_page_index_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::COUNT;
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 128);
+}
+
+TEST_F(ParquetScanTest, AggregateMinMaxSupportsNestedSingleLeafProjection) {
+    write_struct_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    open_all_row_groups(reader.get());
+
+    format::LocalColumnIndex nested_id = format::LocalColumnIndex::partial_local(0);
+    nested_id.children.push_back(field_projection(0));
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::MINMAX;
+    aggregate_request.columns.push_back({.projection = nested_id});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 4);
+    ASSERT_EQ(result.columns.size(), 1);
+    EXPECT_EQ(result.columns[0].min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(result.columns[0].max_value.get<TYPE_INT>(), 11);
+}
+
+TEST_F(ParquetScanTest, AggregateRejectsRepeatedMissingStatisticsAndInvalidRequests) {
+    write_list_parquet_file(_file_path);
+    auto repeated_reader = create_reader();
+    RuntimeState repeated_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(repeated_reader->init(&repeated_state).ok());
+    open_all_row_groups(repeated_reader.get());
+
+    format::FileAggregateRequest repeated_request;
+    repeated_request.agg_type = TPushAggOp::MINMAX;
+    repeated_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult repeated_result;
+    EXPECT_FALSE(repeated_reader->get_aggregate_result(repeated_request, &repeated_result).ok());
+
+    write_int_pair_parquet_file(_file_path, 2, false);
+    auto no_stats_reader = create_reader();
+    RuntimeState no_stats_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(no_stats_reader->init(&no_stats_state).ok());
+    open_all_row_groups(no_stats_reader.get());
+    format::FileAggregateRequest no_stats_request;
+    no_stats_request.agg_type = TPushAggOp::MINMAX;
+    no_stats_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult no_stats_result;
+    EXPECT_FALSE(no_stats_reader->get_aggregate_result(no_stats_request, &no_stats_result).ok());
+
+    format::FileAggregateRequest invalid_type_request;
+    invalid_type_request.agg_type = TPushAggOp::MIX;
+    format::FileAggregateResult invalid_type_result;
+    EXPECT_FALSE(
+            no_stats_reader->get_aggregate_result(invalid_type_request, &invalid_type_result).ok());
+
+    format::FileAggregateRequest invalid_column_request;
+    invalid_column_request.agg_type = TPushAggOp::MINMAX;
+    invalid_column_request.columns.push_back({.projection = field_projection(100)});
+    format::FileAggregateResult invalid_column_result;
+    EXPECT_FALSE(
+            no_stats_reader->get_aggregate_result(invalid_column_request, &invalid_column_result)
+                    .ok());
+}
+
+TEST_F(ParquetScanTest, GlobalRowIdUsesFileLocalPositionForScanRange) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1);
+    format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42};
+    auto reader = create_reader(range_start_offset, range_size, nullptr, context);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0),
+                                      field_projection(format::GLOBAL_ROWID_COLUMN_ID)};
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<uint32_t> row_ids;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = int32_data_column(*block.get_by_position(0).column);
+        const auto& rowid_column = string_data_column(*block.get_by_position(2).column);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            const auto location = decode_rowid(rowid_column, row);
+            EXPECT_EQ(location.version, context.version);
+            EXPECT_EQ(location.backend_id, context.backend_id);
+            EXPECT_EQ(location.file_id, context.file_id);
+            row_ids.push_back(location.row_id);
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4}));
+    EXPECT_EQ(row_ids, std::vector<uint32_t>({2, 3}));
+}
+
+TEST_F(ParquetScanTest, EmptyScanPlanReturnsEofWithoutReadingColumns) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(100), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    Block block = build_file_block(schema);
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_EQ(rows, 0);
+    EXPECT_TRUE(eof);
+}
+
+TEST_F(ParquetScanTest, NoRequestedColumnsReturnsRowsOnlyAcrossRowGroups) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t total_rows = 0;
+    bool eof = false;
+    while (!eof) {
+        Block block;
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        EXPECT_EQ(block.columns(), 0);
+        total_rows += rows;
+    }
+    EXPECT_EQ(total_rows, 6);
+}
+
+TEST_F(ParquetScanTest, ProfileCountersReflectPageIndexAndRangeGapPruning) {
+    write_page_index_parquet_file(_file_path);
+    RuntimeProfile profile("profile");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0)};
+    use_schema_order_positions(request.get(), schema);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t total_rows = 0;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        total_rows += rows;
+    }
+
+    EXPECT_EQ(total_rows, 64);
+    ASSERT_NE(profile.get_counter("RowGroupsTotalNum"), nullptr);
+    ASSERT_NE(profile.get_counter("RowGroupsReadNum"), nullptr);
+    ASSERT_NE(profile.get_counter("FilteredRowsByPage"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRowRanges"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexReadCalls"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr);
+    EXPECT_EQ(profile.get_counter("RowGroupsTotalNum")->value(), 1);
+    EXPECT_EQ(profile.get_counter("RowGroupsReadNum")->value(), 1);
+    EXPECT_GT(profile.get_counter("FilteredRowsByPage")->value(), 0);
+    EXPECT_GT(profile.get_counter("SelectedRowRanges")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexReadCalls")->value(), 0);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64);
+    EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0);
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_schema_test.cpp b/be/test/format_v2/parquet/parquet_schema_test.cpp
new file mode 100644
index 00000000000000..e620ed718efbf2
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_schema_test.cpp
@@ -0,0 +1,527 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+
+namespace doris::format::parquet {
+namespace {
+
+std::vector<std::unique_ptr<ParquetColumnSchema>> build_fields(
+        const std::vector<::parquet::schema::NodePtr>& nodes) {
+    auto schema =
+            ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes);
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(schema);
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields;
+    EXPECT_TRUE(build_parquet_column_schema(descriptor, &fields).ok());
+    return fields;
+}
+
+Status build_status(const std::vector<::parquet::schema::NodePtr>& nodes) {
+    auto schema =
+            ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes);
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(schema);
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields;
+    return build_parquet_column_schema(descriptor, &fields);
+}
+
+} // namespace
+
+TEST(ParquetSchemaTest, PrimitiveStateAndFieldIdArePreserved) {
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make("required_i32", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::INT32),
+            ::parquet::schema::PrimitiveNode::Make("optional_i64", ::parquet::Repetition::OPTIONAL,
+                                                   ::parquet::Type::INT64,
+                                                   ::parquet::ConvertedType::NONE, -1, -1, -1, 42),
+    });
+
+    ASSERT_EQ(fields.size(), 2);
+    EXPECT_EQ(fields[0]->local_id, 0);
+    EXPECT_EQ(fields[0]->name, "required_i32");
+    EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(fields[0]->leaf_column_id, 0);
+    EXPECT_EQ(fields[0]->nullable_definition_level, 0);
+    EXPECT_FALSE(fields[0]->type->is_nullable());
+
+    EXPECT_EQ(fields[1]->local_id, 1);
+    EXPECT_EQ(fields[1]->parquet_field_id, 42);
+    EXPECT_EQ(fields[1]->leaf_column_id, 1);
+    EXPECT_EQ(fields[1]->nullable_definition_level, 1);
+    EXPECT_TRUE(fields[1]->type->is_nullable());
+}
+
+TEST(ParquetSchemaTest, PrimitiveTypeDescriptorCoversLogicalConvertedAndPhysicalFallback) {
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make(
+                    "ts", ::parquet::Repetition::OPTIONAL,
+                    ::parquet::LogicalType::Timestamp(false,
+                                                      ::parquet::LogicalType::TimeUnit::MICROS),
+                    ::parquet::Type::INT64),
+            ::parquet::schema::PrimitiveNode::Make("i8", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::INT32,
+                                                   ::parquet::ConvertedType::INT_8),
+            ::parquet::schema::PrimitiveNode::Make("plain", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::DOUBLE),
+    });
+
+    ASSERT_EQ(fields.size(), 3);
+    EXPECT_EQ(remove_nullable(fields[0]->type)->get_primitive_type(), TYPE_DATETIMEV2);
+    EXPECT_EQ(fields[0]->type_descriptor.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(fields[0]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+    EXPECT_TRUE(fields[0]->type_descriptor.is_timestamp);
+    EXPECT_FALSE(fields[0]->type_descriptor.timestamp_is_adjusted_to_utc);
+
+    EXPECT_EQ(remove_nullable(fields[1]->type)->get_primitive_type(), TYPE_TINYINT);
+    EXPECT_EQ(fields[1]->type_descriptor.integer_bit_width, 8);
+    EXPECT_FALSE(fields[1]->type_descriptor.is_unsigned_integer);
+
+    EXPECT_EQ(remove_nullable(fields[2]->type)->get_primitive_type(), TYPE_DOUBLE);
+    EXPECT_EQ(fields[2]->type_descriptor.physical_type, ::parquet::Type::DOUBLE);
+    EXPECT_EQ(fields[2]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::NONE);
+}
+
+TEST(ParquetSchemaTest, StructMakesDataTypeChildrenNullableAndPropagatesLevels) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "s", ::parquet::Repetition::OPTIONAL,
+            {
+                    ::parquet::schema::PrimitiveNode::Make("a", ::parquet::Repetition::REQUIRED,
+                                                           ::parquet::Type::INT32),
+                    ::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::OPTIONAL,
+                                                           ::parquet::Type::BYTE_ARRAY,
+                                                           ::parquet::ConvertedType::UTF8),
+            })});
+
+    ASSERT_EQ(fields.size(), 1);
+    const auto& struct_schema = *fields[0];
+    EXPECT_EQ(struct_schema.kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(struct_schema.nullable_definition_level, 1);
+    ASSERT_EQ(struct_schema.children.size(), 2);
+    EXPECT_EQ(struct_schema.children[0]->definition_level, 1);
+    EXPECT_EQ(struct_schema.children[1]->definition_level, 2);
+    EXPECT_EQ(struct_schema.max_definition_level, 2);
+
+    const auto& struct_type =
+            assert_cast<const DataTypeStruct&>(*remove_nullable(struct_schema.type));
+    ASSERT_EQ(struct_type.get_elements().size(), 2);
+    EXPECT_TRUE(struct_type.get_elements()[0]->is_nullable());
+    EXPECT_TRUE(struct_type.get_elements()[1]->is_nullable());
+}
+
+TEST(ParquetSchemaTest, ListCompatibilityRulesAndLevels) {
+    const auto standard_list = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+    const auto structural_array = ::parquet::schema::GroupNode::Make(
+            "ys", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "array", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})},
+            ::parquet::ConvertedType::LIST);
+
+    const auto fields = build_fields({standard_list, structural_array});
+    ASSERT_EQ(fields.size(), 2);
+
+    const auto& xs = *fields[0];
+    EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(xs.definition_level, 2);
+    EXPECT_EQ(xs.repetition_level, 1);
+    ASSERT_EQ(xs.children.size(), 1);
+    EXPECT_EQ(xs.children[0]->name, "element");
+    EXPECT_EQ(xs.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_TRUE(xs.children[0]->type->is_nullable());
+    const auto& xs_type = assert_cast<const DataTypeArray&>(*remove_nullable(xs.type));
+    EXPECT_TRUE(xs_type.get_nested_type()->is_nullable());
+
+    const auto& ys = *fields[1];
+    EXPECT_EQ(ys.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(ys.children.size(), 1);
+    EXPECT_EQ(ys.children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(remove_nullable(ys.children[0]->type)->get_primitive_type(), TYPE_STRUCT);
+}
+
+TEST(ParquetSchemaTest, LegacyListElementResolutionRulesArePreserved) {
+    const auto two_level_list = ::parquet::schema::GroupNode::Make(
+            "two_level", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::REPEATED,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::LIST);
+    const auto tuple_list = ::parquet::schema::GroupNode::Make(
+            "tuple_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "tuple_list_tuple", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})},
+            ::parquet::ConvertedType::LIST);
+    const auto multi_field_list = ::parquet::schema::GroupNode::Make(
+            "records", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("id", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::INT32),
+                     ::parquet::schema::PrimitiveNode::Make("name", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8)})},
+            ::parquet::ConvertedType::LIST);
+    const auto fields = build_fields({two_level_list, tuple_list, multi_field_list});
+    ASSERT_EQ(fields.size(), 3);
+
+    const auto& two_level = *fields[0];
+    EXPECT_EQ(two_level.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(two_level.definition_level, 2);
+    EXPECT_EQ(two_level.repetition_level, 1);
+    ASSERT_EQ(two_level.children.size(), 1);
+    EXPECT_EQ(two_level.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(two_level.children[0]->name, "element");
+    EXPECT_EQ(remove_nullable(two_level.children[0]->type)->get_primitive_type(), TYPE_INT);
+
+    const auto& tuple = *fields[1];
+    ASSERT_EQ(tuple.children.size(), 1);
+    EXPECT_EQ(tuple.children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(tuple.children[0]->name, "element");
+    ASSERT_EQ(tuple.children[0]->children.size(), 1);
+    EXPECT_EQ(tuple.children[0]->children[0]->name, "value");
+
+    const auto& multi_field = *fields[2];
+    ASSERT_EQ(multi_field.children.size(), 1);
+    EXPECT_EQ(multi_field.children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    ASSERT_EQ(multi_field.children[0]->children.size(), 2);
+    EXPECT_EQ(multi_field.children[0]->children[0]->name, "id");
+    EXPECT_EQ(multi_field.children[0]->children[1]->name, "name");
+}
+
+TEST(ParquetSchemaTest, NestedRepeatedInsideListElementIsWrappedOnce) {
+    const auto list_with_repeated_child = ::parquet::schema::GroupNode::Make(
+            "outer", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+
+    const auto fields = build_fields({list_with_repeated_child});
+    ASSERT_EQ(fields.size(), 1);
+    const auto& outer = *fields[0];
+    EXPECT_EQ(outer.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(outer.children.size(), 1);
+    const auto& element = *outer.children[0];
+    EXPECT_EQ(element.kind, ParquetColumnSchemaKind::STRUCT);
+    ASSERT_EQ(element.children.size(), 1);
+    EXPECT_EQ(element.children[0]->kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(element.children[0]->name, "items");
+    ASSERT_EQ(element.children[0]->children.size(), 1);
+    EXPECT_EQ(element.children[0]->children[0]->name, "element");
+}
+
+TEST(ParquetSchemaTest, ListWrapperWithLogicalAnnotationIsPreservedAsElement) {
+    const auto annotated_repeated_group = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)},
+                    ::parquet::ConvertedType::LIST)},
+            ::parquet::ConvertedType::LIST);
+
+    EXPECT_FALSE(build_status({annotated_repeated_group}).ok());
+
+    const auto nested_list_wrapper = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::GroupNode::Make(
+                            "list", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)})},
+                    ::parquet::ConvertedType::LIST)},
+            ::parquet::ConvertedType::LIST);
+
+    const auto fields = build_fields({nested_list_wrapper});
+    ASSERT_EQ(fields.size(), 1);
+    const auto& xs = *fields[0];
+    EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(xs.children.size(), 1);
+    const auto& element = *xs.children[0];
+    EXPECT_EQ(element.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(element.name, "element");
+    ASSERT_EQ(element.children.size(), 1);
+    EXPECT_EQ(element.children[0]->name, "element");
+    EXPECT_EQ(remove_nullable(element.children[0]->type)->get_primitive_type(), TYPE_INT);
+}
+
+TEST(ParquetSchemaTest, MapWrapperIsFoldedAndOptionalKeyIsAllowed) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "m", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {
+                            ::parquet::schema::PrimitiveNode::Make(
+                                    "key", ::parquet::Repetition::OPTIONAL,
+                                    ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                            ::parquet::schema::PrimitiveNode::Make("value",
+                                                                   ::parquet::Repetition::OPTIONAL,
+                                                                   ::parquet::Type::INT32),
+                    })},
+            ::parquet::ConvertedType::MAP)});
+
+    ASSERT_EQ(fields.size(), 1);
+    const auto& map_schema = *fields[0];
+    EXPECT_EQ(map_schema.kind, ParquetColumnSchemaKind::MAP);
+    EXPECT_EQ(map_schema.definition_level, 2);
+    EXPECT_EQ(map_schema.repetition_level, 1);
+    ASSERT_EQ(map_schema.children.size(), 2);
+    EXPECT_EQ(map_schema.children[0]->name, "key");
+    EXPECT_EQ(map_schema.children[1]->name, "value");
+    EXPECT_TRUE(map_schema.children[0]->type->is_nullable());
+
+    const auto& map_type = assert_cast<const DataTypeMap&>(*remove_nullable(map_schema.type));
+    EXPECT_TRUE(map_type.get_key_type()->is_nullable());
+    EXPECT_TRUE(map_type.get_value_type()->is_nullable());
+}
+
+TEST(ParquetSchemaTest, StandardMapLevelsAndDataTypesAreBuiltFromEntryContext) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "m", ::parquet::Repetition::REQUIRED,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {
+                            ::parquet::schema::PrimitiveNode::Make(
+                                    "key", ::parquet::Repetition::REQUIRED,
+                                    ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                            ::parquet::schema::PrimitiveNode::Make("value",
+                                                                   ::parquet::Repetition::OPTIONAL,
+                                                                   ::parquet::Type::INT32),
+                    })},
+            ::parquet::ConvertedType::MAP)});
+
+    ASSERT_EQ(fields.size(), 1);
+    const auto& map_schema = *fields[0];
+    EXPECT_FALSE(map_schema.type->is_nullable());
+    EXPECT_EQ(map_schema.definition_level, 1);
+    EXPECT_EQ(map_schema.repetition_level, 1);
+    EXPECT_EQ(map_schema.repeated_repetition_level, 1);
+    EXPECT_EQ(map_schema.max_definition_level, 2);
+    EXPECT_EQ(map_schema.max_repetition_level, 1);
+    ASSERT_EQ(map_schema.children.size(), 2);
+    EXPECT_EQ(map_schema.children[0]->definition_level, 1);
+    EXPECT_EQ(map_schema.children[0]->repetition_level, 1);
+    EXPECT_EQ(map_schema.children[1]->definition_level, 2);
+    EXPECT_EQ(map_schema.children[1]->nullable_definition_level, 2);
+
+    const auto& map_type = assert_cast<const DataTypeMap&>(*remove_nullable(map_schema.type));
+    EXPECT_TRUE(map_type.get_key_type()->is_nullable());
+    EXPECT_TRUE(map_type.get_value_type()->is_nullable());
+}
+
+TEST(ParquetSchemaTest, BareRepeatedFieldsAreWrappedAsLists) {
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make("items", ::parquet::Repetition::REPEATED,
+                                                   ::parquet::Type::INT32),
+            ::parquet::schema::GroupNode::Make(
+                    "links", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("url", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make("rank", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)}),
+    });
+
+    ASSERT_EQ(fields.size(), 2);
+    EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(fields[0]->children.size(), 1);
+    EXPECT_EQ(fields[0]->children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(fields[0]->children[0]->name, "element");
+
+    EXPECT_EQ(fields[1]->kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(fields[1]->children.size(), 1);
+    EXPECT_EQ(fields[1]->children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(fields[1]->children[0]->name, "element");
+}
+
+TEST(ParquetSchemaTest, DeepLevelChainPropagatesDefinitionAndRepetitionLevels) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "s", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "inner", ::parquet::Repetition::OPTIONAL,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})})});
+
+    ASSERT_EQ(fields.size(), 1);
+    const auto& s = *fields[0];
+    EXPECT_EQ(s.definition_level, 1);
+    EXPECT_EQ(s.nullable_definition_level, 1);
+    ASSERT_EQ(s.children.size(), 1);
+    const auto& inner = *s.children[0];
+    EXPECT_EQ(inner.definition_level, 2);
+    EXPECT_EQ(inner.nullable_definition_level, 2);
+    ASSERT_EQ(inner.children.size(), 1);
+    const auto& items = *inner.children[0];
+    EXPECT_EQ(items.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(items.definition_level, 3);
+    EXPECT_EQ(items.repetition_level, 1);
+    EXPECT_EQ(items.repeated_ancestor_definition_level, 3);
+    EXPECT_EQ(items.repeated_repetition_level, 1);
+    EXPECT_EQ(items.max_definition_level, 3);
+    EXPECT_EQ(items.max_repetition_level, 1);
+    ASSERT_EQ(items.children.size(), 1);
+    EXPECT_EQ(items.children[0]->definition_level, 3);
+    EXPECT_EQ(items.children[0]->repetition_level, 1);
+}
+
+TEST(ParquetSchemaTest, BuildEntryValidatesNullPointerAndEmptyRoot) {
+    auto empty_root = ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED,
+                                                         ::parquet::schema::NodeVector {});
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(empty_root);
+
+    EXPECT_FALSE(build_parquet_column_schema(descriptor, nullptr).ok());
+
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields;
+    ASSERT_TRUE(build_parquet_column_schema(descriptor, &fields).ok());
+    EXPECT_TRUE(fields.empty());
+}
+
+TEST(ParquetSchemaTest, RejectInvalidListMapAndUnsupportedTime) {
+    const auto bad_list = ::parquet::schema::GroupNode::Make(
+            "bad_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({bad_list}).ok());
+
+    const auto bad_map = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("entry", ::parquet::Repetition::REPEATED,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({bad_map}).ok());
+
+    const auto converted_time = ::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::TIME_MILLIS);
+    const auto status = build_status({converted_time});
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"),
+              std::string::npos);
+}
+
+TEST(ParquetSchemaTest, RejectAdditionalInvalidListAndMapLayouts) {
+    const auto zero_child_list = ::parquet::schema::GroupNode::Make(
+            "zero_child_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make("list", ::parquet::Repetition::REPEATED,
+                                                ::parquet::schema::NodeVector {})},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({zero_child_list}).ok());
+
+    const auto repeated_list = ::parquet::schema::GroupNode::Make(
+            "repeated_list", ::parquet::Repetition::REPEATED,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({repeated_list}).ok());
+
+    const auto map_with_two_fields = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {
+                    ::parquet::schema::GroupNode::Make(
+                            "entry1", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make(
+                                     "key", ::parquet::Repetition::REQUIRED,
+                                     ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                             ::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)}),
+                    ::parquet::schema::GroupNode::Make(
+                            "entry2", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make(
+                                     "key", ::parquet::Repetition::REQUIRED,
+                                     ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                             ::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)}),
+            },
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({map_with_two_fields}).ok());
+
+    const auto non_repeated_map_entry = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::OPTIONAL,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make(
+                             "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({non_repeated_map_entry}).ok());
+
+    const auto map_entry_with_one_child = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({map_entry_with_one_child}).ok());
+
+    const auto repeated_map = ::parquet::schema::GroupNode::Make(
+            "repeated_map", ::parquet::Repetition::REPEATED,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make(
+                             "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({repeated_map}).ok());
+}
+
+TEST(ParquetSchemaTest, LogicalUtcTimeIsRejected) {
+    const auto adjusted_time = ::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32);
+    const auto status = build_status({adjusted_time});
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"),
+              std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_serde_reader_test.cpp b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp
new file mode 100644
index 00000000000000..c35138e3263723
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp
@@ -0,0 +1,459 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cmath>
+#include <filesystem>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/types.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5;
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+class ParquetSerdeReaderTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_serde_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "serde.parquet").string();
+        write_parquet_file();
+        open_file(_file_path);
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    template <typename Builder, typename Value>
+    std::shared_ptr<arrow::Array> build_required_array(const std::vector<Value>& values) {
+        Builder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int32_array() {
+        arrow::Int32Builder builder;
+        EXPECT_TRUE(builder.Append(1).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(3).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(5).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_float16_array() {
+        arrow::HalfFloatBuilder builder;
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(0x0000).ok());
+        EXPECT_TRUE(builder.Append(0x8000).ok());
+        EXPECT_TRUE(builder.Append(0x3E00).ok());
+        EXPECT_TRUE(builder.Append(0x7E00).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_binary_array(const std::vector<std::string>& values) {
+        arrow::BinaryBuilder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
+                                       static_cast<int32_t>(value.size()))
+                                .ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+        arrow::StringBuilder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_fixed_binary_array(
+            const std::shared_ptr<arrow::DataType>& type, const std::vector<std::string>& values) {
+        arrow::FixedSizeBinaryBuilder builder(type, arrow::default_memory_pool());
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(value.data())).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_timestamp_array(
+            const std::shared_ptr<arrow::DataType>& type, const std::vector<int64_t>& values) {
+        arrow::TimestampBuilder builder(type, arrow::default_memory_pool());
+        for (const auto value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_decimal_array(const std::shared_ptr<arrow::DataType>& type,
+                                                      const std::vector<int64_t>& values) {
+        arrow::Decimal128Builder builder(type, arrow::default_memory_pool());
+        for (const auto value : values) {
+            EXPECT_TRUE(builder.Append(arrow::Decimal128(value)).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    void add_field(const std::shared_ptr<arrow::Field>& field,
+                   std::shared_ptr<arrow::Array> array) {
+        _arrow_fields.push_back(field);
+        _arrays.push_back(std::move(array));
+    }
+
+    void write_table(const std::string& file_path, const std::shared_ptr<arrow::Table>& table,
+                     std::shared_ptr<::parquet::ArrowWriterProperties> arrow_properties = nullptr) {
+        auto file_result = arrow::io::FileOutputStream::Open(file_path);
+        ASSERT_TRUE(file_result.ok()) << file_result.status();
+        ::parquet::WriterProperties::Builder writer_builder;
+        writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+        writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+        writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+        if (arrow_properties == nullptr) {
+            ::parquet::ArrowWriterProperties::Builder arrow_builder;
+            arrow_properties = arrow_builder.build();
+        }
+        PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(
+                *table, arrow::default_memory_pool(), *file_result, ROW_COUNT,
+                writer_builder.build(), std::move(arrow_properties)));
+    }
+
+    void write_parquet_file() {
+        add_field(arrow::field("bool_col", arrow::boolean(), false),
+                  build_required_array<arrow::BooleanBuilder, bool>(
+                          {true, false, true, false, true}));
+        add_field(arrow::field("int32_col", arrow::int32(), false),
+                  build_required_array<arrow::Int32Builder, int32_t>({10, 20, 30, 40, 50}));
+        add_field(arrow::field("int64_col", arrow::int64(), false),
+                  build_required_array<arrow::Int64Builder, int64_t>(
+                          {10000000000L, -9L, 42L, 77L, 123L}));
+        add_field(arrow::field("uint32_col", arrow::uint32(), false),
+                  build_required_array<arrow::UInt32Builder, uint32_t>(
+                          {0U, 1U, 1U << 31, std::numeric_limits<uint32_t>::max(), 42U}));
+        add_field(arrow::field("uint64_col", arrow::uint64(), false),
+                  build_required_array<arrow::UInt64Builder, uint64_t>(
+                          {0ULL, 1ULL, 1ULL << 63, std::numeric_limits<uint64_t>::max(), 42ULL}));
+        add_field(arrow::field("float_col", arrow::float32(), false),
+                  build_required_array<arrow::FloatBuilder, float>(
+                          {1.5F, -2.25F, 3.0F, 4.5F, 5.75F}));
+        add_field(arrow::field("double_col", arrow::float64(), false),
+                  build_required_array<arrow::DoubleBuilder, double>({3.5, -4.75, 6.0, 7.25, 8.5}));
+        add_field(arrow::field("nullable_float16_col", arrow::float16(), true),
+                  build_nullable_float16_array());
+        add_field(arrow::field("binary_col", arrow::binary(), false),
+                  build_binary_array({"bin_a", "bin_b", "bin_c", "bin_d", "bin_e"}));
+        add_field(arrow::field("string_col", arrow::utf8(), false),
+                  build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"}));
+        add_field(arrow::field("fixed_binary_col", arrow::fixed_size_binary(4), false),
+                  build_fixed_binary_array(arrow::fixed_size_binary(4),
+                                           {"aaaa", "bbbb", "cccc", "dddd", "eeee"}));
+        add_field(arrow::field("date_col", arrow::date32(), false),
+                  build_required_array<arrow::Date32Builder, int32_t>({0, 1, 18628, 18629, 18630}));
+        add_field(arrow::field("timestamp_millis_col", arrow::timestamp(arrow::TimeUnit::MILLI),
+                               false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MILLI),
+                                        {0, 1234, 1609459200000, 1609459201000, -1}));
+        add_field(arrow::field("timestamp_micros_col", arrow::timestamp(arrow::TimeUnit::MICRO),
+                               false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                        {0, 1234567, 1609459200000000, 1609459201000000, -1}));
+        add_field(arrow::field("timestamp_micros_utc_col",
+                               arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"),
+                                        {0, 1234567, 1609459200000000, 1609459201000000, -1}));
+        add_field(arrow::field("decimal_fixed_binary_9_2_col", arrow::decimal128(9, 2), false),
+                  build_decimal_array(arrow::decimal128(9, 2), {12345, -67, 0, 987, 1000}));
+        add_field(arrow::field("decimal_fixed_binary_18_6_col", arrow::decimal128(18, 6), false),
+                  build_decimal_array(arrow::decimal128(18, 6),
+                                      {1234567, -670000, 0, 9870000, 1000000}));
+        add_field(arrow::field("nullable_int_col", arrow::int32(), true),
+                  build_nullable_int32_array());
+
+        write_table(_file_path, arrow::Table::Make(arrow::schema(_arrow_fields), _arrays));
+    }
+
+    void open_file(const std::string& file_path) {
+        _file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+        ASSERT_NE(_file_reader, nullptr);
+        ASSERT_EQ(_file_reader->metadata()->num_row_groups(), 1);
+        _row_group = _file_reader->RowGroup(0);
+        ASSERT_NE(_row_group, nullptr);
+        auto schema_descriptor = _file_reader->metadata()->schema();
+        ASSERT_NE(schema_descriptor, nullptr);
+        auto st = build_parquet_column_schema(*schema_descriptor, &_fields);
+        ASSERT_TRUE(st.ok()) << st;
+    }
+
+    size_t find_field_idx(const std::string& name) const {
+        for (size_t field_idx = 0; field_idx < _fields.size(); ++field_idx) {
+            if (_fields[field_idx]->name == name) {
+                return field_idx;
+            }
+        }
+        ADD_FAILURE() << "Cannot find parquet serde test field " << name;
+        return _fields.size();
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_reader(size_t field_idx) const {
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(*_fields[field_idx], &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;
+    }
+
+    template <typename Validator>
+    void read_and_validate(const std::string& name, Validator validator) const {
+        const auto field_idx = find_field_idx(name);
+        ASSERT_TRUE(supports_record_reader(_fields[field_idx]->type_descriptor));
+        auto reader = create_reader(field_idx);
+        ASSERT_NE(reader, nullptr);
+        MutableColumnPtr column = reader->type()->create_column();
+        int64_t rows_read = 0;
+        auto st = reader->read(ROW_COUNT, column, &rows_read);
+        ASSERT_TRUE(st.ok()) << st;
+        ASSERT_EQ(rows_read, ROW_COUNT);
+        ASSERT_EQ(column->size(), ROW_COUNT);
+        validator(*_fields[field_idx], *column);
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::unique_ptr<::parquet::ParquetFileReader> _file_reader;
+    std::shared_ptr<::parquet::RowGroupReader> _row_group;
+    std::vector<std::unique_ptr<ParquetColumnSchema>> _fields;
+    std::vector<std::shared_ptr<arrow::Field>> _arrow_fields;
+    std::vector<std::shared_ptr<arrow::Array>> _arrays;
+};
+
+TEST_F(ParquetSerdeReaderTest, ReadAllSupportedPhysicalAndLogicalTypes) {
+    read_and_validate("bool_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BOOLEAN);
+        const auto& values = assert_cast<const ColumnBool&>(column);
+        EXPECT_EQ(values.get_element(0), 1);
+        EXPECT_EQ(values.get_element(1), 0);
+        EXPECT_EQ(values.get_element(4), 1);
+    });
+    read_and_validate("int32_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        const auto& values = assert_cast<const ColumnInt32&>(column);
+        EXPECT_EQ(values.get_element(0), 10);
+        EXPECT_EQ(values.get_element(4), 50);
+    });
+    read_and_validate("int64_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        const auto& values = assert_cast<const ColumnInt64&>(column);
+        EXPECT_EQ(values.get_element(0), 10000000000L);
+        EXPECT_EQ(values.get_element(1), -9L);
+    });
+    read_and_validate("uint32_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer);
+        EXPECT_EQ(schema.type_descriptor.integer_bit_width, 32);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_BIGINT);
+        const auto& values = assert_cast<const ColumnInt64&>(column);
+        EXPECT_EQ(values.get_element(2), 2147483648L);
+        EXPECT_EQ(values.get_element(3),
+                  static_cast<int64_t>(std::numeric_limits<uint32_t>::max()));
+    });
+    read_and_validate("uint64_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer);
+        EXPECT_EQ(schema.type_descriptor.integer_bit_width, 64);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_LARGEINT);
+        const auto& values = assert_cast<const ColumnInt128&>(column);
+        EXPECT_EQ(values.get_element(2), static_cast<int128_t>(1) << 63);
+        EXPECT_EQ(values.get_element(3),
+                  static_cast<int128_t>(std::numeric_limits<uint64_t>::max()));
+    });
+    read_and_validate("float_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FLOAT);
+        const auto& values = assert_cast<const ColumnFloat32&>(column);
+        EXPECT_FLOAT_EQ(values.get_element(0), 1.5F);
+        EXPECT_FLOAT_EQ(values.get_element(1), -2.25F);
+    });
+    read_and_validate("double_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::DOUBLE);
+        const auto& values = assert_cast<const ColumnFloat64&>(column);
+        EXPECT_DOUBLE_EQ(values.get_element(0), 3.5);
+        EXPECT_DOUBLE_EQ(values.get_element(1), -4.75);
+    });
+    read_and_validate("nullable_float16_col", [](const ParquetColumnSchema& schema,
+                                                 const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_EQ(schema.type_descriptor.fixed_length, 2);
+        EXPECT_EQ(schema.type_descriptor.extra_type_info, ParquetExtraTypeInfo::FLOAT16);
+        EXPECT_FALSE(schema.type_descriptor.is_string_like);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_FLOAT);
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+        const auto& values = assert_cast<const ColumnFloat32&>(nullable_column.get_nested_column());
+        ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+        EXPECT_TRUE(nullable_column.is_null_at(0));
+        EXPECT_FLOAT_EQ(values.get_element(1), 0.0F);
+        EXPECT_FALSE(std::signbit(values.get_element(1)));
+        EXPECT_FLOAT_EQ(values.get_element(2), -0.0F);
+        EXPECT_TRUE(std::signbit(values.get_element(2)));
+        EXPECT_FLOAT_EQ(values.get_element(3), 1.5F);
+        EXPECT_TRUE(std::isnan(values.get_element(4)));
+    });
+    read_and_validate("binary_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BYTE_ARRAY);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "bin_a");
+        EXPECT_EQ(values.get_data_at(3).to_string(), "bin_d");
+    });
+    read_and_validate("string_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_TRUE(schema.type_descriptor.is_string_like);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "alpha");
+        EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon");
+    });
+    read_and_validate("fixed_binary_col", [](const ParquetColumnSchema& schema,
+                                             const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_EQ(schema.type_descriptor.fixed_length, 4);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "aaaa");
+        EXPECT_EQ(values.get_data_at(2).to_string(), "cccc");
+    });
+    read_and_validate("date_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATEV2);
+        EXPECT_EQ(schema.type->to_string(column, 0), "1970-01-01");
+        EXPECT_EQ(schema.type->to_string(column, 2), "2021-01-01");
+    });
+    read_and_validate(
+            "timestamp_millis_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+                EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+                EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234");
+                EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999");
+            });
+    read_and_validate(
+            "timestamp_micros_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+                EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+                EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567");
+                EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999");
+            });
+    read_and_validate("timestamp_micros_utc_col", [](const ParquetColumnSchema& schema,
+                                                     const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        EXPECT_TRUE(schema.type_descriptor.timestamp_is_adjusted_to_utc);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+        EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567");
+        EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999");
+    });
+    read_and_validate("decimal_fixed_binary_9_2_col", [](const ParquetColumnSchema& schema,
+                                                         const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_TRUE(schema.type_descriptor.is_decimal);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL32);
+        const auto& values = assert_cast<const ColumnDecimal32&>(column);
+        EXPECT_EQ(values.get_element(0), Decimal32(12345));
+        EXPECT_EQ(schema.type->to_string(column, 0), "123.45");
+    });
+    read_and_validate("decimal_fixed_binary_18_6_col", [](const ParquetColumnSchema& schema,
+                                                          const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_TRUE(schema.type_descriptor.is_decimal);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL64);
+        const auto& values = assert_cast<const ColumnDecimal64&>(column);
+        EXPECT_EQ(values.get_element(0), Decimal64(1234567));
+        EXPECT_EQ(schema.type->to_string(column, 0), "1.234567");
+    });
+    read_and_validate(
+            "nullable_int_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_TRUE(schema.type->is_nullable());
+                const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                const auto& nested_column =
+                        assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+                ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                EXPECT_FALSE(nullable_column.is_null_at(0));
+                EXPECT_TRUE(nullable_column.is_null_at(1));
+                EXPECT_FALSE(nullable_column.is_null_at(2));
+                EXPECT_TRUE(nullable_column.is_null_at(3));
+                EXPECT_EQ(nested_column.get_element(0), 1);
+                EXPECT_EQ(nested_column.get_element(2), 3);
+            });
+}
+
+TEST_F(ParquetSerdeReaderTest, ReadInt96TimestampAsDateTimeV2) {
+    const auto file_path = (_test_dir / "int96_timestamp.parquet").string();
+    auto field = arrow::field("col_datetime", arrow::timestamp(arrow::TimeUnit::MICRO), false);
+    auto array = build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                       {0, 1234567, 1609459200000000, 1609459201000000, -1});
+    auto table = arrow::Table::Make(arrow::schema({field}), {array});
+
+    ::parquet::ArrowWriterProperties::Builder arrow_builder;
+    arrow_builder.enable_force_write_int96_timestamps();
+    _fields.clear();
+    _file_reader.reset();
+    _row_group.reset();
+    write_table(file_path, table, arrow_builder.build());
+    open_file(file_path);
+
+    ASSERT_EQ(_fields.size(), 1);
+    EXPECT_EQ(_fields[0]->type_descriptor.physical_type, ::parquet::Type::INT96);
+    EXPECT_EQ(_fields[0]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP);
+    ASSERT_TRUE(supports_record_reader(_fields[0]->type_descriptor));
+    ASSERT_EQ(remove_nullable(_fields[0]->type)->get_primitive_type(), TYPE_DATETIMEV2);
+
+    auto reader = create_reader(0);
+    ASSERT_NE(reader, nullptr);
+    auto column = _fields[0]->type->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(ROW_COUNT, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, ROW_COUNT);
+    EXPECT_EQ(_fields[0]->type->to_string(*column, 0), "1970-01-01 00:00:00.000000");
+    EXPECT_EQ(_fields[0]->type->to_string(*column, 1), "1970-01-01 00:00:01.234567");
+    EXPECT_EQ(_fields[0]->type->to_string(*column, 2), "2021-01-01 00:00:00.000000");
+    EXPECT_EQ(_fields[0]->type->to_string(*column, 4), "1969-12-31 23:59:59.999999");
+}
+
+} // namespace
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_statistics_test.cpp b/be/test/format_v2/parquet/parquet_statistics_test.cpp
new file mode 100644
index 00000000000000..f2ae2448013d26
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_statistics_test.cpp
@@ -0,0 +1,460 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/bloom_filter.h>
+
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "storage/predicate/accept_null_predicate.h"
+#include "storage/predicate/null_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+namespace {
+
+format::parquet::ParquetColumnSchema primitive_bloom_schema(const DataTypePtr& type) {
+    format::parquet::ParquetColumnSchema schema;
+    schema.local_id = 0;
+    schema.name = "c0";
+    schema.type = type;
+    schema.leaf_column_id = 0;
+    schema.kind = format::parquet::ParquetColumnSchemaKind::PRIMITIVE;
+    return schema;
+}
+
+format::FileColumnPredicateFilter bloom_filter_with_predicate(
+        const std::shared_ptr<ColumnPredicate>& predicate) {
+    format::FileColumnPredicateFilter filter;
+    filter.file_column_id = format::LocalColumnId(0);
+    filter.target = format::FileNestedPredicateTarget(filter.file_column_id);
+    filter.predicates.push_back(predicate);
+    return filter;
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> int32_array(const std::vector<std::optional<int32_t>>& values) {
+    arrow::Int32Builder builder;
+    for (const auto& value : values) {
+        if (value.has_value()) {
+            EXPECT_TRUE(builder.Append(*value).ok());
+        } else {
+            EXPECT_TRUE(builder.AppendNull().ok());
+        }
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> uint32_array(const std::vector<uint32_t>& values) {
+    arrow::UInt32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder builder;
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> timestamp_array(const std::vector<int64_t>& values) {
+    arrow::TimestampBuilder builder(arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"),
+                                    arrow::default_memory_pool());
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::unique_ptr<::parquet::ParquetFileReader> make_reader(
+        const std::shared_ptr<arrow::Table>& table, int64_t row_group_size, bool enable_dictionary,
+        bool enable_statistics) {
+    auto out_result = arrow::io::BufferOutputStream::Create();
+    EXPECT_TRUE(out_result.ok());
+    auto out = *out_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    if (enable_dictionary) {
+        builder.enable_dictionary();
+    } else {
+        builder.disable_dictionary();
+    }
+    if (!enable_statistics) {
+        builder.disable_statistics();
+    }
+    EXPECT_TRUE(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                             row_group_size, builder.build())
+                        .ok());
+    auto buffer_result = out->Finish();
+    EXPECT_TRUE(buffer_result.ok());
+    return ::parquet::ParquetFileReader::Open(
+            std::make_shared<arrow::io::BufferReader>(*buffer_result));
+}
+
+std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> build_file_schema(
+        const ::parquet::ParquetFileReader& reader) {
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    EXPECT_TRUE(
+            format::parquet::build_parquet_column_schema(*reader.metadata()->schema(), &file_schema)
+                    .ok());
+    return file_schema;
+}
+
+format::FileScanRequest request_with_filter(format::FileColumnPredicateFilter filter) {
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(std::move(filter));
+    return request;
+}
+
+::parquet::BlockSplitBloomFilter bloom_filter_for_int32_values(const std::vector<int32_t>& values) {
+    ::parquet::BlockSplitBloomFilter bloom_filter;
+    bloom_filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes);
+    for (const auto value : values) {
+        bloom_filter.InsertHash(bloom_filter.Hash(value));
+    }
+    return bloom_filter;
+}
+
+TEST(ParquetStatisticsTransformTest, ConvertsMinMaxNullCountUnsignedStringAndTimestamp) {
+    auto table = arrow::Table::Make(
+            arrow::schema({
+                    arrow::field("i", arrow::int32(), true),
+                    arrow::field("u", arrow::uint32(), false),
+                    arrow::field("s", arrow::utf8(), false),
+                    arrow::field("ts", arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false),
+            }),
+            {int32_array({1, std::nullopt, 5}), uint32_array({7, 9, 11}),
+             string_array({"alpha", "beta", "omega"}), timestamp_array({1000, 2000, 3000})});
+    auto reader = make_reader(table, 3, false, true);
+    auto schema = build_file_schema(*reader);
+    auto row_group = reader->metadata()->RowGroup(0);
+
+    const auto int_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *schema[0], row_group->ColumnChunk(0)->statistics());
+    EXPECT_TRUE(int_stats.has_min_max);
+    EXPECT_TRUE(int_stats.has_null_count);
+    EXPECT_TRUE(int_stats.has_null);
+    EXPECT_TRUE(int_stats.has_not_null);
+    EXPECT_EQ(int_stats.min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(int_stats.max_value.get<TYPE_INT>(), 5);
+
+    const auto uint_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *schema[1], row_group->ColumnChunk(1)->statistics());
+    EXPECT_TRUE(uint_stats.has_min_max);
+    EXPECT_EQ(uint_stats.min_value.get<TYPE_BIGINT>(), 7);
+    EXPECT_EQ(uint_stats.max_value.get<TYPE_BIGINT>(), 11);
+
+    const auto string_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *schema[2], row_group->ColumnChunk(2)->statistics());
+    EXPECT_TRUE(string_stats.has_min_max);
+    EXPECT_EQ(string_stats.min_value.get<TYPE_STRING>(), "alpha");
+    EXPECT_EQ(string_stats.max_value.get<TYPE_STRING>(), "omega");
+
+    auto utc = cctz::utc_time_zone();
+    const auto timestamp_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *schema[3], row_group->ColumnChunk(3)->statistics(), &utc);
+    EXPECT_TRUE(timestamp_stats.has_min_max);
+    EXPECT_EQ(timestamp_stats.min_value.get_type(), TYPE_DATETIMEV2);
+    EXPECT_EQ(timestamp_stats.max_value.get_type(), TYPE_DATETIMEV2);
+    EXPECT_LT(timestamp_stats.min_value, timestamp_stats.max_value);
+}
+
+TEST(ParquetStatisticsTransformTest, HandlesMissingStatisticsAndAllNullChunks) {
+    auto no_stats_table = arrow::Table::Make(
+            arrow::schema({arrow::field("i", arrow::int32(), true)}), {int32_array({1, 2, 3})});
+    auto no_stats_reader = make_reader(no_stats_table, 3, false, false);
+    auto no_stats_schema = build_file_schema(*no_stats_reader);
+    auto no_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *no_stats_schema[0],
+            no_stats_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics());
+    EXPECT_FALSE(no_stats.has_min_max);
+
+    auto all_null_table =
+            arrow::Table::Make(arrow::schema({arrow::field("i", arrow::int32(), true)}),
+                               {int32_array({std::nullopt, std::nullopt})});
+    auto all_null_reader = make_reader(all_null_table, 2, false, true);
+    auto all_null_schema = build_file_schema(*all_null_reader);
+    auto all_null_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *all_null_schema[0],
+            all_null_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics());
+    EXPECT_TRUE(all_null_stats.has_null_count);
+    EXPECT_TRUE(all_null_stats.has_null);
+    EXPECT_FALSE(all_null_stats.has_not_null);
+    EXPECT_FALSE(all_null_stats.has_min_max);
+}
+
+TEST(ParquetStatisticsPruningTest, StatisticsPredicatesAndNullPredicatesPruneRowGroups) {
+    auto table = arrow::Table::Make(arrow::schema({arrow::field("i", arrow::int32(), true)}),
+                                    {int32_array({std::nullopt, std::nullopt, 3, 4, 5, 6})});
+    auto reader = make_reader(table, 2, false, true);
+    auto schema = build_file_schema(*reader);
+
+    format::FileColumnPredicateFilter ge_filter;
+    ge_filter.file_column_id = format::LocalColumnId(0);
+    ge_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "i", schema[0]->type, Field::create_field<TYPE_INT>(5), false));
+    std::vector<int> selected;
+    format::parquet::ParquetPruningStats pruning_stats;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema, request_with_filter(ge_filter),
+                        nullptr, &selected, false, &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({2}));
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_statistics, 2);
+
+    format::FileColumnPredicateFilter is_not_null_filter;
+    is_not_null_filter.file_column_id = format::LocalColumnId(0);
+    is_not_null_filter.predicates.push_back(
+            std::make_shared<NullPredicate>(0, "i", false, TYPE_INT));
+    selected.clear();
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema,
+                        request_with_filter(is_not_null_filter), nullptr, &selected, false,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({1, 2}));
+
+    format::FileColumnPredicateFilter is_null_filter;
+    is_null_filter.file_column_id = format::LocalColumnId(0);
+    is_null_filter.predicates.push_back(std::make_shared<NullPredicate>(0, "i", true, TYPE_INT));
+    selected.clear();
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema,
+                        request_with_filter(is_null_filter), nullptr, &selected, false,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({0}));
+}
+
+TEST(ParquetStatisticsPruningTest, DictionaryPruningHandlesExcludeIncludeAndUnsupportedPaths) {
+    auto table = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}),
+                                    {string_array({"alpha", "beta", "gamma", "omega"})});
+    auto reader = make_reader(table, 2, true, false);
+    auto schema = build_file_schema(*reader);
+
+    format::FileColumnPredicateFilter absent_filter;
+    absent_filter.file_column_id = format::LocalColumnId(0);
+    absent_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", schema[0]->type, Field::create_field<TYPE_STRING>("missing"), false));
+    std::vector<int> selected;
+    format::parquet::ParquetPruningStats pruning_stats;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema,
+                        request_with_filter(absent_filter), nullptr, &selected, false,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_TRUE(selected.empty());
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 2);
+
+    format::FileColumnPredicateFilter present_filter;
+    present_filter.file_column_id = format::LocalColumnId(0);
+    present_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", schema[0]->type, Field::create_field<TYPE_STRING>("gamma"), false));
+    selected.clear();
+    pruning_stats = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema,
+                        request_with_filter(present_filter), nullptr, &selected, false,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({1}));
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 1);
+
+    auto plain_reader = make_reader(table, 2, false, false);
+    auto plain_schema = build_file_schema(*plain_reader);
+    selected.clear();
+    pruning_stats = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *plain_reader->metadata(), plain_reader.get(), plain_schema,
+                        request_with_filter(absent_filter), nullptr, &selected, false,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({0, 1}));
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 0);
+}
+
+TEST(ParquetStatisticsPruningTest, StatisticsRunsBeforeDictionaryAndMissingBloomKeepsRows) {
+    auto table = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}),
+                                    {string_array({"alpha", "beta", "gamma", "omega"})});
+    auto reader = make_reader(table, 2, true, true);
+    auto schema = build_file_schema(*reader);
+
+    format::FileColumnPredicateFilter beyond_max_filter;
+    beyond_max_filter.file_column_id = format::LocalColumnId(0);
+    beyond_max_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "s", schema[0]->type, Field::create_field<TYPE_STRING>("zzzz"), false));
+    std::vector<int> selected;
+    format::parquet::ParquetPruningStats pruning_stats;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *reader->metadata(), reader.get(), schema,
+                        request_with_filter(beyond_max_filter), nullptr, &selected, true,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_TRUE(selected.empty());
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_statistics, 2);
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 0);
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_bloom_filter, 0);
+
+    auto no_stats_reader = make_reader(table, 2, false, false);
+    auto no_stats_schema = build_file_schema(*no_stats_reader);
+    format::FileColumnPredicateFilter missing_bloom_filter;
+    missing_bloom_filter.file_column_id = format::LocalColumnId(0);
+    missing_bloom_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", no_stats_schema[0]->type, Field::create_field<TYPE_STRING>("absent"), false));
+    selected.clear();
+    pruning_stats = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *no_stats_reader->metadata(), no_stats_reader.get(), no_stats_schema,
+                        request_with_filter(missing_bloom_filter), nullptr, &selected, true,
+                        &pruning_stats)
+                        .ok());
+    EXPECT_EQ(selected, std::vector<int>({0, 1}));
+    EXPECT_EQ(pruning_stats.filtered_row_groups_by_bloom_filter, 0);
+}
+
+::parquet::BlockSplitBloomFilter bloom_filter_for_string_values(
+        const std::vector<std::string>& values) {
+    ::parquet::BlockSplitBloomFilter bloom_filter;
+    bloom_filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes);
+    for (const auto& value : values) {
+        ::parquet::ByteArray byte_array(static_cast<uint32_t>(value.size()),
+                                        reinterpret_cast<const uint8_t*>(value.data()));
+        bloom_filter.InsertHash(bloom_filter.Hash(&byte_array));
+    }
+    return bloom_filter;
+}
+
+TEST(ParquetBloomFilterPruningTest, EqPredicateUsesArrowHashAndPrunesAbsentIntValue) {
+    auto schema = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    auto bloom_filter = bloom_filter_for_int32_values({1, 3});
+    auto absent_filter = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", schema.type, Field::create_field<TYPE_INT>(2), false));
+    auto present_filter =
+            bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+                    0, "c0", schema.type, Field::create_field<TYPE_INT>(3), false));
+
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter,
+                                                                             bloom_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            schema, present_filter, bloom_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, InPredicatePrunesOnlyWhenAllValuesAreAbsent) {
+    auto schema = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    auto bloom_filter = bloom_filter_for_int32_values({1, 3});
+
+    auto absent_set = build_set<TYPE_INT>();
+    int32_t absent_first = 2;
+    int32_t absent_second = 4;
+    absent_set->insert(&absent_first);
+    absent_set->insert(&absent_second);
+    auto absent_filter =
+            bloom_filter_with_predicate(create_in_list_predicate<PredicateType::IN_LIST>(
+                    0, "c0", schema.type, absent_set, false));
+
+    auto present_set = build_set<TYPE_INT>();
+    int32_t present_first = 2;
+    int32_t present_second = 3;
+    present_set->insert(&present_first);
+    present_set->insert(&present_second);
+    auto present_filter =
+            bloom_filter_with_predicate(create_in_list_predicate<PredicateType::IN_LIST>(
+                    0, "c0", schema.type, present_set, false));
+
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter,
+                                                                             bloom_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            schema, present_filter, bloom_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, BooleanPredicateHashesAsParquetInt32) {
+    auto schema = primitive_bloom_schema(std::make_shared<DataTypeBool>());
+    auto bloom_filter = bloom_filter_for_int32_values({1});
+    auto false_filter = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", schema.type, Field::create_field<TYPE_BOOLEAN>(false), false));
+    auto true_filter = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", schema.type, Field::create_field<TYPE_BOOLEAN>(true), false));
+
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, false_filter,
+                                                                             bloom_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, true_filter,
+                                                                              bloom_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, StringPredicateUsesArrowByteArrayHash) {
+    auto schema = primitive_bloom_schema(std::make_shared<DataTypeString>());
+    auto bloom_filter = bloom_filter_for_string_values({"alpha", "omega"});
+    auto absent_filter = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", schema.type, Field::create_field<TYPE_STRING>("beta"), false));
+    auto present_filter =
+            bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+                    0, "c0", schema.type, Field::create_field<TYPE_STRING>("alpha"), false));
+
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter,
+                                                                             bloom_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            schema, present_filter, bloom_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, NullableAcceptingAndUnsupportedPredicatesKeepRowGroup) {
+    auto schema = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    auto bloom_filter = bloom_filter_for_int32_values({1});
+    auto nested_predicate = create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", schema.type, Field::create_field<TYPE_INT>(2), false);
+    auto accept_null_filter =
+            bloom_filter_with_predicate(std::make_shared<AcceptNullPredicate>(nested_predicate));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            schema, accept_null_filter, bloom_filter));
+
+    auto unsupported_schema = primitive_bloom_schema(std::make_shared<DataTypeInt16>());
+    auto unsupported_filter =
+            bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+                    0, "c0", unsupported_schema.type, Field::create_field<TYPE_SMALLINT>(2),
+                    false));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            unsupported_schema, unsupported_filter, bloom_filter));
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_type_test.cpp b/be/test/format_v2/parquet/parquet_type_test.cpp
new file mode 100644
index 00000000000000..4bca77c1803b49
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_type_test.cpp
@@ -0,0 +1,494 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_type.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/file_reader.h>
+
+#include <vector>
+
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/primitive_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+::parquet::SchemaDescriptor make_descriptor(const ::parquet::schema::NodePtr& node) {
+    auto schema =
+            ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, {node});
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(schema);
+    return descriptor;
+}
+
+ParquetTypeDescriptor resolve_node(const ::parquet::schema::NodePtr& node) {
+    auto descriptor = make_descriptor(node);
+    return resolve_parquet_type(descriptor.Column(0));
+}
+
+PrimitiveType primitive_type(const DataTypePtr& type) {
+    return remove_nullable(type)->get_primitive_type();
+}
+
+int scale_of(const DataTypePtr& type) {
+    return remove_nullable(type)->get_scale();
+}
+
+std::shared_ptr<arrow::Array> make_float16_array() {
+    arrow::HalfFloatBuilder builder;
+    EXPECT_TRUE(builder.Append(0x3E00).ok());
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder.Finish(&array).ok());
+    return array;
+}
+
+ParquetTypeDescriptor resolve_arrow_float16_type() {
+    const auto schema = arrow::schema({arrow::field("f16", arrow::float16(), true)});
+    const auto table = arrow::Table::Make(schema, {make_float16_array()});
+    auto out_result = arrow::io::BufferOutputStream::Create();
+    EXPECT_TRUE(out_result.ok());
+    auto out = *out_result;
+    EXPECT_TRUE(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1).ok());
+    auto buffer_result = out->Finish();
+    EXPECT_TRUE(buffer_result.ok());
+
+    auto reader = ::parquet::ParquetFileReader::Open(
+            std::make_shared<arrow::io::BufferReader>(*buffer_result));
+    return resolve_parquet_type(reader->metadata()->schema()->Column(0));
+}
+
+} // namespace
+
+TEST(ParquetTypeTest, ResolveLogicalIntegerMappings) {
+    struct Case {
+        int bit_width;
+        bool is_signed;
+        PrimitiveType expected_type;
+        bool expected_unsigned;
+    };
+    const std::vector<Case> cases = {
+            {8, true, TYPE_TINYINT, false},   {8, false, TYPE_SMALLINT, true},
+            {16, true, TYPE_SMALLINT, false}, {16, false, TYPE_INT, true},
+            {32, true, TYPE_INT, false},      {32, false, TYPE_BIGINT, true},
+            {64, true, TYPE_BIGINT, false},   {64, false, TYPE_LARGEINT, true},
+    };
+
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.bit_width);
+        const auto node = ::parquet::schema::PrimitiveNode::Make(
+                "c", ::parquet::Repetition::REQUIRED,
+                ::parquet::LogicalType::Int(test_case.bit_width, test_case.is_signed),
+                test_case.bit_width == 64 ? ::parquet::Type::INT64 : ::parquet::Type::INT32);
+        const auto type = resolve_node(node);
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(type.integer_bit_width, test_case.bit_width);
+        EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned);
+        EXPECT_TRUE(type.supports_record_reader);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveLogicalTimeAndTimestampMappings) {
+    const auto time_millis = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32));
+    ASSERT_NE(time_millis.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(time_millis.doris_type), TYPE_TIMEV2);
+    EXPECT_EQ(time_millis.time_unit, ParquetTimeUnit::MILLIS);
+    EXPECT_EQ(time_millis.extra_type_info, ParquetExtraTypeInfo::UNIT_MS);
+
+    const auto time_micros = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_us", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MICROS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(time_micros.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(time_micros.doris_type), TYPE_TIMEV2);
+    EXPECT_EQ(time_micros.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(time_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+
+    const auto adjusted_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_adjusted", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32));
+    EXPECT_EQ(adjusted_time.doris_type, nullptr);
+    EXPECT_FALSE(adjusted_time.supports_record_reader);
+    EXPECT_FALSE(adjusted_time.unsupported_reason.empty());
+
+    const auto timestamp_nanos = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_ns", ::parquet::Repetition::OPTIONAL,
+            ::parquet::LogicalType::Timestamp(true, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(timestamp_nanos.doris_type, nullptr);
+    EXPECT_TRUE(timestamp_nanos.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(timestamp_nanos.doris_type), TYPE_DATETIMEV2);
+    EXPECT_TRUE(timestamp_nanos.is_timestamp);
+    EXPECT_TRUE(timestamp_nanos.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(timestamp_nanos.time_unit, ParquetTimeUnit::NANOS);
+    EXPECT_EQ(timestamp_nanos.extra_type_info, ParquetExtraTypeInfo::UNIT_NS);
+}
+
+TEST(ParquetTypeTest, ResolveLogicalTimestampMatrix) {
+    struct Case {
+        ::parquet::LogicalType::TimeUnit::unit parquet_unit;
+        bool adjusted_to_utc;
+        ParquetTimeUnit expected_unit;
+        ParquetExtraTypeInfo expected_extra;
+        int expected_scale;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::LogicalType::TimeUnit::MILLIS, true, ParquetTimeUnit::MILLIS,
+             ParquetExtraTypeInfo::UNIT_MS, 3},
+            {::parquet::LogicalType::TimeUnit::MILLIS, false, ParquetTimeUnit::MILLIS,
+             ParquetExtraTypeInfo::UNIT_MS, 3},
+            {::parquet::LogicalType::TimeUnit::MICROS, true, ParquetTimeUnit::MICROS,
+             ParquetExtraTypeInfo::UNIT_MICROS, 6},
+            {::parquet::LogicalType::TimeUnit::MICROS, false, ParquetTimeUnit::MICROS,
+             ParquetExtraTypeInfo::UNIT_MICROS, 6},
+            {::parquet::LogicalType::TimeUnit::NANOS, true, ParquetTimeUnit::NANOS,
+             ParquetExtraTypeInfo::UNIT_NS, 6},
+            {::parquet::LogicalType::TimeUnit::NANOS, false, ParquetTimeUnit::NANOS,
+             ParquetExtraTypeInfo::UNIT_NS, 6},
+    };
+
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.expected_scale);
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "ts", ::parquet::Repetition::OPTIONAL,
+                ::parquet::LogicalType::Timestamp(test_case.adjusted_to_utc,
+                                                  test_case.parquet_unit),
+                ::parquet::Type::INT64));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_TRUE(type.doris_type->is_nullable());
+        EXPECT_EQ(primitive_type(type.doris_type), TYPE_DATETIMEV2);
+        EXPECT_EQ(scale_of(type.doris_type), test_case.expected_scale);
+        EXPECT_TRUE(type.is_timestamp);
+        EXPECT_EQ(type.timestamp_is_adjusted_to_utc, test_case.adjusted_to_utc);
+        EXPECT_EQ(type.time_unit, test_case.expected_unit);
+        EXPECT_EQ(type.extra_type_info, test_case.expected_extra);
+    }
+}
+
+TEST(ParquetTypeTest, ConvertedTimeIsRejectedButConvertedTimestampIsSupported) {
+    const auto converted_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::TIME_MILLIS));
+    EXPECT_EQ(converted_time.doris_type, nullptr);
+    EXPECT_FALSE(converted_time.supports_record_reader);
+    EXPECT_FALSE(converted_time.unsupported_reason.empty());
+
+    const auto converted_timestamp = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64,
+            ::parquet::ConvertedType::TIMESTAMP_MILLIS));
+    ASSERT_NE(converted_timestamp.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(converted_timestamp.doris_type), TYPE_DATETIMEV2);
+    EXPECT_TRUE(converted_timestamp.is_timestamp);
+    EXPECT_TRUE(converted_timestamp.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(converted_timestamp.time_unit, ParquetTimeUnit::MILLIS);
+
+    const auto converted_timestamp_micros = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_us", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT64,
+            ::parquet::ConvertedType::TIMESTAMP_MICROS));
+    ASSERT_NE(converted_timestamp_micros.doris_type, nullptr);
+    EXPECT_TRUE(converted_timestamp_micros.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(converted_timestamp_micros.doris_type), TYPE_DATETIMEV2);
+    EXPECT_EQ(scale_of(converted_timestamp_micros.doris_type), 6);
+    EXPECT_TRUE(converted_timestamp_micros.is_timestamp);
+    EXPECT_TRUE(converted_timestamp_micros.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(converted_timestamp_micros.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(converted_timestamp_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+}
+
+TEST(ParquetTypeTest, ResolveConvertedIntegerMappingsAndDecodedKinds) {
+    struct Case {
+        ::parquet::ConvertedType::type converted_type;
+        ::parquet::Type::type physical_type;
+        PrimitiveType expected_type;
+        int bit_width;
+        bool expected_unsigned;
+        DecodedValueKind expected_value_kind;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::ConvertedType::INT_8, ::parquet::Type::INT32, TYPE_TINYINT, 8, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_8, ::parquet::Type::INT32, TYPE_SMALLINT, 8, true,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::INT_16, ::parquet::Type::INT32, TYPE_SMALLINT, 16, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_16, ::parquet::Type::INT32, TYPE_INT, 16, true,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::INT_32, ::parquet::Type::INT32, TYPE_INT, 32, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_32, ::parquet::Type::INT32, TYPE_BIGINT, 32, true,
+             DecodedValueKind::UINT32},
+            {::parquet::ConvertedType::INT_64, ::parquet::Type::INT64, TYPE_BIGINT, 64, false,
+             DecodedValueKind::INT64},
+            {::parquet::ConvertedType::UINT_64, ::parquet::Type::INT64, TYPE_LARGEINT, 64, true,
+             DecodedValueKind::UINT64},
+    };
+
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.converted_type);
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "c", ::parquet::Repetition::REQUIRED, test_case.physical_type,
+                test_case.converted_type));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(type.integer_bit_width, test_case.bit_width);
+        EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned);
+        EXPECT_EQ(decoded_value_kind(type), test_case.expected_value_kind);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveConvertedDecimalCarriers) {
+    struct Case {
+        ::parquet::Type::type physical_type;
+        int type_length;
+        int precision;
+        int scale;
+        PrimitiveType expected_type;
+        ParquetExtraTypeInfo expected_extra;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::Type::INT32, -1, 9, 2, TYPE_DECIMAL32, ParquetExtraTypeInfo::DECIMAL_INT32},
+            {::parquet::Type::INT64, -1, 18, 6, TYPE_DECIMAL64,
+             ParquetExtraTypeInfo::DECIMAL_INT64},
+            {::parquet::Type::BYTE_ARRAY, -1, 20, 5, TYPE_DECIMAL128I,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+            {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16, 38, 6, TYPE_DECIMAL128I,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+            {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20, 39, 6, TYPE_DECIMAL256,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+    };
+
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.physical_type);
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "d", ::parquet::Repetition::REQUIRED, test_case.physical_type,
+                ::parquet::ConvertedType::DECIMAL, test_case.type_length, test_case.precision,
+                test_case.scale));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_TRUE(type.is_decimal);
+        EXPECT_FALSE(type.is_string_like);
+        EXPECT_EQ(type.decimal_precision, test_case.precision);
+        EXPECT_EQ(type.decimal_scale, test_case.scale);
+        EXPECT_EQ(type.extra_type_info, test_case.expected_extra);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveLogicalStringDateAndDecimalMappings) {
+    const std::vector<std::shared_ptr<const ::parquet::LogicalType>> string_like_logical_types = {
+            ::parquet::LogicalType::String(), ::parquet::LogicalType::Enum(),
+            ::parquet::LogicalType::JSON(), ::parquet::LogicalType::BSON()};
+    for (const auto& logical_type : string_like_logical_types) {
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "s", ::parquet::Repetition::OPTIONAL, logical_type, ::parquet::Type::BYTE_ARRAY));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_TRUE(type.doris_type->is_nullable());
+        EXPECT_EQ(primitive_type(type.doris_type), TYPE_STRING);
+        EXPECT_TRUE(type.is_string_like);
+    }
+
+    const auto uuid = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "uuid", ::parquet::Repetition::OPTIONAL, ::parquet::LogicalType::UUID(),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16));
+    ASSERT_NE(uuid.doris_type, nullptr);
+    EXPECT_TRUE(uuid.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(uuid.doris_type), TYPE_STRING);
+    EXPECT_TRUE(uuid.is_string_like);
+
+    const auto date = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Date(),
+            ::parquet::Type::INT32));
+    ASSERT_NE(date.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(date.doris_type), TYPE_DATEV2);
+
+    const auto decimal64 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d64", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(18, 6),
+            ::parquet::Type::INT64));
+    ASSERT_NE(decimal64.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal64.doris_type), TYPE_DECIMAL64);
+    EXPECT_TRUE(decimal64.is_decimal);
+    EXPECT_EQ(decimal64.decimal_precision, 18);
+    EXPECT_EQ(decimal64.decimal_scale, 6);
+    EXPECT_EQ(decimal64.extra_type_info, ParquetExtraTypeInfo::DECIMAL_INT64);
+
+    const auto decimal128 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d128", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(38, 6),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16));
+    ASSERT_NE(decimal128.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal128.doris_type), TYPE_DECIMAL128I);
+    EXPECT_TRUE(decimal128.is_decimal);
+    EXPECT_EQ(decimal128.decimal_precision, 38);
+    EXPECT_EQ(decimal128.decimal_scale, 6);
+    EXPECT_EQ(decimal128.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+
+    const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d256", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(39, 6),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20));
+    ASSERT_NE(decimal256.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256);
+    EXPECT_TRUE(decimal256.is_decimal);
+    EXPECT_EQ(decimal256.decimal_precision, 39);
+    EXPECT_EQ(decimal256.decimal_scale, 6);
+    EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+    EXPECT_FALSE(decimal256.is_string_like);
+}
+
+TEST(ParquetTypeTest, LogicalConvertedAndPhysicalFallbackLevelsAreDistinct) {
+    const auto logical_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Int(8, true),
+            ::parquet::Type::INT32));
+    ASSERT_NE(logical_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(logical_type.doris_type), TYPE_TINYINT);
+    EXPECT_EQ(logical_type.integer_bit_width, 8);
+
+    const auto converted_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::INT_8));
+    ASSERT_NE(converted_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(converted_type.doris_type), TYPE_TINYINT);
+    EXPECT_EQ(converted_type.integer_bit_width, 8);
+
+    const auto physical_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32));
+    ASSERT_NE(physical_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(physical_type.doris_type), TYPE_INT);
+    EXPECT_EQ(physical_type.integer_bit_width, -1);
+}
+
+TEST(ParquetTypeTest, ResolveDecimalStringLikeFloat16AndPhysicalFallback) {
+    const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d", ::parquet::Repetition::REQUIRED, ::parquet::Type::FIXED_LEN_BYTE_ARRAY,
+            ::parquet::ConvertedType::DECIMAL, 20, 39, 6));
+    ASSERT_NE(decimal256.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256);
+    EXPECT_TRUE(decimal256.is_decimal);
+    EXPECT_FALSE(decimal256.is_string_like);
+    EXPECT_EQ(decimal256.decimal_precision, 39);
+    EXPECT_EQ(decimal256.decimal_scale, 6);
+    EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+
+    const auto plain_binary = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "s", ::parquet::Repetition::REQUIRED, ::parquet::Type::BYTE_ARRAY));
+    ASSERT_NE(plain_binary.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(plain_binary.doris_type), TYPE_STRING);
+    EXPECT_TRUE(plain_binary.is_string_like);
+
+    const auto float16 = resolve_arrow_float16_type();
+    ASSERT_NE(float16.doris_type, nullptr);
+    EXPECT_TRUE(float16.doris_type->is_nullable());
+    EXPECT_EQ(float16.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+    EXPECT_EQ(float16.fixed_length, 2);
+    EXPECT_EQ(primitive_type(float16.doris_type), TYPE_FLOAT);
+    EXPECT_EQ(float16.extra_type_info, ParquetExtraTypeInfo::FLOAT16);
+    EXPECT_FALSE(float16.is_string_like);
+    EXPECT_EQ(decoded_value_kind(float16), DecodedValueKind::FIXED_BINARY);
+}
+
+TEST(ParquetTypeTest, ResolveNullDescriptorAndPhysicalFallback) {
+    const auto null_type = resolve_parquet_type(nullptr);
+    EXPECT_EQ(null_type.doris_type, nullptr);
+    EXPECT_EQ(null_type.physical_type, ::parquet::Type::UNDEFINED);
+    EXPECT_TRUE(null_type.supports_record_reader);
+
+    const auto int96 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT96));
+    ASSERT_NE(int96.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(int96.doris_type), TYPE_DATETIMEV2);
+    EXPECT_EQ(int96.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP);
+    EXPECT_EQ(decoded_value_kind(int96), DecodedValueKind::INT96);
+}
+
+TEST(ParquetTypeTest, ResolveEveryPhysicalFallback) {
+    struct Case {
+        ::parquet::schema::NodePtr node;
+        PrimitiveType expected_type;
+        DecodedValueKind expected_kind;
+        bool expected_string_like = false;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::BOOLEAN),
+             TYPE_BOOLEAN, DecodedValueKind::BOOL},
+            {::parquet::schema::PrimitiveNode::Make("i32", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT32),
+             TYPE_INT, DecodedValueKind::INT32},
+            {::parquet::schema::PrimitiveNode::Make("i64", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT64),
+             TYPE_BIGINT, DecodedValueKind::INT64},
+            {::parquet::schema::PrimitiveNode::Make("f", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::FLOAT),
+             TYPE_FLOAT, DecodedValueKind::FLOAT},
+            {::parquet::schema::PrimitiveNode::Make("d", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::DOUBLE),
+             TYPE_DOUBLE, DecodedValueKind::DOUBLE},
+            {::parquet::schema::PrimitiveNode::Make("s", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::BYTE_ARRAY),
+             TYPE_STRING, DecodedValueKind::BINARY, true},
+            {::parquet::schema::PrimitiveNode::Make("fs", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::FIXED_LEN_BYTE_ARRAY,
+                                                    ::parquet::ConvertedType::NONE, 4),
+             TYPE_STRING, DecodedValueKind::FIXED_BINARY, true},
+            {::parquet::schema::PrimitiveNode::Make("ts", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT96),
+             TYPE_DATETIMEV2, DecodedValueKind::INT96},
+    };
+
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.expected_type);
+        const auto type = resolve_node(test_case.node);
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(decoded_value_kind(type), test_case.expected_kind);
+        EXPECT_EQ(type.is_string_like, test_case.expected_string_like);
+        EXPECT_TRUE(type.supports_record_reader);
+    }
+}
+
+TEST(ParquetTypeTest, InvalidLogicalAnnotationsFallBackOrRejectAsSpecified) {
+    EXPECT_THROW(::parquet::LogicalType::Int(24, true), ::parquet::ParquetException);
+
+    const auto nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ns", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(nanos_time.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(nanos_time.doris_type), TYPE_BIGINT);
+    EXPECT_TRUE(nanos_time.unsupported_reason.empty());
+
+    const auto adjusted_nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ns_utc", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    EXPECT_EQ(adjusted_nanos_time.doris_type, nullptr);
+    EXPECT_FALSE(adjusted_nanos_time.supports_record_reader);
+    EXPECT_FALSE(adjusted_nanos_time.unsupported_reason.empty());
+
+    EXPECT_THROW(::parquet::schema::PrimitiveNode::Make("f16_bad", ::parquet::Repetition::REQUIRED,
+                                                        ::parquet::LogicalType::Float16(),
+                                                        ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 4),
+                 ::parquet::ParquetException);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/table/hive_reader_test.cpp b/be/test/format_v2/table/hive_reader_test.cpp
new file mode 100644
index 00000000000000..a41effaa91a3a9
--- /dev/null
+++ b/be/test/format_v2/table/hive_reader_test.cpp
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hive_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::hive {
+namespace {
+
+ColumnDefinition table_column(const std::string& name, DataTypePtr type) {
+    ColumnDefinition column;
+    column.identifier = Field::create_field<TYPE_STRING>(name);
+    column.name = name;
+    column.type = std::move(type);
+    return column;
+}
+
+Status init_hive_reader(FileFormat format, TFileScanRangeParams* params, RuntimeState* state,
+                        RuntimeProfile* profile, HiveReader* reader) {
+    return reader->init({
+            .projected_columns = {table_column("id", std::make_shared<DataTypeInt32>()),
+                                  table_column("name", std::make_shared<DataTypeString>())},
+            .column_predicates = {},
+            .conjuncts = {},
+            .format = format,
+            .scan_params = params,
+            .io_ctx = nullptr,
+            .runtime_state = state,
+            .scanner_profile = profile,
+    });
+}
+
+class HiveV2ReaderTest : public testing::Test {
+public:
+    HiveV2ReaderTest() : state(query_options, query_globals), profile("hive_v2_reader_test") {}
+
+protected:
+    TQueryOptions query_options;
+    TQueryGlobals query_globals;
+    RuntimeState state;
+    RuntimeProfile profile;
+};
+
+// Scenario: Hive tables using OpenCSVSerde are planned as table_format=hive with CSV file format.
+// HiveReader must allow that file format so TableReader can create the v2 CsvReader.
+TEST_F(HiveV2ReaderTest, InitSupportsCsvFileFormat) {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    HiveReader reader;
+
+    ASSERT_TRUE(init_hive_reader(FileFormat::CSV, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Hive text files also synthesize a file-local schema from FE slots, so they should use
+// name mapping at the table-reader layer while TextReader consumes column_idxs for field ordinals.
+TEST_F(HiveV2ReaderTest, InitSupportsTextFileFormat) {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_TEXT);
+    HiveReader reader;
+
+    ASSERT_TRUE(init_hive_reader(FileFormat::TEXT, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Hive JSON files also synthesize a file-local schema from FE slots, so they should use
+// name mapping at the table-reader layer while JsonReader consumes JSON attributes.
+TEST_F(HiveV2ReaderTest, InitSupportsJsonFileFormat) {
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_JSON);
+    HiveReader reader;
+
+    ASSERT_TRUE(init_hive_reader(FileFormat::JSON, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: positional mapping is only for Hive Parquet/ORC sessions that disable name mapping.
+// CSV keeps the synthesized file-column names and leaves column_idxs for the CsvReader itself.
+TEST_F(HiveV2ReaderTest, CsvDoesNotConsumeColumnIdxsAsPositionalSchemaMapping) {
+    query_options.hive_parquet_use_column_names = false;
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    params.__set_column_idxs({3});
+    ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &state,
+    };
+    HiveReader reader;
+
+    TFileScanSlotInfo slot;
+    slot.__set_is_file_slot(true);
+    auto column = table_column("value", std::make_shared<DataTypeInt32>());
+
+    ASSERT_TRUE(reader.annotate_projected_column(slot, &context, &column).ok());
+    ASSERT_TRUE(column.has_identifier_name());
+    EXPECT_EQ(column.get_identifier_name(), "value");
+    EXPECT_EQ(context.next_file_column_idx, 0);
+}
+
+} // namespace
+} // namespace doris::format::hive
diff --git a/be/test/format_v2/table/hudi_reader_test.cpp b/be/test/format_v2/table/hudi_reader_test.cpp
new file mode 100644
index 00000000000000..125183cd7a60c4
--- /dev/null
+++ b/be/test/format_v2/table/hudi_reader_test.cpp
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hudi_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format {
+namespace {
+
+schema::external::TFieldPtr external_schema_field(std::string name, int32_t id,
+                                                  std::vector<std::string> aliases = {}) {
+    auto field = std::make_shared<schema::external::TField>();
+    field->__set_name(std::move(name));
+    field->__set_id(id);
+    if (!aliases.empty()) {
+        field->__set_name_mapping(std::move(aliases));
+    }
+    schema::external::TFieldPtr field_ptr;
+    field_ptr.field_ptr = std::move(field);
+    field_ptr.__isset.field_ptr = true;
+    return field_ptr;
+}
+
+schema::external::TSchema external_schema(int64_t schema_id,
+                                          std::vector<schema::external::TFieldPtr> fields) {
+    schema::external::TStructField root_field;
+    root_field.__set_fields(std::move(fields));
+    schema::external::TSchema schema;
+    schema.__set_schema_id(schema_id);
+    schema.__set_root_field(std::move(root_field));
+    return schema;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(id);
+    field.local_id = id;
+    field.name = name;
+    field.type = type;
+    return field;
+}
+
+TTableFormatFileDesc hudi_table_format_desc(std::optional<int64_t> schema_id) {
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("hudi");
+    THudiFileDesc hudi_params;
+    if (schema_id.has_value()) {
+        hudi_params.__set_schema_id(*schema_id);
+    }
+    table_format_params.__set_hudi_params(hudi_params);
+    return table_format_params;
+}
+
+// Scenario: FileScannerV2 Hudi native reader uses the split schema id to annotate the physical
+// file schema before TableColumnMapper runs. This keeps schema-evolved Hudi files on field-id
+// mapping, including renamed nested children.
+TEST(HudiReaderTest, AnnotatesFileSchemaFromSplitHistorySchema) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+
+    auto profile_field = external_schema_field("profile", 20);
+    schema::external::TStructField profile_struct;
+    profile_struct.__set_fields({external_schema_field("old_age", 21, {"age"})});
+    profile_field.field_ptr->nestedField.__set_struct_field(std::move(profile_struct));
+    profile_field.field_ptr->__isset.nestedField = true;
+
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"}), profile_field}),
+            external_schema(
+                    200, {external_schema_field("name", 10), external_schema_field("profile", 20)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+
+    auto string_type = std::make_shared<DataTypeString>();
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto profile_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"old_age"});
+    auto profile_column = make_file_column(1, "profile", profile_type);
+    profile_column.children = {make_file_column(0, "old_age", int_type)};
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", string_type),
+            profile_column,
+    };
+
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 2);
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 10);
+    EXPECT_EQ(file_schema[0].name_mapping, std::vector<std::string>({"name"}));
+    EXPECT_EQ(file_schema[1].get_identifier_field_id(), 20);
+    ASSERT_EQ(file_schema[1].children.size(), 1);
+    EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 21);
+    EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector<std::string>({"age"}));
+}
+
+// Scenario: a Hudi split can only use field-id mapping when its schema id resolves to a historical
+// schema sent by FE. Unknown or missing split schema ids must fall back to BY_NAME and leave the
+// physical file schema untouched.
+TEST(HudiReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", std::make_shared<DataTypeString>()),
+    };
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0);
+    EXPECT_TRUE(file_schema[0].name_mapping.empty());
+}
+
+// Scenario: HudiReader must reset the previous split schema id before each split. Otherwise a
+// BY_FIELD_ID split could leak its schema id into the next split that carries no schema id.
+TEST(HudiReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"})}),
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_with_schema_id;
+    split_with_schema_id.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_with_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+
+    SplitReadOptions split_without_schema_id;
+    split_without_schema_id.current_range.__set_table_format_params(
+            hudi_table_format_desc(std::nullopt));
+    ASSERT_TRUE(reader.prepare_split(split_without_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table/iceberg_reader_test.cpp b/be/test/format_v2/table/iceberg_reader_test.cpp
new file mode 100644
index 00000000000000..84fe09bc0c55b5
--- /dev/null
+++ b/be/test/format_v2/table/iceberg_reader_test.cpp
@@ -0,0 +1,1852 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/iceberg_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <algorithm>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exec/common/endian.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format/format_common.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "roaring/roaring64map.hh"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris::format {
+namespace {
+
+LocalColumnIndex field_projection(int32_t column_id) {
+    return LocalColumnIndex {.index = column_id};
+}
+
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    std::vector<int32_t> ids;
+    ids.reserve(projections.size());
+    for (const auto& projection : projections) {
+        ids.push_back(projection.index);
+    }
+    return ids;
+}
+VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) {
+    const auto nullable_int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    return VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int_type, column_name);
+}
+
+VExprSPtr table_int32_literal(int32_t value) {
+    return VLiteral::create_shared(std::make_shared<DataTypeInt32>(),
+                                   Field::create_field<TYPE_INT>(value));
+}
+
+VExprSPtr table_int64_literal(int64_t value) {
+    return VLiteral::create_shared(std::make_shared<DataTypeInt64>(),
+                                   Field::create_field<TYPE_BIGINT>(value));
+}
+
+TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE,
+                              bool short_circuit_evaluation = false) {
+    TFunctionName fn_name;
+    fn_name.__set_function_name(function_name);
+    TFunction fn;
+    fn.__set_name(fn_name);
+    fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+    std::vector<TTypeDesc> thrift_arg_types;
+    thrift_arg_types.reserve(arg_types.size());
+    for (const auto& arg_type : arg_types) {
+        thrift_arg_types.push_back(arg_type->to_thrift());
+    }
+    fn.__set_arg_types(thrift_arg_types);
+    fn.__set_ret_type(return_type->to_thrift());
+    fn.__set_has_var_args(false);
+
+    TExprNode node;
+    node.__set_node_type(node_type);
+    node.__set_opcode(opcode);
+    node.__set_type(return_type->to_thrift());
+    node.__set_fn(fn);
+    node.__set_num_children(static_cast<int16_t>(arg_types.size()));
+    node.__set_is_nullable(return_type->is_nullable());
+    if (short_circuit_evaluation) {
+        node.__set_short_circuit_evaluation(true);
+    }
+    return node;
+}
+
+VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) {
+    const auto node = table_function_node(function_name, return_type, arg_types, node_type, opcode);
+    return VectorizedFnCall::create_shared(node);
+}
+
+VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto expr = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED,
+                                    TExprOpcode::GT);
+    expr->add_child(table_int32_slot_ref(slot_id, column_id, "id"));
+    expr->add_child(table_int32_literal(value));
+    return expr;
+}
+
+VExprSPtr table_nullable_int64_binary_predicate(const std::string& function_name,
+                                                TExprOpcode::type opcode, int slot_id,
+                                                int column_id, const std::string& column_name,
+                                                int64_t value) {
+    const auto int64_type = std::make_shared<DataTypeInt64>();
+    const auto nullable_int64_type = make_nullable(int64_type);
+    auto expr = table_function_expr(function_name, make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_int64_type, int64_type}, TExprNodeType::BINARY_PRED,
+                                    opcode);
+    expr->add_child(
+            VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int64_type, column_name));
+    expr->add_child(table_int64_literal(value));
+    return expr;
+}
+
+class IcebergTableReaderDeleteFileTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    Status parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                      bool* has_delete_file) {
+        return _parse_deletion_vector_file(t_desc, desc, has_delete_file);
+    }
+};
+
+class IcebergTableReaderScanRequestTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    Status init_for_scan_request_test(std::vector<ColumnDefinition> projected_columns) {
+        _query_options = std::make_unique<TQueryOptions>();
+        _query_globals = std::make_unique<TQueryGlobals>();
+        _state = std::make_unique<RuntimeState>(*_query_options, *_query_globals);
+        RETURN_IF_ERROR(init({
+                .projected_columns = std::move(projected_columns),
+                .column_predicates = {},
+                .conjuncts = {},
+                .format = FileFormat::PARQUET,
+                .scan_params = nullptr,
+                .io_ctx = nullptr,
+                .runtime_state = _state.get(),
+                .scanner_profile = nullptr,
+        }));
+
+        SplitReadOptions split_options;
+        split_options.current_range.__set_path("scan-request-test.parquet");
+        TTableFormatFileDesc table_format_params;
+        TIcebergFileDesc iceberg_params;
+        iceberg_params.__set_first_row_id(1000);
+        table_format_params.__set_iceberg_params(iceberg_params);
+        split_options.current_range.__set_table_format_params(table_format_params);
+        RETURN_IF_ERROR(prepare_split(split_options));
+
+        _delete_rows_storage = {1};
+        _delete_rows = &_delete_rows_storage;
+        return Status::OK();
+    }
+
+    Status customize_request(FileScanRequest* request) {
+        return customize_file_scan_request(request);
+    }
+
+private:
+    std::unique_ptr<TQueryOptions> _query_options;
+    std::unique_ptr<TQueryGlobals> _query_globals;
+    std::unique_ptr<RuntimeState> _state;
+    DeleteRows _delete_rows_storage;
+};
+
+class IcebergTableReaderMappingModeTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    TableColumnMappingMode mapping_mode_for_schema(std::vector<ColumnDefinition> file_schema) {
+        _data_reader.file_schema = std::move(file_schema);
+        return mapping_mode();
+    }
+};
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_int64_array(const std::vector<int64_t>& values) {
+    arrow::Int64Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_nullable_int64_array(
+        const std::vector<std::optional<int64_t>>& values) {
+    arrow::Int64Builder builder;
+    for (const auto& value : values) {
+        if (value.has_value()) {
+            EXPECT_TRUE(builder.Append(*value).ok());
+        } else {
+            EXPECT_TRUE(builder.AppendNull().ok());
+        }
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder builder;
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+void write_iceberg_equality_delete_parquet_file(const std::string& file_path, int32_t field_id,
+                                                int32_t value) {
+    const auto metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array({value})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_iceberg_equality_delete_bigint_parquet_file(const std::string& file_path,
+                                                       int32_t field_id, int64_t value) {
+    const auto metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int64(), false)->WithMetadata(metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int64_array({value})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values,
+                                 int64_t row_group_size = -1) {
+    const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto score_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"});
+    const auto value_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+            arrow::field("score", arrow::int32(), false)->WithMetadata(score_metadata),
+            arrow::field("value", arrow::utf8(), false)->WithMetadata(value_metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores),
+                                             build_string_array(values)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    const auto write_row_group_size =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      write_row_group_size, builder.build()));
+}
+
+void write_iceberg_row_lineage_parquet_file(
+        const std::string& file_path, const std::vector<int32_t>& ids,
+        const std::vector<std::optional<int64_t>>& row_ids,
+        const std::vector<std::optional<int64_t>>& last_updated_sequence_numbers = {}) {
+    ASSERT_EQ(ids.size(), row_ids.size());
+    if (!last_updated_sequence_numbers.empty()) {
+        ASSERT_EQ(ids.size(), last_updated_sequence_numbers.size());
+    }
+    const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto row_id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483540"});
+    const auto last_updated_sequence_number_metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483539"});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+            arrow::field("_row_id", arrow::int64(), true)->WithMetadata(row_id_metadata),
+    });
+    std::vector<std::shared_ptr<arrow::Array>> arrays = {
+            build_int32_array(ids),
+            build_nullable_int64_array(row_ids),
+    };
+    if (!last_updated_sequence_numbers.empty()) {
+        schema =
+                schema->AddField(schema->num_fields(),
+                                 arrow::field("_last_updated_sequence_number", arrow::int64(), true)
+                                         ->WithMetadata(last_updated_sequence_number_metadata))
+                        .ValueOrDie();
+        arrays.push_back(build_nullable_int64_array(last_updated_sequence_numbers));
+    }
+    auto table = arrow::Table::Make(schema, arrays);
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      static_cast<int64_t>(ids.size()),
+                                                      builder.build()));
+}
+
+void write_position_delete_parquet_file(const std::string& file_path,
+                                        const std::vector<std::string>& data_file_paths,
+                                        const std::vector<int64_t>& positions) {
+    auto schema = arrow::schema({
+            arrow::field("file_path", arrow::utf8(), false),
+            arrow::field("pos", arrow::int64(), false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_string_array(data_file_paths), build_int64_array(positions)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      static_cast<int64_t>(positions.size()),
+                                                      builder.build()));
+}
+
+int64_t write_iceberg_deletion_vector_file(const std::string& file_path,
+                                           const std::vector<uint64_t>& deleted_positions) {
+    roaring::Roaring64Map rows;
+    for (const auto position : deleted_positions) {
+        rows.add(position);
+    }
+
+    const size_t bitmap_size = rows.getSizeInBytes();
+    std::vector<char> blob(4 + 4 + bitmap_size + 4);
+    rows.write(blob.data() + 8);
+
+    const uint32_t total_length = static_cast<uint32_t>(4 + bitmap_size);
+    BigEndian::Store32(blob.data(), total_length);
+    constexpr char DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'};
+    memcpy(blob.data() + 4, DV_MAGIC, 4);
+    BigEndian::Store32(blob.data() + 8 + bitmap_size, 0);
+
+    std::ofstream output(file_path, std::ios::binary);
+    EXPECT_TRUE(output.is_open());
+    output.write(blob.data(), static_cast<std::streamsize>(blob.size()));
+    EXPECT_TRUE(output.good());
+    return static_cast<int64_t>(blob.size());
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block block;
+    for (const auto& column : columns) {
+        block.insert({column.type->create_column(), column.type, column.name});
+    }
+    return block;
+}
+
+void expect_nullable_int64_column_values(const IColumn& column,
+                                         const std::vector<int64_t>& expected_values) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*full_column);
+    const auto& values =
+            assert_cast<const ColumnInt64&>(nullable_column.get_nested_column()).get_data();
+    ASSERT_EQ(nullable_column.size(), expected_values.size());
+    for (size_t row = 0; row < expected_values.size(); ++row) {
+        EXPECT_EQ(nullable_column.get_null_map_data()[row], 0);
+        EXPECT_EQ(values[row], expected_values[row]);
+    }
+}
+
+void expect_nullable_int64_column_optional_values(
+        const IColumn& column, const std::vector<std::optional<int64_t>>& expected_values) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*full_column);
+    const auto& values =
+            assert_cast<const ColumnInt64&>(nullable_column.get_nested_column()).get_data();
+    ASSERT_EQ(nullable_column.size(), expected_values.size());
+    for (size_t row = 0; row < expected_values.size(); ++row) {
+        if (expected_values[row].has_value()) {
+            EXPECT_EQ(nullable_column.get_null_map_data()[row], 0);
+            EXPECT_EQ(values[row], *expected_values[row]);
+        } else {
+            EXPECT_EQ(nullable_column.get_null_map_data()[row], 1);
+        }
+    }
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (!column.is_nullable()) {
+        return column;
+    }
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+    for (const auto is_null : nullable_column.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    return nullable_column.get_nested_column();
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) {
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type);
+
+DataTypePtr make_iceberg_rowid_type() {
+    return make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt64>(),
+                       std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeString>()},
+            Strings {"file_path", "row_pos", "partition_spec_id", "partition_data_json"}));
+}
+
+ColumnDefinition make_iceberg_row_lineage_row_id_column() {
+    return make_table_column(2147483540, "_row_id",
+                             make_nullable(std::make_shared<DataTypeInt64>()));
+}
+
+ColumnDefinition make_iceberg_last_updated_sequence_number_column() {
+    return make_table_column(2147483539, "_last_updated_sequence_number",
+                             make_nullable(std::make_shared<DataTypeInt64>()));
+}
+
+void expect_iceberg_rowid_column_values(const IColumn& column, const std::string& file_path,
+                                        const std::vector<int64_t>& row_positions,
+                                        int32_t partition_spec_id,
+                                        const std::string& partition_data_json) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*full_column);
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+    const auto& file_path_column = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(0)));
+    const auto& row_pos_column = assert_cast<const ColumnInt64&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(1)));
+    const auto& spec_id_column = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(2)));
+    const auto& partition_data_column = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(3)));
+
+    ASSERT_EQ(nullable_column.size(), row_positions.size());
+    for (size_t row = 0; row < row_positions.size(); ++row) {
+        EXPECT_EQ(nullable_column.get_null_map_data()[row], 0);
+        EXPECT_EQ(file_path_column.get_data_at(row).to_string(), file_path);
+        EXPECT_EQ(row_pos_column.get_element(row), row_positions[row]);
+        EXPECT_EQ(spec_id_column.get_element(row), partition_spec_id);
+        EXPECT_EQ(partition_data_column.get_data_at(row).to_string(), partition_data_json);
+    }
+}
+
+void expect_int32_column_values(const IColumn& column,
+                                const std::vector<int32_t>& expected_values) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nested_column = expect_not_null_nullable_nested_column(*full_column);
+    const auto& values = assert_cast<const ColumnInt32&>(nested_column).get_data();
+    ASSERT_EQ(values.size(), expected_values.size());
+    for (size_t row = 0; row < expected_values.size(); ++row) {
+        EXPECT_EQ(values[row], expected_values[row]);
+    }
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    SplitReadOptions options;
+    options.current_range.__set_path(file_path);
+    options.current_range.__set_file_size(
+            static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    return options;
+}
+
+void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) {
+    split_options->current_range.__isset.table_format_params = true;
+    split_options->current_range.table_format_params.__isset.table_level_row_count = true;
+    split_options->current_range.table_format_params.table_level_row_count = row_count;
+}
+
+void set_iceberg_row_lineage_params(SplitReadOptions* split_options, int64_t first_row_id,
+                                    int64_t last_updated_sequence_number) {
+    TTableFormatFileDesc table_format_params;
+    TIcebergFileDesc iceberg_params;
+    iceberg_params.__set_first_row_id(first_row_id);
+    iceberg_params.__set_last_updated_sequence_number(last_updated_sequence_number);
+    table_format_params.__set_iceberg_params(iceberg_params);
+    split_options->current_range.__set_table_format_params(table_format_params);
+}
+
+void set_iceberg_rowid_params(SplitReadOptions* split_options,
+                              const std::string& original_file_path, int32_t partition_spec_id,
+                              const std::string& partition_data_json) {
+    TTableFormatFileDesc table_format_params;
+    TIcebergFileDesc iceberg_params;
+    iceberg_params.__set_original_file_path(original_file_path);
+    iceberg_params.__set_partition_spec_id(partition_spec_id);
+    iceberg_params.__set_partition_data_json(partition_data_json);
+    table_format_params.__set_iceberg_params(iceberg_params);
+    split_options->current_range.__set_table_format_params(table_format_params);
+}
+
+TIcebergDeleteFileDesc make_iceberg_deletion_vector(const std::string& path, int64_t offset,
+                                                    int64_t size) {
+    TIcebergDeleteFileDesc delete_file;
+    delete_file.__set_content(3);
+    delete_file.__set_path(path);
+    delete_file.__set_content_offset(offset);
+    delete_file.__set_content_size_in_bytes(size);
+    return delete_file;
+}
+
+TIcebergDeleteFileDesc make_iceberg_position_delete_file(const std::string& path) {
+    TIcebergDeleteFileDesc delete_file;
+    delete_file.__set_content(1);
+    delete_file.__set_path(path);
+    delete_file.__set_file_format(TFileFormatType::FORMAT_PARQUET);
+    return delete_file;
+}
+
+TIcebergDeleteFileDesc make_iceberg_equality_delete_file(const std::string& path,
+                                                         const std::vector<int32_t>& field_ids) {
+    TIcebergDeleteFileDesc delete_file;
+    delete_file.__set_content(2);
+    delete_file.__set_path(path);
+    delete_file.__set_field_ids(field_ids);
+    delete_file.__set_file_format(TFileFormatType::FORMAT_PARQUET);
+    return delete_file;
+}
+
+TFileScanRangeParams make_local_parquet_scan_params() {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_file_type(TFileType::FILE_LOCAL);
+    scan_params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    return scan_params;
+}
+
+std::shared_ptr<io::IOContext> make_io_context(io::FileReaderStats* file_reader_stats,
+                                               io::FileCacheStatistics* file_cache_stats) {
+    auto io_ctx = std::make_shared<io::IOContext>();
+    io_ctx->file_reader_stats = file_reader_stats;
+    io_ctx->file_cache_stats = file_cache_stats;
+    return io_ctx;
+}
+
+TTableFormatFileDesc make_iceberg_table_format_desc(
+        const std::string& data_file_path,
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TTableFormatFileDesc table_format_params;
+    TIcebergFileDesc iceberg_params;
+    iceberg_params.__set_format_version(2);
+    iceberg_params.__set_original_file_path(data_file_path);
+    iceberg_params.__set_delete_files(delete_files);
+    table_format_params.__set_iceberg_params(iceberg_params);
+    return table_format_params;
+}
+
+std::vector<int32_t> read_iceberg_ids(doris::format::iceberg::IcebergTableReader* reader,
+                                      const std::vector<ColumnDefinition>& projected_columns) {
+    std::vector<int32_t> ids;
+    bool eos = false;
+    while (!eos) {
+        Block block = build_table_block(projected_columns);
+        auto status = reader->get_block(&block, &eos);
+        if (!status.ok()) {
+            ADD_FAILURE() << status;
+            return ids;
+        }
+        if (block.rows() == 0) {
+            continue;
+        }
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+        for (size_t row = 0; row < block.rows(); ++row) {
+            ids.push_back(id_column.get_element(row));
+        }
+    }
+    return ids;
+}
+
+DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) {
+    DORIS_CHECK(type != nullptr);
+    const auto nested_type = remove_nullable(type);
+    DataTypePtr result;
+    if (const auto* struct_type = typeid_cast<const DataTypeStruct*>(nested_type.get())) {
+        DataTypes child_types;
+        child_types.reserve(struct_type->get_elements().size());
+        for (const auto& child_type : struct_type->get_elements()) {
+            child_types.push_back(make_table_test_type(child_type));
+        }
+        result = std::make_shared<DataTypeStruct>(child_types, struct_type->get_element_names());
+    } else if (const auto* array_type = typeid_cast<const DataTypeArray*>(nested_type.get())) {
+        result = std::make_shared<DataTypeArray>(
+                make_table_test_type(array_type->get_nested_type()));
+    } else if (const auto* map_type = typeid_cast<const DataTypeMap*>(nested_type.get())) {
+        result = std::make_shared<DataTypeMap>(make_table_test_type(map_type->get_key_type()),
+                                               make_table_test_type(map_type->get_value_type()));
+    } else {
+        result = nested_type;
+    }
+    return nullable_root ? make_nullable(result) : result;
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition column;
+    if (id >= 0) {
+        column.identifier = Field::create_field<TYPE_INT>(id);
+    }
+    column.name = name;
+    // TableReader tests model external table scan descriptors. Those table columns are nullable
+    // even when the Parquet file field itself is required, so keep the test schema aligned with
+    // the real scan contract at the construction boundary.
+    column.type = make_table_test_type(type);
+    return column;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(id);
+    field.local_id = id;
+    field.name = name;
+    field.type = make_table_test_type(type);
+    return field;
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns);
+
+void set_name_identifier(ColumnDefinition* column) {
+    DORIS_CHECK(column != nullptr);
+    column->identifier = Field::create_field<TYPE_STRING>(column->name);
+    set_name_identifiers(&column->children);
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);
+    for (auto& column : *columns) {
+        set_name_identifier(&column);
+    }
+}
+
+void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index,
+                          std::shared_ptr<ColumnPredicate> predicate) {
+    auto& entry = (*column_predicates)[global_index];
+    entry.push_back(std::move(predicate));
+}
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto ctx = VExprContext::create_shared(expr);
+    auto status = ctx->prepare(state, RowDescriptor());
+    EXPECT_TRUE(status.ok()) << status;
+    status = ctx->open(state);
+    EXPECT_TRUE(status.ok()) << status;
+    return ctx;
+}
+
+void apply_final_conjuncts(Block* block, const VExprContextSPtrs& conjuncts) {
+    const auto status = VExprContext::filter_block(conjuncts, block, block->columns());
+    ASSERT_TRUE(status.ok()) << status;
+}
+
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsUseRowLineageMetadata) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(2, 2, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001, 1002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77, 77});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowLineageUsesPhysicalRowIdAndFillsNulls) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_fill_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {7000, 1001, 7002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {80, 77, 82});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergPhysicalRowIdKeepsNullsWithoutFirstRowId) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_no_first_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(0).column,
+            std::vector<std::optional<int64_t>> {7000, std::nullopt, 7002});
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(1).column,
+            std::vector<std::optional<int64_t>> {80, std::nullopt, 82});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergMissingRowIdStaysNullWithoutFirstRowId) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_missing_row_id_no_first_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(0).column,
+            std::vector<std::optional<int64_t>> {std::nullopt, std::nullopt, std::nullopt});
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(1).column,
+            std::vector<std::optional<int64_t>> {std::nullopt, std::nullopt, std::nullopt});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowIdPredicateFiltersAfterRowLineageMaterialization) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_row_id_finalize_filter_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    VExprContextSPtrs conjuncts = {prepared_conjunct(
+            &state,
+            table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 0, 0, "_row_id", 1001))};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = conjuncts,
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+
+    apply_final_conjuncts(&block, conjuncts);
+    ASSERT_EQ(block.rows(), 1);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77});
+    expect_int32_column_values(*block.get_by_position(2).column, {2});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergLastUpdatedSequencePredicateFiltersAfterMaterialization) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_sequence_finalize_filter_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    VExprContextSPtrs conjuncts = {prepared_conjunct(
+            &state, table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 1, 1,
+                                                          "_last_updated_sequence_number", 77))};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = conjuncts,
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+
+    apply_final_conjuncts(&block, conjuncts);
+    ASSERT_EQ(block.rows(), 1);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77});
+    expect_int32_column_values(*block.get_by_position(2).column, {2});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowidVirtualColumnUsesDataFilePosition) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_rowid_virtual_column_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(-1, BeConsts::ICEBERG_ROWID_COL, make_iceberg_rowid_type()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    const auto original_file_path = "s3://bucket/table/data/original.parquet";
+    const auto partition_data_json = R"({"part":"p1"})";
+    set_iceberg_rowid_params(&split_options, original_file_path, 17, partition_data_json);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    ASSERT_EQ(block.rows(), 2);
+    expect_iceberg_rowid_column_values(*block.get_by_position(0).column, original_file_path, {1, 2},
+                                       17, partition_data_json);
+    expect_int32_column_values(*block.get_by_position(1).column, {2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterConjunctFiltering) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_conjunct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(2, 2, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 3000, 88);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {3001, 3002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {88, 88});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterRowGroupPredicatePruning) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_iceberg_virtual_columns_row_group_predicate_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    // ColumnPredicate is used for row-group/statistics pruning. Keep one row per row group so
+    // id > 2 prunes the first two row groups and leaves only the third file-local row.
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(2),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 4000, 99);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+
+    ASSERT_EQ(block.rows(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {4002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {99});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergDeletionVectorUsesTableReaderDeleteFileInterface) {
+    TTableFormatFileDesc table_format_desc;
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_format_version(2);
+    iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv.bin", 8, 128)});
+    table_format_desc.__set_iceberg_params(iceberg_desc);
+
+    IcebergTableReaderDeleteFileTestHelper reader;
+    DeleteFileDesc desc;
+    bool has_delete_file = false;
+    ASSERT_TRUE(reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file).ok());
+
+    EXPECT_TRUE(has_delete_file);
+    EXPECT_EQ(desc.path, "dv.bin");
+    EXPECT_EQ(desc.start_offset, 8);
+    EXPECT_EQ(desc.size, 128);
+    EXPECT_EQ(desc.file_size, -1);
+    EXPECT_EQ(desc.format, DeleteFileDesc::Format::ICEBERG);
+}
+
+TEST(IcebergV2ReaderTest, IcebergDeletionVectorRejectsMultipleDeleteFiles) {
+    TTableFormatFileDesc table_format_desc;
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_format_version(2);
+    iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv-a.bin", 8, 128),
+                                     make_iceberg_deletion_vector("dv-b.bin", 16, 256)});
+    table_format_desc.__set_iceberg_params(iceberg_desc);
+
+    IcebergTableReaderDeleteFileTestHelper reader;
+    DeleteFileDesc desc;
+    bool has_delete_file = false;
+    auto status = reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file);
+
+    EXPECT_FALSE(status.ok());
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesDeletionVectorFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_deletion_vector_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0, 4});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({2, 3, 4}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithDeletes) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+// Covers TopN lazy materialization on Iceberg schema-evolution tables. The first-phase scan adds a
+// synthesized GLOBAL_ROWID column to the file schema. That virtual column must not make Iceberg
+// fall back from field-id mapping to name mapping, otherwise renamed columns are read as defaults
+// from old files.
+TEST(IcebergV2ReaderTest, IcebergMappingModeIgnoresGlobalRowIdVirtualColumn) {
+    IcebergTableReaderMappingModeTestHelper reader;
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(1, "id", std::make_shared<DataTypeInt32>()),
+            make_file_column(2, "name", std::make_shared<DataTypeString>()),
+            global_rowid_column_definition(),
+    };
+
+    EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)),
+              TableColumnMappingMode::BY_FIELD_ID);
+}
+
+// Covers the fallback side of the previous case. Only synthesized columns are ignored; a real data
+// column without an Iceberg field id still disables field-id mapping.
+TEST(IcebergV2ReaderTest, IcebergMappingModeRequiresFieldIdsForDataColumns) {
+    IcebergTableReaderMappingModeTestHelper reader;
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(1, "id", std::make_shared<DataTypeInt32>()),
+            make_file_column(2, "name", std::make_shared<DataTypeString>()),
+            global_rowid_column_definition(),
+    };
+    file_schema[1].identifier = Field {};
+
+    EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)),
+              TableColumnMappingMode::BY_NAME);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithPositionDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_position_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableLevelCountUsesAssignedRowCountWithPositionDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_table_level_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TQueryOptions query_options;
+    query_options.__set_batch_size(10);
+    RuntimeState state {query_options, TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    set_table_level_row_count(&split_options, 5);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 5);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergPositionDeleteFallsBackToSplitPath) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_path_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    TTableFormatFileDesc table_format_params;
+    TIcebergFileDesc iceberg_params;
+    iceberg_params.__set_format_version(2);
+    iceberg_params.__set_delete_files({make_iceberg_position_delete_file(delete_file_path)});
+    table_format_params.__set_iceberg_params(iceberg_params);
+    split_options.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithEqualityDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_equality_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "equality-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_iceberg_equality_delete_parquet_file(delete_file_path, 0, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergEqualityDeleteCastsDataColumnToDeleteKeyType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_equality_delete_cast_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "equality-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_iceberg_equality_delete_bigint_parquet_file(delete_file_path, 0, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergPositionDeleteOnlyMatchesOriginalDataFilePath) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_iceberg_position_delete_path_match_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto other_file_path = (test_dir / "other.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {other_file_path, file_path}, {0, 1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowLineageRemainsFileLocalAfterDeleteFiltering) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_row_lineage_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    TTableFormatFileDesc table_format_params = make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)});
+    table_format_params.iceberg_params.__set_first_row_id(1000);
+    split_options.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1000, 1002});
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesPositionDeleteFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    write_position_delete_parquet_file(delete_file_path, {file_path, file_path}, {1, 3});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3, 5}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderMergesDeletionVectorAndPositionDeleteFiles) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_delete_files_merge_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    const auto position_delete_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0});
+    write_position_delete_parquet_file(position_delete_path, {file_path, file_path}, {3, 3});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size),
+                        make_iceberg_position_delete_file(position_delete_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({2, 3, 5}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, RowPositionDeletePredicateColumnIsNotRepeatedAsOutputColumn) {
+    const auto row_position_column_id = ROW_POSITION_COLUMN_ID;
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    IcebergTableReaderScanRequestTestHelper reader;
+    ASSERT_TRUE(reader.init_for_scan_request_test(projected_columns).ok());
+
+    FileScanRequest request;
+    request.non_predicate_columns.push_back(field_projection(0));
+    request.local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+
+    ASSERT_TRUE(reader.customize_request(&request).ok());
+
+    EXPECT_EQ(projection_ids(request.predicate_columns),
+              std::vector<int32_t>({row_position_column_id}));
+    EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector<int32_t>({0}));
+    ASSERT_TRUE(request.local_positions.contains(LocalColumnId(row_position_column_id)));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(row_position_column_id)).value(), 1);
+    ASSERT_TRUE(request.conjuncts.empty());
+    ASSERT_EQ(request.delete_conjuncts.size(), 1);
+    EXPECT_NE(request.delete_conjuncts[0], nullptr);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table/paimon_reader_test.cpp b/be/test/format_v2/table/paimon_reader_test.cpp
new file mode 100644
index 00000000000000..5d0c51f070f76c
--- /dev/null
+++ b/be/test/format_v2/table/paimon_reader_test.cpp
@@ -0,0 +1,537 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/paimon_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exec/common/endian.h"
+#include "format/format_common.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "roaring/roaring.hh"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+DataTypePtr table_type(const DataTypePtr& type) {
+    return type->is_nullable() ? type : make_nullable(type);
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition column;
+    column.identifier = Field::create_field<TYPE_INT>(id);
+    column.name = name;
+    column.type = table_type(type);
+    return column;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition column;
+    column.identifier = Field::create_field<TYPE_INT>(id);
+    column.local_id = id;
+    column.name = name;
+    column.type = type;
+    return column;
+}
+
+schema::external::TFieldPtr external_schema_field(std::string name, int32_t id,
+                                                  std::vector<std::string> aliases = {}) {
+    auto field = std::make_shared<schema::external::TField>();
+    field->__set_name(std::move(name));
+    field->__set_id(id);
+    if (!aliases.empty()) {
+        field->__set_name_mapping(std::move(aliases));
+    }
+    schema::external::TFieldPtr field_ptr;
+    field_ptr.field_ptr = std::move(field);
+    field_ptr.__isset.field_ptr = true;
+    return field_ptr;
+}
+
+schema::external::TFieldPtr external_array_field(std::string name, int32_t id,
+                                                 schema::external::TFieldPtr item_field,
+                                                 std::vector<std::string> aliases = {}) {
+    auto field = external_schema_field(std::move(name), id, std::move(aliases));
+    schema::external::TArrayField array_field;
+    array_field.__set_item_field(std::move(item_field));
+    field.field_ptr->nestedField.__set_array_field(std::move(array_field));
+    field.field_ptr->__isset.nestedField = true;
+    return field;
+}
+
+schema::external::TFieldPtr external_map_field(std::string name, int32_t id,
+                                               schema::external::TFieldPtr key_field,
+                                               schema::external::TFieldPtr value_field,
+                                               std::vector<std::string> aliases = {}) {
+    auto field = external_schema_field(std::move(name), id, std::move(aliases));
+    schema::external::TMapField map_field;
+    map_field.__set_key_field(std::move(key_field));
+    map_field.__set_value_field(std::move(value_field));
+    field.field_ptr->nestedField.__set_map_field(std::move(map_field));
+    field.field_ptr->__isset.nestedField = true;
+    return field;
+}
+
+schema::external::TSchema external_schema(int64_t schema_id,
+                                          std::vector<schema::external::TFieldPtr> fields) {
+    schema::external::TStructField root_field;
+    root_field.__set_fields(std::move(fields));
+    schema::external::TSchema schema;
+    schema.__set_schema_id(schema_id);
+    schema.__set_root_field(std::move(root_field));
+    return schema;
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block block;
+    for (const auto& column : columns) {
+        block.insert({column.type->create_column(), column.type, column.name});
+    }
+    return block;
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (!column.is_nullable()) {
+        return column;
+    }
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+    for (const auto is_null : nullable_column.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    return nullable_column.get_nested_column();
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) {
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder.Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder builder;
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder.Finish(&array).ok());
+    return array;
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values) {
+    ASSERT_EQ(ids.size(), scores.size());
+    ASSERT_EQ(ids.size(), values.size());
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores),
+                                             build_string_array(values)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      static_cast<int64_t>(ids.size()),
+                                                      builder.build()));
+}
+
+int64_t write_paimon_deletion_vector_file(const std::string& file_path,
+                                          const std::vector<uint32_t>& deleted_positions) {
+    roaring::Roaring rows;
+    for (const auto position : deleted_positions) {
+        rows.add(position);
+    }
+
+    const size_t bitmap_size = rows.getSizeInBytes();
+    const uint32_t total_length = static_cast<uint32_t>(4 + bitmap_size);
+    std::vector<char> blob(4 + total_length);
+    BigEndian::Store32(blob.data(), total_length);
+    constexpr char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'};
+    memcpy(blob.data() + 4, PAIMON_BITMAP_MAGIC, 4);
+    rows.write(blob.data() + 8);
+
+    std::ofstream output(file_path, std::ios::binary);
+    EXPECT_TRUE(output.is_open());
+    output.write(blob.data(), static_cast<std::streamsize>(blob.size()));
+    EXPECT_TRUE(output.good());
+    // Paimon DeletionFile.length is magic + bitmap length, excluding the leading length field.
+    return static_cast<int64_t>(total_length);
+}
+
+TFileScanRangeParams make_local_parquet_scan_params() {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_file_type(TFileType::FILE_LOCAL);
+    scan_params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    return scan_params;
+}
+
+std::shared_ptr<io::IOContext> make_io_context(io::FileReaderStats* file_reader_stats,
+                                               io::FileCacheStatistics* file_cache_stats) {
+    auto io_ctx = std::make_shared<io::IOContext>();
+    io_ctx->file_reader_stats = file_reader_stats;
+    io_ctx->file_cache_stats = file_cache_stats;
+    return io_ctx;
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    SplitReadOptions options;
+    options.current_range.__set_path(file_path);
+    options.current_range.__set_file_size(
+            static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    return options;
+}
+
+TTableFormatFileDesc make_paimon_table_format_desc(const std::string& deletion_file_path,
+                                                   int64_t offset, int64_t length) {
+    TTableFormatFileDesc table_format_params;
+    TPaimonFileDesc paimon_params;
+    paimon_params.__set_file_format("parquet");
+    TPaimonDeletionFileDesc deletion_file;
+    deletion_file.__set_path(deletion_file_path);
+    deletion_file.__set_offset(offset);
+    deletion_file.__set_length(length);
+    paimon_params.__set_deletion_file(deletion_file);
+    table_format_params.__set_paimon_params(paimon_params);
+    return table_format_params;
+}
+
+TTableFormatFileDesc make_paimon_schema_table_format_desc(int64_t schema_id) {
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("paimon");
+    TPaimonFileDesc paimon_params;
+    paimon_params.__set_file_format("parquet");
+    paimon_params.__set_schema_id(schema_id);
+    table_format_params.__set_paimon_params(paimon_params);
+    return table_format_params;
+}
+
+TFileRangeDesc make_paimon_native_range(TFileFormatType::type format_type) {
+    TFileRangeDesc range;
+    range.__set_path(format_type == TFileFormatType::FORMAT_ORC ? "s3://bucket/native.orc"
+                                                                : "s3://bucket/native.parquet");
+    range.__set_format_type(format_type);
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("paimon");
+    TPaimonFileDesc paimon_params;
+    paimon_params.__set_file_format(format_type == TFileFormatType::FORMAT_ORC ? "orc" : "parquet");
+    paimon_params.__set_reader_type(TPaimonReaderType::PAIMON_NATIVE);
+    table_format_params.__set_paimon_params(paimon_params);
+    range.__set_table_format_params(table_format_params);
+    return range;
+}
+
+TFileRangeDesc make_paimon_jni_range() {
+    TFileRangeDesc range;
+    range.__set_path("/data-placeholder.parquet");
+    range.__set_format_type(TFileFormatType::FORMAT_JNI);
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("paimon");
+    TPaimonFileDesc paimon_params;
+    paimon_params.__set_file_format("parquet");
+    paimon_params.__set_reader_type(TPaimonReaderType::PAIMON_JNI);
+    paimon_params.__set_paimon_split("serialized-paimon-split");
+    table_format_params.__set_paimon_params(paimon_params);
+    range.__set_table_format_params(table_format_params);
+    return range;
+}
+
+TFileRangeDesc make_paimon_range_without_reader_type(TFileFormatType::type format_type) {
+    TFileRangeDesc range = make_paimon_native_range(format_type);
+    range.table_format_params.paimon_params.__isset.reader_type = false;
+    return range;
+}
+
+// Scenario: PaimonReader shares Hudi's history-schema annotation path. A split whose schema id
+// resolves to a historical schema should use field-id mapping and annotate array/map children so
+// TableColumnMapper can match evolved physical Parquet columns by id instead of by the old names.
+TEST(PaimonReaderTest, AnnotatesArrayAndMapFileSchemaFromSplitHistorySchema) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(
+                    100,
+                    {external_array_field("old_tags", 30,
+                                          external_schema_field("old_item", 31, {"tag"}), {"tags"}),
+                     external_map_field(
+                             "old_props", 40, external_schema_field("old_key", 41, {"key"}),
+                             external_schema_field("old_value", 42, {"score"}), {"props"})}),
+            external_schema(
+                    200, {external_schema_field("tags", 30), external_schema_field("props", 40)}),
+    });
+
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto int_type = std::make_shared<DataTypeInt32>();
+
+    auto tags = make_file_column(0, "old_tags", std::make_shared<DataTypeArray>(string_type));
+    tags.children = {make_file_column(0, "old_item", string_type)};
+
+    auto props =
+            make_file_column(1, "old_props", std::make_shared<DataTypeMap>(string_type, int_type));
+    props.children = {make_file_column(0, "old_key", string_type),
+                      make_file_column(1, "old_value", int_type)};
+
+    std::vector<ColumnDefinition> file_schema {tags, props};
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+
+    ASSERT_EQ(file_schema.size(), 2);
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 30);
+    EXPECT_EQ(file_schema[0].name_mapping, std::vector<std::string>({"tags"}));
+    ASSERT_EQ(file_schema[0].children.size(), 1);
+    EXPECT_EQ(file_schema[0].children[0].get_identifier_field_id(), 31);
+    EXPECT_EQ(file_schema[0].children[0].name_mapping, std::vector<std::string>({"tag"}));
+
+    EXPECT_EQ(file_schema[1].get_identifier_field_id(), 40);
+    EXPECT_EQ(file_schema[1].name_mapping, std::vector<std::string>({"props"}));
+    ASSERT_EQ(file_schema[1].children.size(), 2);
+    EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 41);
+    EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector<std::string>({"key"}));
+    EXPECT_EQ(file_schema[1].children[1].get_identifier_field_id(), 42);
+    EXPECT_EQ(file_schema[1].children[1].name_mapping, std::vector<std::string>({"score"}));
+}
+
+// Scenario: when FE does not send a matching historical schema for the split schema id, Paimon must
+// stay on BY_NAME mapping and must not rewrite the file schema identifiers.
+TEST(PaimonReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", std::make_shared<DataTypeString>()),
+    };
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0);
+    EXPECT_TRUE(file_schema[0].name_mapping.empty());
+}
+
+// Scenario: PaimonReader must clear the previous split schema id before reading a new split. A
+// schema-evolved split must not force the following split without schema id to keep BY_FIELD_ID.
+TEST(PaimonReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"})}),
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_with_schema_id;
+    split_with_schema_id.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_with_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+
+    SplitReadOptions split_without_schema_id;
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("paimon");
+    table_format_params.__set_paimon_params(TPaimonFileDesc {});
+    split_without_schema_id.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_without_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Paimon reader should parse its bitmap deletion vector and let TableReader apply the
+// generated row-position delete predicate before returning table rows.
+TEST(PaimonReaderTest, AppliesBitmapDeletionVectorFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_paimon_deletion_vector_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_length = write_paimon_deletion_vector_file(dv_path, {0, 4});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    paimon::PaimonReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_table_format_desc(dv_path, 0, dv_length));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    std::vector<int32_t> ids;
+    bool eos = false;
+    while (!eos) {
+        Block block = build_table_block(projected_columns);
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        if (block.rows() == 0) {
+            continue;
+        }
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+        for (size_t row = 0; row < block.rows(); ++row) {
+            ids.push_back(id_column.get_element(row));
+        }
+    }
+    EXPECT_EQ(ids, std::vector<int32_t>({2, 3, 4}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(PaimonHybridReaderTest, ClassifiesJniSplitByReaderType) {
+    EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split(
+            make_paimon_native_range(TFileFormatType::FORMAT_PARQUET)));
+    EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split(
+            make_paimon_range_without_reader_type(TFileFormatType::FORMAT_JNI)));
+    EXPECT_TRUE(paimon::PaimonHybridReader::TEST_is_jni_split(make_paimon_jni_range()));
+}
+
+TEST(PaimonHybridReaderTest, ConvertsNativeSplitFileFormat) {
+    FileFormat file_format;
+    ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format(
+                        make_paimon_native_range(TFileFormatType::FORMAT_PARQUET), &file_format)
+                        .ok());
+    EXPECT_EQ(file_format, FileFormat::PARQUET);
+
+    ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format(
+                        make_paimon_native_range(TFileFormatType::FORMAT_ORC), &file_format)
+                        .ok());
+    EXPECT_EQ(file_format, FileFormat::ORC);
+
+    auto status =
+            paimon::PaimonHybridReader::TEST_to_file_format(make_paimon_jni_range(), &file_format);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(std::string::npos, status.to_string().find("Unsupported native Paimon file format"));
+}
+
+TEST(PaimonHybridReaderTest, DispatchesNativeThenJniSplitToMatchingReader) {
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+
+    paimon::PaimonHybridReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = {},
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    SplitReadOptions native_split;
+    native_split.current_range = make_paimon_native_range(TFileFormatType::FORMAT_PARQUET);
+    ASSERT_TRUE(reader.prepare_split(native_split).ok());
+
+    SplitReadOptions jni_split;
+    jni_split.current_range = make_paimon_jni_range();
+    auto status = reader.prepare_split(jni_split);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(std::string::npos, status.to_string().find("missing serialized_table"));
+
+    ASSERT_TRUE(reader.close().ok());
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table_reader_request_test.cpp b/be/test/format_v2/table_reader_request_test.cpp
new file mode 100644
index 00000000000000..3845e086cea1b1
--- /dev/null
+++ b/be/test/format_v2/table_reader_request_test.cpp
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "format_v2/table_reader.h"
+
+namespace doris::format {
+namespace {
+
+class TableReaderRequestTestHelper final : public TableReader {
+public:
+    using TableReader::_append_file_scan_column;
+};
+
+// Scenario: FileScanRequestBuilder owns request-local block positions and merges repeated nested
+// projections for the same root. ColumnMapper can focus on producing file-local projection trees.
+TEST(FileScanRequestBuilderTest, MergesNestedProjectionAndKeepsStableBlockPosition) {
+    FileScanRequest request;
+    FileScanRequestBuilder builder(&request);
+
+    auto name_projection = LocalColumnIndex::partial_local(5);
+    name_projection.children.push_back(LocalColumnIndex::local(2));
+    ASSERT_TRUE(builder.add_non_predicate_column(std::move(name_projection)).ok());
+
+    auto id_projection = LocalColumnIndex::partial_local(5);
+    id_projection.children.push_back(LocalColumnIndex::local(0));
+    ASSERT_TRUE(builder.add_non_predicate_column(std::move(id_projection)).ok());
+
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(5)).value(), 0);
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(5));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 2);
+    EXPECT_EQ(projection.children[0].local_id(), 0);
+    EXPECT_EQ(projection.children[1].local_id(), 2);
+}
+
+// Scenario: predicate scan columns dominate non-predicate columns because file readers return
+// predicate columns in the same file-local block and TableReader can reuse them for output.
+TEST(FileScanRequestBuilderTest, PredicateColumnRemovesDuplicateNonPredicateColumn) {
+    FileScanRequest request;
+    FileScanRequestBuilder builder(&request);
+
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(2)).ok());
+    ASSERT_TRUE(builder.add_predicate_column(LocalColumnId(1)).ok());
+
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1);
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2));
+}
+
+// Scenario: TableReader's format-specific customization path delegates to FileScanRequestBuilder
+// and preserves the same predicate/non-predicate de-duplication rule.
+TEST(TableReaderRequestTest, AppendPredicateColumnKeepsOtherNonPredicateColumns) {
+    TableReaderRequestTestHelper reader;
+    FileScanRequest request;
+
+    reader._append_file_scan_column(&request, LocalColumnId(1), &request.non_predicate_columns);
+    reader._append_file_scan_column(&request, LocalColumnId(2), &request.non_predicate_columns);
+    reader._append_file_scan_column(&request, LocalColumnId(1), &request.predicate_columns);
+
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1);
+
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2));
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table_reader_test.cpp b/be/test/format_v2/table_reader_test.cpp
new file mode 100644
index 00000000000000..6f6e96427591d2
--- /dev/null
+++ b/be/test/format_v2/table_reader_test.cpp
@@ -0,0 +1,3609 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "gen_cpp/Exprs_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris::format {
+namespace {
+
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    std::vector<int32_t> ids;
+    ids.reserve(projections.size());
+    for (const auto& projection : projections) {
+        ids.push_back(projection.index);
+    }
+    return ids;
+}
+
+TEST(LocalColumnIndexTest, MergeUnionsPartialChildrenAndFullProjectionDominates) {
+    LocalColumnIndex target {.index = 10, .project_all_children = false};
+    target.children.push_back({.index = 1});
+    target.children.push_back({.index = 2, .project_all_children = false});
+    target.children.back().children.push_back({.index = 20});
+
+    LocalColumnIndex source {.index = 10, .project_all_children = false};
+    source.children.push_back({.index = 2, .project_all_children = false});
+    source.children.back().children.push_back({.index = 21});
+    source.children.push_back({.index = 3});
+
+    ASSERT_TRUE(merge_local_column_index(&target, source).ok());
+    ASSERT_FALSE(target.project_all_children);
+    ASSERT_EQ(std::vector<int32_t>({1, 2, 3}), projection_ids(target.children));
+    ASSERT_FALSE(target.children[1].project_all_children);
+    ASSERT_EQ(std::vector<int32_t>({20, 21}), projection_ids(target.children[1].children));
+    ASSERT_TRUE(target.children[2].project_all_children);
+
+    LocalColumnIndex full_source {.index = 10};
+    ASSERT_TRUE(merge_local_column_index(&target, full_source).ok());
+    ASSERT_TRUE(target.project_all_children);
+    ASSERT_TRUE(target.children.empty());
+}
+
+TEST(LocalColumnIndexTest, FindsProjectedChildren) {
+    LocalColumnIndex projection {.index = 10, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+    projection.children.push_back({.index = 2});
+
+    EXPECT_TRUE(is_full_projection(nullptr));
+    EXPECT_FALSE(is_full_projection(&projection));
+    EXPECT_TRUE(is_partial_projection(&projection));
+    ASSERT_NE(find_child_projection(&projection, 2), nullptr);
+    EXPECT_EQ(find_child_projection(&projection, 2)->local_id(), 2);
+    EXPECT_EQ(find_child_projection(&projection, 3), nullptr);
+    EXPECT_TRUE(is_child_projected(nullptr, 3));
+    EXPECT_TRUE(is_child_projected(&projection, 1));
+    EXPECT_FALSE(is_child_projected(&projection, 3));
+}
+
+TEST(LocalColumnIndexTest, ProjectColumnDefinitionMatchesChildrenByLocalId) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(5);
+    field.name = "root";
+    field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    ColumnDefinition a_child;
+    a_child.identifier = Field::create_field<TYPE_INT>(10);
+    a_child.local_id = 0;
+    a_child.name = "a";
+    a_child.type = int_type;
+    ColumnDefinition b_child;
+    b_child.identifier = Field::create_field<TYPE_INT>(20);
+    b_child.local_id = 1;
+    b_child.name = "b";
+    b_child.type = string_type;
+    field.children = {
+            a_child,
+            b_child,
+    };
+    LocalColumnIndex projection {.index = 5, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+
+    ColumnDefinition projected_field;
+    ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok());
+    ASSERT_EQ(projected_field.children.size(), 1);
+    EXPECT_EQ(projected_field.children[0].get_identifier_field_id(), 20);
+    EXPECT_EQ(projected_field.children[0].name, "b");
+
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_field.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+    EXPECT_TRUE(projected_type->get_element(0)->equals(*string_type));
+}
+
+TEST(LocalColumnIndexTest, ProjectColumnDefinitionKeepsFileChildOrder) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    ColumnDefinition a_child;
+    a_child.identifier = Field::create_field<TYPE_INT>(10);
+    a_child.local_id = 0;
+    a_child.name = "a";
+    a_child.type = int_type;
+    ColumnDefinition b_child;
+    b_child.identifier = Field::create_field<TYPE_INT>(20);
+    b_child.local_id = 1;
+    b_child.name = "b";
+    b_child.type = string_type;
+
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(5);
+    field.name = "root";
+    field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    field.children = {a_child, b_child};
+
+    LocalColumnIndex projection {.index = 5, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+    projection.children.push_back({.index = 0});
+
+    ColumnDefinition projected_field;
+    ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok());
+    ASSERT_EQ(projected_field.children.size(), 2);
+    EXPECT_EQ(projected_field.children[0].name, "a");
+    EXPECT_EQ(projected_field.children[1].name, "b");
+
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_field.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 2);
+    EXPECT_EQ(projected_type->get_element_name(0), "a");
+    EXPECT_EQ(projected_type->get_element_name(1), "b");
+}
+
+VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) {
+    const auto nullable_int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    return VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int_type, column_name);
+}
+
+VExprSPtr table_int32_literal(int32_t value) {
+    return VLiteral::create_shared(std::make_shared<DataTypeInt32>(),
+                                   Field::create_field<TYPE_INT>(value));
+}
+
+TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE,
+                              bool short_circuit_evaluation = false) {
+    TFunctionName fn_name;
+    fn_name.__set_function_name(function_name);
+    TFunction fn;
+    fn.__set_name(fn_name);
+    fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+    std::vector<TTypeDesc> thrift_arg_types;
+    thrift_arg_types.reserve(arg_types.size());
+    for (const auto& arg_type : arg_types) {
+        thrift_arg_types.push_back(arg_type->to_thrift());
+    }
+    fn.__set_arg_types(thrift_arg_types);
+    fn.__set_ret_type(return_type->to_thrift());
+    fn.__set_has_var_args(false);
+
+    TExprNode node;
+    node.__set_node_type(node_type);
+    node.__set_opcode(opcode);
+    node.__set_type(return_type->to_thrift());
+    node.__set_fn(fn);
+    node.__set_num_children(static_cast<int16_t>(arg_types.size()));
+    node.__set_is_nullable(return_type->is_nullable());
+    if (short_circuit_evaluation) {
+        node.__set_short_circuit_evaluation(true);
+    }
+    return node;
+}
+
+VExprSPtr create_expr_from_node(const TExprNode& node) {
+    VExprSPtr expr;
+    auto status = VExpr::create_expr(node, expr);
+    DORIS_CHECK(status.ok()) << status.to_string();
+    return expr;
+}
+
+VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) {
+    const auto node = table_function_node(function_name, return_type, arg_types, node_type, opcode);
+    return VectorizedFnCall::create_shared(node);
+}
+
+VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto expr = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED,
+                                    TExprOpcode::GT);
+    expr->add_child(table_int32_slot_ref(slot_id, column_id, "id"));
+    expr->add_child(table_int32_literal(value));
+    return expr;
+}
+
+VExprSPtr runtime_filter_wrapper_expr(VExprSPtr impl) {
+    TExprNode node;
+    node.__set_node_type(TExprNodeType::SLOT_REF);
+    node.__set_type(std::make_shared<DataTypeUInt8>()->to_thrift());
+    node.__set_num_children(1);
+    return RuntimeFilterExpr::create_shared(node, std::move(impl), 0, false, /*filter_id=*/1);
+}
+
+class NullableArrayBigintDefaultExpr final : public VExpr {
+public:
+    explicit NullableArrayBigintDefaultExpr(DataTypePtr data_type)
+            : _name("single_element_groups") {
+        _data_type = std::move(data_type);
+    }
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block*, const Selector* selector, size_t count,
+                               ColumnPtr& result_column) const override {
+        DCHECK(selector == nullptr || selector->size() == count);
+        auto values = ColumnInt64::create();
+        auto offsets = ColumnArray::ColumnOffsets::create();
+        auto null_map = ColumnUInt8::create();
+        for (size_t i = 0; i < count; ++i) {
+            values->insert_value(7);
+            offsets->insert_value(static_cast<Int64>(i + 1));
+            null_map->insert_value(0);
+        }
+        auto array_column = ColumnArray::create(std::move(values), std::move(offsets));
+        result_column = ColumnNullable::create(std::move(array_column), std::move(null_map));
+        return Status::OK();
+    }
+
+private:
+    std::string _name;
+};
+
+class TableReaderMaterializeTestHelper final : public TableReader {
+public:
+    using TableReader::_materialize_map_mapping_column;
+};
+
+VExprSPtr table_int32_sum_expr(int left_slot_id, int left_column_id, int right_slot_id,
+                               int right_column_id) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto expr =
+            table_function_expr("add", nullable_int_type, {nullable_int_type, nullable_int_type});
+    expr->add_child(table_int32_slot_ref(left_slot_id, left_column_id, "id"));
+    expr->add_child(table_int32_slot_ref(right_slot_id, right_column_id, "score"));
+    return expr;
+}
+
+VExprSPtr table_int32_sum_greater_than_expr(int left_slot_id, int left_column_id, int right_slot_id,
+                                            int right_column_id, int32_t value) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto expr = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED,
+                                    TExprOpcode::GT);
+    expr->add_child(
+            table_int32_sum_expr(left_slot_id, left_column_id, right_slot_id, right_column_id));
+    expr->add_child(table_int32_literal(value));
+    return expr;
+}
+
+VExprSPtr table_condition_function_expr(const std::string& function_name, bool short_circuit) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    std::vector<DataTypePtr> arg_types;
+    if (function_name == "if") {
+        arg_types = {std::make_shared<DataTypeUInt8>(), int_type, int_type};
+    } else {
+        arg_types = {int_type, int_type};
+    }
+    auto expr = create_expr_from_node(
+            table_function_node(function_name, int_type, arg_types, TExprNodeType::FUNCTION_CALL,
+                                TExprOpcode::INVALID_OPCODE, short_circuit));
+    if (function_name == "if") {
+        expr->add_child(table_int32_greater_than_expr(0, 0, 0));
+        expr->add_child(table_int32_literal(1));
+        expr->add_child(table_int32_literal(0));
+    } else {
+        expr->add_child(table_int32_slot_ref(0, 0, "id"));
+        expr->add_child(table_int32_literal(0));
+    }
+    return expr;
+}
+
+VExprSPtr table_case_expr(bool short_circuit) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    TCaseExpr case_node;
+    case_node.__set_has_case_expr(false);
+    case_node.__set_has_else_expr(true);
+
+    TExprNode node;
+    node.__set_node_type(TExprNodeType::CASE_EXPR);
+    node.__set_type(int_type->to_thrift());
+    node.__set_is_nullable(false);
+    node.__set_num_children(3);
+    node.__set_case_expr(case_node);
+    if (short_circuit) {
+        node.__set_short_circuit_evaluation(true);
+    }
+
+    auto expr = create_expr_from_node(node);
+    expr->add_child(table_int32_greater_than_expr(0, 0, 0));
+    expr->add_child(table_int32_literal(1));
+    expr->add_child(table_int32_literal(0));
+    return expr;
+}
+
+TEST(CloneTableExprTreeTest, ClonesConditionalExpressions) {
+    const std::vector<VExprSPtr> expressions {
+            table_condition_function_expr("if", false),
+            table_condition_function_expr("if", true),
+            table_condition_function_expr("ifnull", false),
+            table_condition_function_expr("ifnull", true),
+            table_condition_function_expr("coalesce", false),
+            table_condition_function_expr("coalesce", true),
+            table_case_expr(false),
+            table_case_expr(true),
+    };
+
+    for (const auto& expr : expressions) {
+        VExprSPtr cloned;
+        const auto status = clone_table_expr_tree(expr, &cloned);
+        ASSERT_TRUE(status.ok()) << expr->debug_string() << ": " << status.to_string();
+        ASSERT_NE(cloned, nullptr);
+        const auto* original_expr = expr.get();
+        const auto* cloned_expr = cloned.get();
+        EXPECT_TRUE(typeid(*original_expr) == typeid(*cloned_expr))
+                << expr->expr_name() << " cloned as " << typeid(*cloned_expr).name();
+        EXPECT_EQ(expr->expr_name(), cloned->expr_name());
+        EXPECT_EQ(expr->get_num_children(), cloned->get_num_children());
+        EXPECT_NE(original_expr, cloned_expr);
+    }
+}
+
+// Scenario: cloning a VectorizedFnCall whose return type is complex must not reconstruct the expr
+// from TExprNode, because DataTypeFactory rejects nested types through the primitive-type path.
+TEST(CloneTableExprTreeTest, ClonesVectorizedFnCallWithComplexReturnType) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    const auto array_type = std::make_shared<DataTypeArray>(struct_type);
+
+    auto expr = table_function_expr("element_at", struct_type, {array_type, int_type});
+    expr->add_child(VSlotRef::create_shared(0, 0, -1, array_type, "array_of_struct"));
+    expr->add_child(table_int32_literal(1));
+
+    VExprSPtr cloned;
+    const auto status = clone_table_expr_tree(expr, &cloned);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_NE(cloned, nullptr);
+    EXPECT_EQ(cloned->expr_name(), expr->expr_name());
+    EXPECT_TRUE(cloned->data_type()->equals(*struct_type));
+    EXPECT_EQ(cloned->get_num_children(), 2);
+    EXPECT_NE(cloned.get(), expr.get());
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array;
+    EXPECT_TRUE(builder->Finish(&array).ok());
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder builder;
+    for (const auto value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder builder;
+    for (const auto& value : values) {
+        EXPECT_TRUE(builder.Append(value).ok());
+    }
+    return finish_array(&builder);
+}
+
+void write_parquet_file(const std::string& file_path, int32_t id, const std::string& value) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array({id}), build_string_array({value})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_struct_parquet_file(const std::string& file_path, int32_t id) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false)});
+    arrow::StructBuilder builder(
+            struct_type, arrow::default_memory_pool(),
+            {std::make_shared<arrow::Int32Builder>(arrow::default_memory_pool())});
+    auto* id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(id_builder->Append(id).ok());
+
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {finish_array(&builder)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      writer_builder.build()));
+}
+
+void write_struct_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                               int64_t row_group_size = -1) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false)});
+    arrow::StructBuilder builder(
+            struct_type, arrow::default_memory_pool(),
+            {std::make_shared<arrow::Int32Builder>(arrow::default_memory_pool())});
+    auto* id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    for (const auto id : ids) {
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(id_builder->Append(id).ok());
+    }
+
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {finish_array(&builder)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    const auto write_row_group_size =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      write_row_group_size,
+                                                      writer_builder.build()));
+}
+
+void write_struct_with_nullable_child_parquet_file(const std::string& file_path) {
+    auto struct_type = arrow::struct_({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("note", arrow::utf8(), true),
+    });
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    auto id_builder = std::make_unique<arrow::Int32Builder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(id_builder)));
+    auto note_builder = std::make_unique<arrow::StringBuilder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(note_builder)));
+    arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                 std::move(field_builders));
+    auto* struct_id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    auto* struct_note_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(struct_id_builder->Append(7).ok());
+    EXPECT_TRUE(struct_note_builder->Append("seven").ok());
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(struct_id_builder->Append(8).ok());
+    EXPECT_TRUE(struct_note_builder->AppendNull().ok());
+
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {finish_array(&builder)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2,
+                                                      writer_builder.build()));
+}
+
+void write_list_struct_parquet_file(const std::string& file_path) {
+    auto struct_type = arrow::struct_(
+            {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::int32(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+    auto b_array_builder = std::make_unique<arrow::Int32Builder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+    auto struct_builder = std::make_shared<arrow::StructBuilder>(
+            struct_type, arrow::default_memory_pool(), std::move(field_builders));
+    auto list_type = arrow::list(arrow::field("element", struct_type, true));
+    arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder, list_type);
+    auto* a_builder = assert_cast<arrow::Int32Builder*>(struct_builder->field_builder(0));
+    auto* b_builder = assert_cast<arrow::Int32Builder*>(struct_builder->field_builder(1));
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(struct_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(10).ok());
+    EXPECT_TRUE(b_builder->Append(11).ok());
+    EXPECT_TRUE(struct_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(20).ok());
+    EXPECT_TRUE(b_builder->Append(21).ok());
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(struct_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(30).ok());
+    EXPECT_TRUE(b_builder->Append(31).ok());
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(struct_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(40).ok());
+    EXPECT_TRUE(b_builder->Append(41).ok());
+
+    auto schema = arrow::schema({
+            arrow::field("xs", list_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {finish_array(&builder)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 3,
+                                                      writer_builder.build()));
+}
+
+void write_map_struct_parquet_file(const std::string& file_path) {
+    auto key_builder = std::make_shared<arrow::Int32Builder>();
+    auto struct_type = arrow::struct_(
+            {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+    auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+    field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+    auto value_builder = std::make_shared<arrow::StructBuilder>(
+            struct_type, arrow::default_memory_pool(), std::move(field_builders));
+    auto map_type = arrow::map(arrow::int32(), arrow::field("value", struct_type, false));
+    arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, map_type);
+    auto* a_builder = assert_cast<arrow::Int32Builder*>(value_builder->field_builder(0));
+    auto* b_builder = assert_cast<arrow::StringBuilder*>(value_builder->field_builder(1));
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(key_builder->Append(1).ok());
+    EXPECT_TRUE(value_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(10).ok());
+    EXPECT_TRUE(b_builder->Append("ma").ok());
+    EXPECT_TRUE(key_builder->Append(2).ok());
+    EXPECT_TRUE(value_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(20).ok());
+    EXPECT_TRUE(b_builder->Append("mb").ok());
+
+    EXPECT_TRUE(builder.Append().ok());
+    EXPECT_TRUE(key_builder->Append(3).ok());
+    EXPECT_TRUE(value_builder->Append().ok());
+    EXPECT_TRUE(a_builder->Append(30).ok());
+    EXPECT_TRUE(b_builder->Append("mc").ok());
+
+    EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+    auto schema = arrow::schema({
+            arrow::field("kv", map_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {finish_array(&builder)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder writer_builder;
+    writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 3,
+                                                      writer_builder.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values,
+                                 int64_t row_group_size = -1) {
+    const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto score_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"});
+    const auto value_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+            arrow::field("score", arrow::int32(), false)->WithMetadata(score_metadata),
+            arrow::field("value", arrow::utf8(), false)->WithMetadata(value_metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores),
+                                             build_string_array(values)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    const auto write_row_group_size =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      write_row_group_size, builder.build()));
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block block;
+    for (const auto& column : columns) {
+        block.insert({column.type->create_column(), column.type, column.name});
+    }
+    return block;
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (!column.is_nullable()) {
+        return column;
+    }
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+    for (const auto is_null : nullable_column.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    return nullable_column.get_nested_column();
+}
+
+void expect_nullable_column_all_null(const IColumn& column) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*full_column);
+    for (const auto is_null : nullable_column.get_null_map_data()) {
+        EXPECT_EQ(is_null, 1);
+    }
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) {
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type);
+
+void expect_int32_column_values(const IColumn& column,
+                                const std::vector<int32_t>& expected_values) {
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& nested_column = expect_not_null_nullable_nested_column(*full_column);
+    const auto& values = assert_cast<const ColumnInt32&>(nested_column).get_data();
+    ASSERT_EQ(values.size(), expected_values.size());
+    for (size_t row = 0; row < expected_values.size(); ++row) {
+        EXPECT_EQ(values[row], expected_values[row]);
+    }
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    SplitReadOptions options;
+    options.current_range.__set_path(file_path);
+    options.current_range.__set_file_size(
+            static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    return options;
+}
+
+void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) {
+    split_options->current_range.__isset.table_format_params = true;
+    split_options->current_range.table_format_params.__isset.table_level_row_count = true;
+    split_options->current_range.table_format_params.table_level_row_count = row_count;
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    return column_metadata.has_dictionary_page()
+                   ? static_cast<int64_t>(column_metadata.dictionary_page_offset())
+                   : static_cast<int64_t>(column_metadata.data_page_offset());
+}
+
+SplitReadOptions build_split_options_for_row_group_mid(const std::string& file_path,
+                                                       int row_group_idx) {
+    auto options = build_split_options(file_path);
+    auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    auto metadata = reader->metadata();
+    auto row_group_metadata = metadata->RowGroup(row_group_idx);
+    auto first_column = row_group_metadata->ColumnChunk(0);
+    auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1);
+    const int64_t row_group_start_offset = parquet_column_start_offset(*first_column);
+    const int64_t row_group_end_offset =
+            parquet_column_start_offset(*last_column) + last_column->total_compressed_size();
+    const int64_t row_group_mid_offset =
+            row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2;
+    options.current_range.__set_start_offset(row_group_mid_offset);
+    options.current_range.__set_size(1);
+    return options;
+}
+
+DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) {
+    DORIS_CHECK(type != nullptr);
+    const auto nested_type = remove_nullable(type);
+    DataTypePtr result;
+    if (const auto* struct_type = typeid_cast<const DataTypeStruct*>(nested_type.get())) {
+        DataTypes child_types;
+        child_types.reserve(struct_type->get_elements().size());
+        for (const auto& child_type : struct_type->get_elements()) {
+            child_types.push_back(make_table_test_type(child_type));
+        }
+        result = std::make_shared<DataTypeStruct>(child_types, struct_type->get_element_names());
+    } else if (const auto* array_type = typeid_cast<const DataTypeArray*>(nested_type.get())) {
+        result = std::make_shared<DataTypeArray>(
+                make_table_test_type(array_type->get_nested_type()));
+    } else if (const auto* map_type = typeid_cast<const DataTypeMap*>(nested_type.get())) {
+        result = std::make_shared<DataTypeMap>(make_table_test_type(map_type->get_key_type()),
+                                               make_table_test_type(map_type->get_value_type()));
+    } else {
+        result = nested_type;
+    }
+    return nullable_root ? make_nullable(result) : result;
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition column;
+    if (id >= 0) {
+        column.identifier = Field::create_field<TYPE_INT>(id);
+    }
+    column.name = name;
+    // TableReader tests model external table scan descriptors. Those table columns are nullable
+    // even when the Parquet file field itself is required, so keep the test schema aligned with
+    // the real scan contract at the construction boundary.
+    column.type = make_table_test_type(type);
+    return column;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(id);
+    field.local_id = id;
+    field.name = name;
+    field.type = make_table_test_type(type);
+    return field;
+}
+
+ColumnDefinition make_nullable_column_definition(ColumnDefinition column) {
+    column.type = make_table_test_type(column.type);
+    for (auto& child : column.children) {
+        child = make_nullable_column_definition(std::move(child));
+    }
+    return column;
+}
+
+MutableColumnPtr make_not_null_nullable_column(MutableColumnPtr nested_column) {
+    auto null_map = ColumnUInt8::create();
+    for (size_t i = 0; i < nested_column->size(); ++i) {
+        null_map->insert_value(0);
+    }
+    return ColumnNullable::create(std::move(nested_column), std::move(null_map));
+}
+
+class TableReaderCharVarcharTestHelper final : public TableReader {
+public:
+    using TableReader::_should_truncate_char_or_varchar_column;
+    using TableReader::_truncate_char_or_varchar_column;
+};
+
+TEST(TableReaderTest, TruncateCharOrVarcharPredicateOnlyAppliesToParquetStringWidthMismatch) {
+    ColumnMapping mapping;
+    mapping.table_type = std::make_shared<DataTypeString>(3, TYPE_VARCHAR);
+    mapping.file_type = std::make_shared<DataTypeString>(10, TYPE_VARCHAR);
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+
+    mapping.file_type = std::make_shared<DataTypeString>(2, TYPE_VARCHAR);
+    EXPECT_FALSE(
+            TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+
+    mapping.file_type = std::make_shared<DataTypeString>();
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+
+    mapping.file_type = std::make_shared<DataTypeInt32>();
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+
+    mapping.table_type = std::make_shared<DataTypeString>();
+    EXPECT_FALSE(
+            TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+}
+
+TEST(TableReaderTest, TruncateCharOrVarcharColumnKeepsNullMap) {
+    auto nested = ColumnString::create();
+    nested->insert_data("abcdef", 6);
+    nested->insert_data("xyz", 3);
+    auto null_map = ColumnUInt8::create();
+    null_map->insert_value(0);
+    null_map->insert_value(1);
+
+    auto type = make_nullable(std::make_shared<DataTypeString>(3, TYPE_VARCHAR));
+    Block block;
+    block.insert({ColumnNullable::create(std::move(nested), std::move(null_map)), type, "v"});
+
+    TableReaderCharVarcharTestHelper::_truncate_char_or_varchar_column(&block, 0, 3);
+
+    ASSERT_EQ(block.columns(), 1);
+    ASSERT_EQ(block.rows(), 2);
+    const auto* nullable_column =
+            assert_cast<const ColumnNullable*>(block.get_by_position(0).column.get());
+    EXPECT_EQ(nullable_column->get_nested_column().get_data_at(0).to_string(), "abc");
+    EXPECT_FALSE(nullable_column->is_null_at(0));
+    EXPECT_TRUE(nullable_column->is_null_at(1));
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns);
+
+void set_name_identifier(ColumnDefinition* column) {
+    DORIS_CHECK(column != nullptr);
+    column->identifier = Field::create_field<TYPE_STRING>(column->name);
+    set_name_identifiers(&column->children);
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);
+    for (auto& column : *columns) {
+        set_name_identifier(&column);
+    }
+}
+
+void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index,
+                          std::shared_ptr<ColumnPredicate> predicate) {
+    auto& entry = (*column_predicates)[global_index];
+    entry.push_back(std::move(predicate));
+}
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto ctx = VExprContext::create_shared(expr);
+    auto status = ctx->prepare(state, RowDescriptor());
+    EXPECT_TRUE(status.ok()) << status;
+    status = ctx->open(state);
+    EXPECT_TRUE(status.ok()) << status;
+    return ctx;
+}
+
+struct FakeFileReaderState {
+    int init_count = 0;
+    int open_count = 0;
+    int close_count = 0;
+    int64_t total_rows = 2;
+    bool eof_with_first_batch = true;
+    bool inject_delete_conjunct = false;
+    std::shared_ptr<FileScanRequest> last_request;
+    std::shared_ptr<ConditionCacheContext> condition_cache_ctx;
+};
+
+class FakeFileReader final : public FileReader {
+public:
+    FakeFileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                   std::unique_ptr<io::FileDescription>& file_description,
+                   std::vector<ColumnDefinition> schema, std::shared_ptr<FakeFileReaderState> state)
+            : FileReader(system_properties, file_description, nullptr, nullptr),
+              _schema(std::move(schema)),
+              _state(std::move(state)) {}
+
+    Status init(RuntimeState* state) override {
+        (void)state;
+        ++_state->init_count;
+        _eof = false;
+        return Status::OK();
+    }
+
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override {
+        DORIS_CHECK(file_schema != nullptr);
+        *file_schema = _schema;
+        for (auto& column : *file_schema) {
+            column = make_nullable_column_definition(std::move(column));
+        }
+        return Status::OK();
+    }
+
+    Status open(std::shared_ptr<FileScanRequest> request) override {
+        RETURN_IF_ERROR(FileReader::open(std::move(request)));
+        _state->last_request = _request;
+        ++_state->open_count;
+        _returned_batch = false;
+        return Status::OK();
+    }
+
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override {
+        DORIS_CHECK(file_block != nullptr);
+        DORIS_CHECK(rows != nullptr);
+        DORIS_CHECK(eof != nullptr);
+        DORIS_CHECK(_request != nullptr);
+        if (_returned_batch) {
+            *rows = 0;
+            *eof = true;
+            return Status::OK();
+        }
+
+        for (const auto& [file_column_id, block_position] : _request->local_positions) {
+            if (file_column_id == LocalColumnId(0)) {
+                auto column = ColumnInt32::create();
+                column->insert_value(1);
+                column->insert_value(2);
+                file_block->replace_by_position(block_position.value(),
+                                                make_not_null_nullable_column(std::move(column)));
+            } else if (file_column_id == LocalColumnId(1)) {
+                auto column = ColumnString::create();
+                column->insert_data("one", 3);
+                column->insert_data("two", 3);
+                file_block->replace_by_position(block_position.value(),
+                                                make_not_null_nullable_column(std::move(column)));
+            } else if (file_column_id == LocalColumnId(2)) {
+                auto country_values = ColumnString::create();
+                country_values->insert_data("USA", 3);
+                country_values->insert_data("UK", 2);
+                auto country_column = make_not_null_nullable_column(std::move(country_values));
+
+                auto city_column = ColumnString::create();
+                city_column->insert_data("New York", 8);
+                city_column->insert_data("London", 6);
+
+                MutableColumns struct_children;
+                struct_children.push_back(std::move(country_column));
+                struct_children.push_back(make_not_null_nullable_column(std::move(city_column)));
+                auto struct_column = ColumnStruct::create(std::move(struct_children));
+
+                file_block->replace_by_position(
+                        block_position.value(),
+                        make_not_null_nullable_column(std::move(struct_column)));
+            } else {
+                return Status::InvalidArgument("Unexpected fake file column id {}",
+                                               file_column_id.value());
+            }
+        }
+
+        _returned_batch = true;
+        *rows = 2;
+        *eof = _state->eof_with_first_batch;
+        if (_state->condition_cache_ctx != nullptr && !_state->condition_cache_ctx->is_hit &&
+            _state->condition_cache_ctx->filter_result != nullptr &&
+            !_state->condition_cache_ctx->filter_result->empty()) {
+            // The real file reader marks a granule after local row-level predicates keep at least
+            // one row from that granule. The fake reader does it here so TableReader tests can
+            // focus on condition-cache lifecycle decisions without depending on Parquet internals.
+            (*_state->condition_cache_ctx->filter_result)[0] = true;
+        }
+        return Status::OK();
+    }
+
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override {
+        _state->condition_cache_ctx = std::move(ctx);
+    }
+
+    int64_t get_total_rows() const override { return _state->total_rows; }
+
+    Status close() override {
+        ++_state->close_count;
+        _request.reset();
+        _eof = true;
+        return Status::OK();
+    }
+
+private:
+    std::vector<ColumnDefinition> _schema;
+    std::shared_ptr<FakeFileReaderState> _state;
+    bool _returned_batch = false;
+};
+
+class FakeTableReader final : public TableReader {
+public:
+    FakeTableReader(std::vector<ColumnDefinition> file_schema,
+                    std::shared_ptr<FakeFileReaderState> state)
+            : _file_schema(std::move(file_schema)), _state(std::move(state)) {}
+
+protected:
+    Status create_file_reader(std::unique_ptr<FileReader>* reader) override {
+        DORIS_CHECK(reader != nullptr);
+        auto system_properties = std::make_shared<io::FileSystemProperties>();
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        auto file_description = std::make_unique<io::FileDescription>();
+        file_description->path = "fake-table-reader-input";
+        *reader = std::make_unique<FakeFileReader>(system_properties, file_description,
+                                                   _file_schema, _state);
+        return Status::OK();
+    }
+
+    Status customize_file_scan_request(FileScanRequest* file_request) override {
+        RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request));
+        if (_state->inject_delete_conjunct) {
+            // Table-format delete handling is represented in v2 by TableReader injecting
+            // delete_conjuncts into the file scan request. The fake reader does not execute it;
+            // this only tests that condition cache is disabled once such table-level delete state
+            // is present in the request.
+            file_request->delete_conjuncts.push_back(
+                    VExprContext::create_shared(table_int32_literal(1)));
+        }
+        return Status::OK();
+    }
+
+private:
+    std::vector<ColumnDefinition> _file_schema;
+    std::shared_ptr<FakeFileReaderState> _state;
+};
+
+class ScopedConditionCacheForTest {
+public:
+    ScopedConditionCacheForTest()
+            : _previous(ExecEnv::GetInstance()->get_condition_cache()),
+              _cache(segment_v2::ConditionCache::create_global_cache(1024 * 1024, 4)) {
+        ExecEnv::GetInstance()->_condition_cache = _cache.get();
+    }
+
+    ~ScopedConditionCacheForTest() { ExecEnv::GetInstance()->_condition_cache = _previous; }
+
+    segment_v2::ConditionCache* get() { return _cache.get(); }
+
+private:
+    segment_v2::ConditionCache* _previous = nullptr;
+    std::unique_ptr<segment_v2::ConditionCache> _cache;
+};
+
+TEST(TableReaderTest, CanUseInjectedFileReaderForStandaloneUnitTest) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    file_schema.push_back(make_file_column(1, "value", std::make_shared<DataTypeString>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_FALSE(eos);
+
+    ASSERT_EQ(fake_state->init_count, 1);
+    ASSERT_EQ(fake_state->open_count, 1);
+    ASSERT_EQ(fake_state->close_count, 1);
+    ASSERT_NE(fake_state->last_request, nullptr);
+    ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(1)).value(), 0);
+    ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(projection_ids(fake_state->last_request->non_predicate_columns),
+              std::vector<int32_t>({1, 0}));
+    EXPECT_TRUE(fake_state->last_request->predicate_columns.empty());
+
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(value_column.get_data_at(1).to_string(), "two");
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 2);
+
+    block = build_table_block(projected_columns);
+    eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+}
+
+TEST(TableReaderTest, ComplexRematerializeCastsScalarChildToTableType) {
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    const auto file_struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_string_type, string_type}, Strings {"country", "city"}));
+    auto file_struct_column = make_file_column(2, "struct_column", file_struct_type);
+    file_struct_column.children = {make_file_column(0, "country", nullable_string_type),
+                                   make_file_column(1, "city", string_type)};
+    std::vector<ColumnDefinition> file_schema = {file_struct_column};
+
+    const auto table_struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_string_type, nullable_string_type}, Strings {"country", "city"}));
+    auto country_child = make_table_column(0, "country", nullable_string_type);
+    auto city_child = make_table_column(1, "city", nullable_string_type);
+    auto table_struct_column = make_table_column(2, "struct_column", table_struct_type);
+    table_struct_column.children = {country_child, city_child};
+    std::vector<ColumnDefinition> projected_columns = {table_struct_column};
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+    ASSERT_TRUE(block.check_type_and_column().ok()) << block.dump_structure();
+
+    const auto& result_nullable =
+            assert_cast<const ColumnNullable&>(*block.get_by_position(0).column);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(result_nullable.get_nested_column());
+    ASSERT_EQ(struct_result.get_columns().size(), 2);
+    const auto& country_column = assert_cast<const ColumnNullable&>(struct_result.get_column(0));
+    const auto& city_column = assert_cast<const ColumnNullable&>(struct_result.get_column(1));
+    const auto& country_values =
+            assert_cast<const ColumnString&>(country_column.get_nested_column());
+    const auto& city_values = assert_cast<const ColumnString&>(city_column.get_nested_column());
+    ASSERT_EQ(city_column.size(), 2);
+    EXPECT_FALSE(city_column.is_null_at(0));
+    EXPECT_FALSE(city_column.is_null_at(1));
+    EXPECT_EQ(country_values.get_data_at(0).to_string(), "USA");
+    EXPECT_EQ(country_values.get_data_at(1).to_string(), "UK");
+    EXPECT_EQ(city_values.get_data_at(0).to_string(), "New York");
+    EXPECT_EQ(city_values.get_data_at(1).to_string(), "London");
+}
+
+TEST(TableReaderTest, ReopenSplitAfterClose) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const std::vector<std::string> file_paths = {
+            (test_dir / "split_1.parquet").string(),
+            (test_dir / "split_2.parquet").string(),
+            (test_dir / "split_3.parquet").string(),
+    };
+    write_parquet_file(file_paths[0], 1, "one");
+    write_parquet_file(file_paths[1], 2, "two");
+    write_parquet_file(file_paths[2], 3, "three");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    // Simulate the scanner lifecycle for three different splits:
+    // init() once, then repeat prepare_split() -> get_block() -> close().
+    // This verifies TableReader::close() fully releases the previous low-level reader and task
+    // state, so a later prepare_split() can open and read a new split on the same TableReader.
+    // The table-level conjunct is also rebuilt for each split. The projection order puts value
+    // before id, so the pushed conjunct has to be rewritten to the ParquetReader file-local block
+    // position every time a new split is opened.
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    for (const auto& file_path : file_paths) {
+        auto split_options = build_split_options(file_path);
+        ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+        Block block = build_table_block(projected_columns);
+        bool eos = false;
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        ASSERT_FALSE(eos);
+
+        const auto& value_column =
+                assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+        ASSERT_EQ(id_column.size(), 1);
+        ASSERT_EQ(value_column.size(), 1);
+        ids.push_back(id_column.get_element(0));
+        values.push_back(value_column.get_data_at(0).to_string());
+
+        ASSERT_TRUE(reader.close().ok());
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3}));
+    EXPECT_EQ(values, std::vector<std::string>({"one", "two", "three"}));
+
+    std::filesystem::remove_all(test_dir);
+}
+
+// Scenario: column predicates are pruning hints only. They do not produce a row-level survivor
+// bitmap, so TableReader must not enable condition cache when the scan request has no conjuncts.
+TEST(TableReaderTest, ConditionCacheSkipsColumnPredicateOnlyRequest) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(0), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: runtime filters can arrive late and are not represented by the stable predicate digest.
+// A MISS must not insert a bitmap for `stable predicate AND runtime filter` under the stable digest.
+TEST(TableReaderTest, ConditionCacheSkipsRuntimeFilterConjunct) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, runtime_filter_wrapper_expr(
+                                                        table_int32_greater_than_expr(0, 0, 0)))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                                .condition_cache_digest = 7,
+                        })
+                    .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: table-format delete files/deletion vectors are outside the data-file cache key. When
+// TableReader injects delete conjuncts into the file scan request, condition cache must be disabled
+// for that split.
+TEST(TableReaderTest, ConditionCacheSkipsRequestWithDeleteConjuncts) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->inject_delete_conjunct = true;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: a MISS bitmap is safe to publish only after the physical reader reaches EOF. This test
+// returns EOF together with the first batch and verifies TableReader publishes the marked bitmap.
+TEST(TableReaderTest, ConditionCacheMissPublishesBitmapAfterReaderEof) {
+    ScopedConditionCacheForTest cache;
+
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_NE(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit);
+
+    segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1);
+    segment_v2::ConditionCacheHandle handle;
+    ASSERT_TRUE(cache.get()->lookup(key, &handle));
+    const auto cached_bitmap = handle.get_filter_result();
+    ASSERT_NE(cached_bitmap, nullptr);
+    ASSERT_FALSE(cached_bitmap->empty());
+    EXPECT_TRUE((*cached_bitmap)[0]);
+
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: LIMIT/cancel can close a reader before it reaches EOF. TableReader must drop the MISS
+// bitmap because unvisited granules would still be false and unsafe for future cache hits.
+TEST(TableReaderTest, ConditionCacheMissIsDroppedWhenReaderClosesBeforeEof) {
+    ScopedConditionCacheForTest cache;
+
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE;
+    fake_state->eof_with_first_batch = false;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_NE(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit);
+
+    ASSERT_TRUE(reader.close().ok());
+    segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1);
+    segment_v2::ConditionCacheHandle handle;
+    EXPECT_FALSE(cache.get()->lookup(key, &handle));
+}
+
+TEST(TableReaderTest, PushDownCountFromNewParquetReader) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"}, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 5);
+    EXPECT_FALSE(is_column_const(*block.get_by_position(0).column));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, TableLevelCountUsesAssignedRowCount) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_table_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TQueryOptions query_options;
+    query_options.__set_batch_size(2);
+    RuntimeState state {query_options, TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    auto split_options = build_split_options(file_path);
+    set_table_level_row_count(&split_options, 5);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 2);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 2);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 1);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFromNewParquetReader) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_minmax_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20},
+                                {"three", "one", "five", "two"}, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(1, "score", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    const auto& score_column =
+            assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 5);
+    EXPECT_EQ(score_column.get_element(0), 10);
+    EXPECT_EQ(score_column.get_element(1), 50);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxCastsFileValueToTableType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_cast_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20},
+                                {"three", "one", "five", "two"}, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt64>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt64&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 5);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFromProjectedStructLeaf) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_struct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_parquet_file(file_path, {3, 1, 5, 2}, 2);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto id_child = make_table_column(0, "id", int_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"id"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(struct_result.get_columns().size(), 1);
+    const auto& ids = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_result.get_column(0)));
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(1), 5);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedListStructLeaf) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_list_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto element_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int_type, nullable_int_type}, Strings {"a", "b"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    for (const auto is_null : nullable_elements.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 2);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+    const auto& b_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(1)));
+    EXPECT_EQ(b_values.get_element(0), 11);
+    EXPECT_EQ(b_values.get_element(1), 21);
+    EXPECT_EQ(b_values.get_element(2), 31);
+    EXPECT_EQ(b_values.get_element(3), 41);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedListStructReadsSelectedElementChild) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_list_projection_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto a_child = make_table_column(0, "a", int_type);
+    auto element_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto element_child = make_table_column(0, "element", nullable_element_type);
+    element_child.children = {a_child};
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    list_column.children = {element_child};
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 1);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedListStructReordersRenamedAndMissingElementChildren) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_list_schema_evolution_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto b_child = make_table_column(1, "renamed_b", nullable_int_type);
+    b_child.name_mapping = {"b"};
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto a_child = make_table_column(0, "renamed_a", nullable_int_type);
+    a_child.name_mapping = {"a"};
+    auto element_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int_type, string_type, nullable_int_type},
+            Strings {"renamed_b", "missing_child", "renamed_a"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto element_child = make_table_column(0, "element", nullable_element_type);
+    element_child.children = {b_child, missing_child, a_child};
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    list_column.children = {element_child};
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 3);
+    const auto& b_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    const auto& missing_values = element_struct.get_column(1);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(2)));
+    EXPECT_EQ(b_values.get_element(0), 11);
+    EXPECT_EQ(b_values.get_element(1), 21);
+    EXPECT_EQ(b_values.get_element(2), 31);
+    EXPECT_EQ(b_values.get_element(3), 41);
+    expect_nullable_column_all_null(missing_values);
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+// Scenario: when every projected array-element struct child is missing/default-only, the reader
+// still receives a full element projection and can materialize the default child without crashing.
+TEST(TableReaderTest, ProjectedListStructOnlyMissingElementChildFallsBackToFullElement) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_list_only_missing_child_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto element_type =
+            std::make_shared<DataTypeStruct>(DataTypes {string_type}, Strings {"missing_child"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto element_child = make_table_column(0, "element", nullable_element_type);
+    element_child.children = {missing_child};
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    list_column.children = {element_child};
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 1);
+    expect_nullable_column_all_null(element_struct.get_column(0));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedMapValueStructLeaf) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_map_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_map_struct_parquet_file(file_path);
+
+    const auto key_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    auto b_child = make_table_column(1, "b", nullable_string_type);
+    auto value_type =
+            std::make_shared<DataTypeStruct>(DataTypes {nullable_string_type}, Strings {"b"});
+    auto nullable_value_type = make_nullable(value_type);
+    auto value_child = make_table_column(1, "value", nullable_value_type);
+    value_child.children = {b_child};
+    auto map_column = make_table_column(
+            100, "kv", std::make_shared<DataTypeMap>(key_type, nullable_value_type));
+    map_column.children = {value_child};
+    std::vector<ColumnDefinition> projected_columns = {map_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& map_result = assert_cast<const ColumnMap&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(map_result.get_offsets()[0], 2);
+    EXPECT_EQ(map_result.get_offsets()[1], 3);
+    EXPECT_EQ(map_result.get_offsets()[2], 3);
+    const auto& keys = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(map_result.get_keys()));
+    EXPECT_EQ(keys.get_element(0), 1);
+    EXPECT_EQ(keys.get_element(1), 2);
+    EXPECT_EQ(keys.get_element(2), 3);
+    const auto& nullable_values = assert_cast<const ColumnNullable&>(map_result.get_values());
+    for (const auto is_null : nullable_values.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    const auto& value_struct =
+            assert_cast<const ColumnStruct&>(nullable_values.get_nested_column());
+    ASSERT_EQ(value_struct.get_columns().size(), 1);
+    const auto& b_values = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(value_struct.get_column(0)));
+    EXPECT_EQ(b_values.get_data_at(0).to_string(), "ma");
+    EXPECT_EQ(b_values.get_data_at(1).to_string(), "mb");
+    EXPECT_EQ(b_values.get_data_at(2).to_string(), "mc");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedMapValueStructReordersRenamedAndMissingChildren) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_map_schema_evolution_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_map_struct_parquet_file(file_path);
+
+    const auto key_type = std::make_shared<DataTypeInt32>();
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    auto b_child = make_table_column(1, "renamed_b", nullable_string_type);
+    b_child.name_mapping = {"b"};
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto a_child = make_table_column(0, "renamed_a", nullable_int_type);
+    a_child.name_mapping = {"a"};
+    auto value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_string_type, string_type, nullable_int_type},
+            Strings {"renamed_b", "missing_child", "renamed_a"});
+    auto nullable_value_type = make_nullable(value_type);
+    auto value_child = make_table_column(1, "value", nullable_value_type);
+    value_child.children = {b_child, missing_child, a_child};
+    auto map_column = make_table_column(
+            100, "kv", std::make_shared<DataTypeMap>(key_type, nullable_value_type));
+    map_column.children = {value_child};
+    std::vector<ColumnDefinition> projected_columns = {map_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& map_result = assert_cast<const ColumnMap&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(map_result.get_offsets()[0], 2);
+    EXPECT_EQ(map_result.get_offsets()[1], 3);
+    EXPECT_EQ(map_result.get_offsets()[2], 3);
+    const auto& keys = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(map_result.get_keys()));
+    EXPECT_EQ(keys.get_element(0), 1);
+    EXPECT_EQ(keys.get_element(1), 2);
+    EXPECT_EQ(keys.get_element(2), 3);
+    const auto& nullable_values = assert_cast<const ColumnNullable&>(map_result.get_values());
+    const auto& value_struct =
+            assert_cast<const ColumnStruct&>(nullable_values.get_nested_column());
+    ASSERT_EQ(value_struct.get_columns().size(), 3);
+    const auto& b_values = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(value_struct.get_column(0)));
+    const auto& missing_values = value_struct.get_column(1);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(value_struct.get_column(2)));
+    EXPECT_EQ(b_values.get_data_at(0).to_string(), "ma");
+    EXPECT_EQ(b_values.get_data_at(1).to_string(), "mb");
+    EXPECT_EQ(b_values.get_data_at(2).to_string(), "mc");
+    expect_nullable_column_all_null(missing_values);
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, MaterializeMapKeyStructReordersRenamedChildren) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto file_key_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    const auto table_key_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, int_type}, Strings {"renamed_b", "renamed_a"});
+    const auto file_map_type = std::make_shared<DataTypeMap>(file_key_type, int_type);
+    const auto table_map_type = std::make_shared<DataTypeMap>(table_key_type, int_type);
+
+    ColumnMapping a_mapping;
+    a_mapping.table_column_name = "renamed_a";
+    a_mapping.file_column_name = "a";
+    a_mapping.file_local_id = 0;
+    a_mapping.table_type = int_type;
+    a_mapping.file_type = int_type;
+    a_mapping.is_trivial = true;
+
+    ColumnMapping b_mapping;
+    b_mapping.table_column_name = "renamed_b";
+    b_mapping.file_column_name = "b";
+    b_mapping.file_local_id = 1;
+    b_mapping.table_type = string_type;
+    b_mapping.file_type = string_type;
+    b_mapping.is_trivial = true;
+
+    ColumnMapping key_mapping;
+    key_mapping.table_column_name = "key";
+    key_mapping.file_column_name = "key";
+    key_mapping.file_local_id = 0;
+    key_mapping.table_type = table_key_type;
+    key_mapping.file_type = file_key_type;
+    key_mapping.is_trivial = false;
+    key_mapping.child_mappings = {b_mapping, a_mapping};
+
+    ColumnMapping value_mapping;
+    value_mapping.table_column_name = "value";
+    value_mapping.file_column_name = "value";
+    value_mapping.file_local_id = 1;
+    value_mapping.table_type = int_type;
+    value_mapping.file_type = int_type;
+    value_mapping.is_trivial = true;
+
+    ColumnMapping map_mapping;
+    map_mapping.table_column_name = "kv";
+    map_mapping.file_column_name = "kv";
+    map_mapping.table_type = table_map_type;
+    map_mapping.file_type = file_map_type;
+    map_mapping.is_trivial = false;
+    map_mapping.child_mappings = {key_mapping, value_mapping};
+
+    auto a_keys = ColumnInt32::create();
+    a_keys->insert_value(10);
+    a_keys->insert_value(20);
+    a_keys->insert_value(30);
+    auto b_keys = ColumnString::create();
+    b_keys->insert_value("x");
+    b_keys->insert_value("y");
+    b_keys->insert_value("z");
+    MutableColumns key_children;
+    key_children.push_back(std::move(a_keys));
+    key_children.push_back(std::move(b_keys));
+    auto key_column = ColumnStruct::create(std::move(key_children));
+
+    auto value_column = ColumnInt32::create();
+    value_column->insert_value(100);
+    value_column->insert_value(200);
+    value_column->insert_value(300);
+    auto offsets_column = ColumnArray::ColumnOffsets::create();
+    offsets_column->insert_value(2);
+    offsets_column->insert_value(3);
+    ColumnPtr file_column = ColumnMap::create(std::move(key_column), std::move(value_column),
+                                              std::move(offsets_column));
+
+    TableReaderMaterializeTestHelper reader;
+    ColumnPtr result_column;
+    ASSERT_TRUE(reader._materialize_map_mapping_column(map_mapping, file_column, 2, &result_column)
+                        .ok());
+
+    const auto& result_map = assert_cast<const ColumnMap&>(*result_column);
+    EXPECT_EQ(result_map.get_offsets()[0], 2);
+    EXPECT_EQ(result_map.get_offsets()[1], 3);
+    const auto& result_key = assert_cast<const ColumnStruct&>(result_map.get_keys());
+    ASSERT_EQ(result_key.get_columns().size(), 2);
+    const auto& b_result = assert_cast<const ColumnString&>(result_key.get_column(0));
+    const auto& a_result = assert_cast<const ColumnInt32&>(result_key.get_column(1));
+    EXPECT_EQ(b_result.get_data_at(0).to_string(), "x");
+    EXPECT_EQ(b_result.get_data_at(1).to_string(), "y");
+    EXPECT_EQ(b_result.get_data_at(2).to_string(), "z");
+    EXPECT_EQ(a_result.get_element(0), 10);
+    EXPECT_EQ(a_result.get_element(1), 20);
+    EXPECT_EQ(a_result.get_element(2), 30);
+
+    const auto& result_value = assert_cast<const ColumnInt32&>(result_map.get_values());
+    EXPECT_EQ(result_value.get_element(0), 100);
+    EXPECT_EQ(result_value.get_element(1), 200);
+    EXPECT_EQ(result_value.get_element(2), 300);
+}
+
+// Scenario: map value struct materialization follows DataTypeStruct field order even when
+// ColumnMapping children arrive in a different order from projected ColumnDefinition children.
+TEST(TableReaderTest, MaterializeMapValueStructUsesTableTypeOrder) {
+    const auto key_type = std::make_shared<DataTypeString>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto file_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, string_type}, Strings {"full_name", "gender"});
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, string_type}, Strings {"full_name", "gender"});
+    const auto file_map_type = std::make_shared<DataTypeMap>(key_type, file_value_type);
+    const auto table_map_type = std::make_shared<DataTypeMap>(key_type, table_value_type);
+
+    ColumnMapping full_name_mapping;
+    full_name_mapping.table_column_name = "full_name";
+    full_name_mapping.file_column_name = "full_name";
+    full_name_mapping.file_local_id = 0;
+    full_name_mapping.table_type = string_type;
+    full_name_mapping.file_type = string_type;
+    full_name_mapping.is_trivial = true;
+
+    ColumnMapping gender_mapping;
+    gender_mapping.table_column_name = "gender";
+    gender_mapping.file_column_name = "gender";
+    gender_mapping.file_local_id = 1;
+    gender_mapping.table_type = string_type;
+    gender_mapping.file_type = string_type;
+    gender_mapping.is_trivial = true;
+
+    ColumnMapping value_mapping;
+    value_mapping.table_column_name = "value";
+    value_mapping.file_column_name = "value";
+    value_mapping.file_local_id = 1;
+    value_mapping.table_type = table_value_type;
+    value_mapping.file_type = file_value_type;
+    value_mapping.is_trivial = false;
+    value_mapping.child_mappings = {gender_mapping, full_name_mapping};
+
+    ColumnMapping key_mapping;
+    key_mapping.table_column_name = "key";
+    key_mapping.file_column_name = "key";
+    key_mapping.file_local_id = 0;
+    key_mapping.table_type = key_type;
+    key_mapping.file_type = key_type;
+    key_mapping.is_trivial = true;
+
+    ColumnMapping map_mapping;
+    map_mapping.table_column_name = "new_map_column";
+    map_mapping.file_column_name = "new_map_column";
+    map_mapping.table_type = table_map_type;
+    map_mapping.file_type = file_map_type;
+    map_mapping.is_trivial = false;
+    map_mapping.child_mappings = {key_mapping, value_mapping};
+
+    auto key_column = ColumnString::create();
+    key_column->insert_value("person10");
+    key_column->insert_value("person20");
+
+    auto full_name_column = ColumnString::create();
+    full_name_column->insert_value("Jack");
+    full_name_column->insert_value("James Lee");
+    auto gender_column = ColumnString::create();
+    gender_column->insert_value("Male");
+    gender_column->insert_value("Male");
+    MutableColumns value_children;
+    value_children.push_back(std::move(full_name_column));
+    value_children.push_back(std::move(gender_column));
+    auto value_column = ColumnStruct::create(std::move(value_children));
+
+    auto offsets_column = ColumnArray::ColumnOffsets::create();
+    offsets_column->insert_value(1);
+    offsets_column->insert_value(2);
+    ColumnPtr file_column = ColumnMap::create(std::move(key_column), std::move(value_column),
+                                              std::move(offsets_column));
+
+    TableReaderMaterializeTestHelper reader;
+    ColumnPtr result_column;
+    ASSERT_TRUE(reader._materialize_map_mapping_column(map_mapping, file_column, 2, &result_column)
+                        .ok());
+
+    const auto& result_map = assert_cast<const ColumnMap&>(*result_column);
+    const auto& result_value = assert_cast<const ColumnStruct&>(result_map.get_values());
+    ASSERT_EQ(result_value.get_columns().size(), 2);
+    const auto& full_name_result = assert_cast<const ColumnString&>(result_value.get_column(0));
+    const auto& gender_result = assert_cast<const ColumnString&>(result_value.get_column(1));
+    EXPECT_EQ(full_name_result.get_data_at(0).to_string(), "Jack");
+    EXPECT_EQ(full_name_result.get_data_at(1).to_string(), "James Lee");
+    EXPECT_EQ(gender_result.get_data_at(0).to_string(), "Male");
+    EXPECT_EQ(gender_result.get_data_at(1).to_string(), "Male");
+}
+
+TEST(TableReaderTest, PushDownMinMaxOnlyUsesSelectedRowGroupInFileRange) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_range_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {10, 1, 100}, {100, 10, 1000}, {"ten", "one", "hundred"},
+                                1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 1)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownCountOnlyUsesSelectedRowGroupInFileRange) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_range_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 2)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownCountFallsBackWithTableConjunct) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_conjunct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 2))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 1);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownCountFallsBackWithColumnPredicate) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_predicate_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 1);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFallsBackWithoutDirectFileMapping) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_minmax_missing_mapping_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(99, "missing_id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 1);
+    expect_nullable_column_all_null(*block.get_by_position(0).column);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, OpenReaderBuildsTableFiltersFromConjuncts) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_conjunct_filter_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 3, "three");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 2))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // open_reader() should convert the table-level conjunct on projected column id 1 into
+    // _table_filters before ColumnMapper creates the FileScanRequest. ColumnMapper then rewrites
+    // the conjunct's slot ref from table column id 1 to the file-local block position used by
+    // ParquetReader. The projection order intentionally puts value before id, so the id filter
+    // column is not at position 0 in the file block.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+
+    TableReader filtered_reader;
+    ASSERT_TRUE(filtered_reader
+                        .init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, table_int32_greater_than_expr(1, 1, 4))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                        .ok());
+    ASSERT_TRUE(filtered_reader.prepare_split(build_split_options(file_path)).ok());
+
+    block = build_table_block(projected_columns);
+    eos = false;
+    ASSERT_TRUE(filtered_reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.get_by_position(1).column->size(), 0);
+
+    ASSERT_TRUE(filtered_reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, OpenReaderBuildsColumnPredicateFilters) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_column_predicate_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    // ColumnPredicate is only used for row-group/statistics pruning. Keep one row per row
+    // group so the predicate can prune the first two row groups and leave only id = 3.
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(1),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "three");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ColumnPredicateSurvivesReopenSplit) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_predicate_reopen_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const std::vector<std::string> file_paths = {
+            (test_dir / "split_1.parquet").string(),
+            (test_dir / "split_2.parquet").string(),
+    };
+    write_int_pair_parquet_file(file_paths[0], {1, 3}, {10, 30}, {"one", "three"}, 1);
+    write_int_pair_parquet_file(file_paths[1], {2, 4}, {20, 40}, {"two", "four"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    std::vector<int32_t> ids;
+    for (const auto& file_path : file_paths) {
+        ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+        Block block = build_table_block(projected_columns);
+        bool eos = false;
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        ASSERT_FALSE(eos);
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+        ASSERT_EQ(id_column.size(), 1);
+        ids.push_back(id_column.get_element(0));
+
+        ASSERT_TRUE(reader.close().ok());
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4}));
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, CreateScanRequestDeduplicatesSharedPredicateColumns) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(0, "a", int_type),
+            make_table_column(1, "b", int_type),
+            make_table_column(2, "c", int_type),
+            make_table_column(3, "value", std::make_shared<DataTypeString>()),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "a", int_type),
+            make_file_column(1, "b", int_type),
+            make_file_column(2, "c", int_type),
+            make_file_column(3, "value", std::make_shared<DataTypeString>()),
+    };
+
+    TableColumnMapper mapper;
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+
+    std::vector<TableFilter> table_filters;
+    table_filters.push_back({
+            // This test only needs the referenced global indices to drive predicate-column
+            // placement. Keep the conjunct empty so the assertion focuses on scan-column
+            // de-duplication rather than expression rewrite/prepare behavior.
+            .conjunct = nullptr,
+            .global_indices = {GlobalIndex(0), GlobalIndex(1)},
+    });
+    table_filters.push_back({
+            .conjunct = nullptr,
+            .global_indices = {GlobalIndex(0), GlobalIndex(2)},
+    });
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request(table_filters, {}, projected_columns, &file_request).ok());
+
+    // Both filters reference column a. It must still be read once as a predicate column, and a
+    // predicate column must not be repeated as a non-predicate column.
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0, 1, 2}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({3}));
+    ASSERT_EQ(file_request.local_positions.size(), 4);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(3)).value(), 0);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 2);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(2)).value(), 3);
+    const auto predicate_column_ids = projection_ids(file_request.predicate_columns);
+    const auto non_predicate_column_ids = projection_ids(file_request.non_predicate_columns);
+    for (const auto predicate_column_id : predicate_column_ids) {
+        EXPECT_TRUE(std::find(non_predicate_column_ids.begin(), non_predicate_column_ids.end(),
+                              predicate_column_id) == non_predicate_column_ids.end());
+    }
+}
+
+TEST(TableReaderTest, CreateScanRequestPromotesProjectedColumnToPredicateColumn) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(0, "id", int_type),
+            make_table_column(1, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "id", int_type),
+            make_file_column(1, "score", int_type),
+    };
+
+    TableColumnMapper mapper;
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+
+    TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)),
+            .global_indices = {GlobalIndex(0)},
+    };
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok());
+
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({1}));
+    ASSERT_EQ(file_request.local_positions.size(), 2);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 0);
+}
+
+TEST(TableReaderTest, CreateScanRequestUsesColumnNameForByNamePredicateMapping) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(10, "id", int_type),
+            make_table_column(11, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "ID", int_type),
+            make_file_column(1, "score", int_type),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    set_name_identifiers(&projected_columns);
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+
+    TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)),
+            .global_indices = {GlobalIndex(0)},
+    };
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok());
+
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({1}));
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(file_request.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(localized_slot->slot_id(), 0);
+    EXPECT_EQ(localized_slot->column_id(), 1);
+}
+
+TEST(TableReaderTest, ColumnPredicateFilterUsesColumnNameForByNameMapping) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(10, "id", int_type),
+            make_table_column(11, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "ID", int_type),
+            make_file_column(1, "score", int_type),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    set_name_identifiers(&projected_columns);
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(
+            &column_predicates, GlobalIndex(0),
+            create_comparison_predicate<PredicateType::GT>(
+                    10, "id", make_nullable(int_type), Field::create_field<TYPE_INT>(2), false));
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(mapper.create_scan_request({}, column_predicates, projected_columns, &file_request)
+                        .ok());
+
+    ASSERT_EQ(file_request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(file_request.column_predicate_filters[0].file_column_id.value(), 0);
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(file_request.predicate_columns.empty());
+}
+
+TEST(TableReaderTest, OpenReaderPushesMultiColumnConjunctToParquetReader) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_multi_conjunct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(1, "score", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, table_int32_sum_greater_than_expr(1, 1, 2, 2, 8))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                    .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The conjunct references both id and score, so ColumnMapper must put both file columns into
+    // predicate_columns and rewrite both slot refs to ParquetReader's file-local block positions.
+    // ParquetReader then evaluates the expression after all predicate columns have been read.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    const auto& score_column =
+            assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(score_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(score_column.get_element(0), 8);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "three");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsFillDefaultForParquetSchemaMismatch) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_schema_mismatch_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(99, "missing_value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table projection asks for field id 99, but the ParquetReader exposes only file-local
+    // fields 0 and 1. Missing columns are allowed by the current mapper options, so TableReader
+    // should still use the Parquet row count and fill a default column in table schema.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.get_by_position(0).column->size(), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, DefaultExprResultMatchesNullableTableType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_nullable_default_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto missing_column = make_table_column(99, "c_new", make_nullable(int_type));
+    missing_column.default_expr = VExprContext::create_shared(
+            VLiteral::create_shared(int_type, Field::create_field<TYPE_INT>(42)));
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(std::move(missing_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    EXPECT_TRUE(result.type->is_nullable());
+    ASSERT_TRUE(result.column->is_nullable());
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(nullable_column.size(), 1);
+    EXPECT_EQ(nullable_column.get_null_map_data()[0], 0);
+    const auto& values = assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+    EXPECT_EQ(values.get_element(0), 42);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, DefaultExprAlignsNestedNullableArrayTableType) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_nested_nullable_array_default_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    const auto bigint_type = std::make_shared<DataTypeInt64>();
+    const auto array_type = std::make_shared<DataTypeArray>(make_nullable(bigint_type));
+    const auto table_type = make_nullable(array_type);
+    auto missing_column = make_table_column(99, "single_element_groups", table_type);
+    missing_column.default_expr = VExprContext::create_shared(
+            std::make_shared<NullableArrayBigintDefaultExpr>(table_type));
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(std::move(missing_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    ASSERT_TRUE(result.column->is_nullable());
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(nullable_column.size(), 1);
+    EXPECT_EQ(nullable_column.get_null_map_data()[0], 0);
+
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    ASSERT_EQ(array_column.size(), 1);
+    EXPECT_EQ(array_column.get_offsets()[0], 1);
+    ASSERT_TRUE(array_column.get_data().is_nullable());
+    const auto& nested_nullable = assert_cast<const ColumnNullable&>(array_column.get_data());
+    ASSERT_EQ(nested_nullable.size(), 1);
+    EXPECT_EQ(nested_nullable.get_null_map_data()[0], 0);
+    const auto& values = assert_cast<const ColumnInt64&>(nested_nullable.get_nested_column());
+    EXPECT_EQ(values.get_element(0), 7);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsFillMissingParquetColumnWithDefault) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_schema_mismatch_reject_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(99, "missing_value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    // A missing scalar column without an explicit default is materialized as a default-value
+    // column. It may stay constant, so verify through the IColumn interface instead of assuming a
+    // concrete ColumnString instance.
+    ASSERT_EQ(result.column->size(), 1);
+    EXPECT_EQ(result.column->get_data_at(0).to_string(), "");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedStructFillsMissingChildWithDefault) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_struct_missing_child_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_parquet_file(file_path, 7);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto id_child = make_table_column(0, "id", int_type);
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type},
+                                                        Strings {"id", "missing_child"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child, missing_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(struct_result.get_columns().size(), 2);
+    const auto& ids = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_result.get_column(0)));
+    ASSERT_EQ(struct_result.size(), 1);
+    EXPECT_EQ(ids.get_element(0), 7);
+    expect_nullable_column_all_null(struct_result.get_column(1));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ReusedBlockClearsProjectedStructWithNullableChild) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_struct_nullable_child_reuse_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_with_nullable_child_parquet_file(file_path);
+
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    auto id_child = make_table_column(0, "id", int_type);
+    auto note_child = make_table_column(1, "note", nullable_string_type);
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, nullable_string_type, string_type},
+            Strings {"id", "note", "missing_child"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child, note_child, missing_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    const auto& notes = assert_cast<const ColumnNullable&>(struct_result.get_column(1));
+    EXPECT_FALSE(notes.is_null_at(0));
+    EXPECT_TRUE(notes.is_null_at(1));
+
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedPartitionColumnUsesSplitPartitionValue) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_partition_value_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(1, "value", std::make_shared<DataTypeString>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("value", Field::create_field<TYPE_STRING>("p1"));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    // The file has a physical column with the same id/name. The split partition value should still
+    // take precedence and be materialized by TableReader.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto partition_value = block.get_by_position(0).column->convert_to_full_column_if_const();
+    const auto& partition_value_data = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(*partition_value));
+    ASSERT_EQ(partition_value_data.size(), 1);
+    EXPECT_EQ(partition_value_data.get_data_at(0).to_string(), "p1");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ConstantPartitionFilterSkipsSplitWhenFalse) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_constant_partition_filter_skip_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 10))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.get_by_position(0).column->size(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ConstantPartitionFilterKeepsSplitWhenTrue) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_constant_partition_filter_keep_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    expect_int32_column_values(*block.get_by_position(0).column, {7});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, RuntimeFilterOnConstantPartitionIsNotPreExecuted) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_constant_runtime_filter";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, runtime_filter_wrapper_expr(
+                                                        table_int32_greater_than_expr(0, 0, 1)))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                    .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+    expect_int32_column_values(*block.get_by_position(0).column, {7});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ParquetReaderReadsOnlyRowGroupsInFileRange) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_file_range_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30},
+                                {"range_group_one", "range_group_two", "range_group_three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 1)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(block.rows(), 1);
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "range_group_two");
+
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionForSameNameDifferentIdParquetSchema) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_same_name_diff_id_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(99, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table column has the same name as the Parquet field, but a different field id.
+    // ColumnMapper should still resolve it by name and build a SlotRef projection from the file
+    // column into the requested table column.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(id_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionsForParquetSchemaMismatch) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_mapper_expr_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 7, "seven");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt64>()));
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table projection requests id as BIGINT instead of the file INT, so ColumnMapper should
+    // build a Cast expression. The second field has the same type and should build a SlotRef
+    // projection. Both columns should still materialize in table schema order.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    ASSERT_EQ(block.get_by_position(0).name, "id");
+    ASSERT_EQ(block.get_by_position(1).name, "value");
+    const auto& id_column = assert_cast<const ColumnInt64&>(expect_not_null_table_column(block, 0));
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 7);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "seven");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/docs/doris-iceberg-parquet-api-design.md b/docs/doris-iceberg-parquet-api-design.md
new file mode 100644
index 00000000000000..457550a932da67
--- /dev/null
+++ b/docs/doris-iceberg-parquet-api-design.md
@@ -0,0 +1,511 @@
+# Doris Iceberg + Parquet 新架构 API 设计
+
+本文档用于描述 Doris 中 Iceberg + Parquet 新架构的 API 设计。本文档作为后续从
+`master` 新开重构分支时的起点，只定义 API 形状、职责边界、依赖方向和兼容原则，
+不定义函数实现细节，不提供伪代码，不包含迁移 patch。
+
+## 架构总览
+
+目标架构包含 table 调度层、表格式语义层、schema 映射层、文件通用层和文件格式实现层：
+
+```text
+FileScanner / split producer
+    ->
+TableReader
+    ->
+IcebergTableReader
+    ->
+TableColumnMapper + FileReader
+    ->
+ParquetReader
+```
+
+核心职责如下：
+
+- `TableReader`
+  负责多文件、多 split 的上层调度，统一 scan 生命周期，对外输出 table block，
+  并承接动态分区裁剪等 table-level 通用逻辑。
+- `IcebergTableReader`
+  负责 Iceberg 表语义，包括 schema 绑定、scan task、delete file、虚拟列和 table
+  block finalize。
+- `TableColumnMapper`
+  负责 table schema 到 file schema 的映射，负责 filter localization 和 schema
+  change 映射。
+- `FileReader`
+  负责文件层通用读取接口，只理解 file-local schema 和 file-local scan request。
+- `ParquetReader`
+  作为 `FileReader` 的 Parquet 实现，负责 Parquet 文件物理读取。
+
+依赖方向必须保持单向：
+
+```text
+TableReader
+  -> IcebergTableReader
+    -> TableColumnMapper
+    -> FileReader
+      -> ParquetReader
+```
+
+低层不反向理解高层语义，尤其 `ParquetReader` 不得反向理解 Iceberg/global schema。
+
+## 核心 API 设计
+
+### TableReader
+
+`TableReader` 是最上层读取接口，作为 `IcebergTableReader` 的基类，负责多 split /
+多 file 调度，并承接 table-level 的通用裁剪逻辑，不下沉文件格式语义。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 接收 split 列表或 scan task 列表；
+- 控制当前 reader 的创建、切换和关闭；
+- 管理 scan 生命周期；
+- 承接动态分区裁剪等 table-level 通用过滤逻辑；
+- 对外统一输出 table block。
+- `next` 是基类统一入口，内部负责 EOF 后切换 reader；具体表格式只提供打开和读取
+  当前 reader 的 hook。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class TableReader {
+public:
+    virtual ~TableReader() = default;
+
+    virtual Status init(const TableReadOptions& options);
+    virtual Status filter(const VExprContextSPtr& expr, bool* can_filter_all);
+    Status next(Block* table_block, size_t* rows, bool* eof);
+    virtual Status close();
+
+protected:
+    Status next_reader();
+    virtual Status open_next_reader(bool* has_reader);
+    virtual Status read_current(Block* table_block, size_t* rows, bool* eof);
+    virtual Status close_current_reader();
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `TableReader` 输出的是 table block，不输出 file-local block。
+- `TableReader` 负责多文件编排和 table-level 通用裁剪，不负责 schema mapping，不负责
+  Parquet 物理解码。
+- `next_reader` 是 `TableReader` 自己的通用切换逻辑，不作为子类公开 override 接口。
+- 动态分区裁剪这类逻辑应下放到 `TableReader`，而不是散落在具体表格式 reader 中。
+- `TableReader` 不直接依赖旧 `vparquet` 表层语义。
+
+### IcebergTableReader
+
+`IcebergTableReader` 是 Iceberg 表语义层，负责把单个 Iceberg data file 的读取组织成
+table 语义输出。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table/iceberg_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::iceberg
+```
+
+建议职责：
+
+- 绑定 Iceberg 当前 table schema；
+- 接收 `IcebergScanTask` 列表，并按 `TableReader` 的统一调度打开当前 task；
+- 处理 position delete、equality delete、deletion vector；
+- 物化 `_row_id`、`_last_updated_sequence_number` 等虚拟列；
+- 将 `ParquetReader` 返回的 file-local block finalize 成 table block。
+
+建议接口形状：
+
+```cpp
+namespace doris::iceberg {
+
+class IcebergTableReader : public format::TableReader {
+public:
+    virtual ~IcebergTableReader() = default;
+
+    Status init(IcebergTableReadParams params);
+    Status close() override;
+
+protected:
+    Status open_next_reader(bool* has_reader) override;
+    Status read_current(Block* table_block, size_t* rows, bool* eof) override;
+    Status close_current_reader() override;
+};
+
+} // namespace doris::iceberg
+```
+
+接口约束：
+
+- `IcebergTableReader` 继承 `TableReader`，并通过组合使用 `FileReader`。
+- `IcebergTableReader` 不做 Parquet page/column 解码。
+- `IcebergTableReader` 负责 table-level finalize，不负责 file-local pruning 实现。
+- `IcebergTableReader` 的 schema、scan request、scan tasks 和底层 `FileReader` 应通过
+  一个初始化参数对象一次性传入；除非存在明确生命周期差异，不拆成 `bind` /
+  `init(TableScanRequest)` / `set_scan_tasks` 多阶段接口。
+- `IcebergTableReader` 不重新实现 reader 切换循环，只实现打开 Iceberg task、读取当前
+  task 和关闭当前 reader 的 hook。
+
+### TableColumnMapper
+
+`TableColumnMapper` 是 table schema 到 file schema 的通用映射层，不是
+Iceberg-only 组件。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 输入 table schema、file schema、table scan request；
+- 输出 `ColumnMapping` 和通用 `FileScanRequest`；
+- 负责 filter localization；
+- 负责 schema change 映射；
+- 负责复杂列 child mapping；
+- 负责缺失列、default、partition、generated 列的 finalize 语义描述。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class TableColumnMapper {
+public:
+    explicit TableColumnMapper(TableColumnMapperOptions options = {});
+
+    virtual Status create_mapping(const std::vector<TableColumnDefinition>& table_schema,
+                                  const std::vector<SchemaField>& file_schema,
+                                  std::vector<ColumnMapping>* mappings);
+
+    virtual Status create_scan_request(const TableScanRequest& table_request,
+                                       const std::vector<ColumnMapping>& mappings,
+                                       FileScanRequest* file_request);
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `TableColumnMapper` 的输入是 table schema + file schema + table scan request。
+- `TableColumnMapper` 的输出是 `ColumnMapping` + `FileScanRequest`。
+- `TableColumnMapper` 必须是通用层，不做 Iceberg-only 命名。
+- Iceberg 场景默认按 field id 映射；按 name 映射不是本轮默认路径。
+
+### FileReader
+
+`FileReader` 是文件物理读取层的通用接口，为后续 Parquet 之外的文件格式适配预留。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/file_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 打开物理文件；
+- 暴露 file-local schema；
+- 接收 `FileScanRequest`；
+- 输出 file-local block；
+- 不理解 table/global schema。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class FileReader {
+public:
+    virtual ~FileReader() = default;
+
+    virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr);
+    virtual Status get_schema(std::vector<SchemaField>* file_schema) const;
+    virtual Status init(const FileScanRequest& request);
+    virtual Status next(Block* file_block, size_t* rows, bool* eof);
+    virtual Status close();
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `FileReader` 输出的是 file-local block，不输出 table/global schema block。
+- `FileReader` 不处理 Iceberg schema evolution、default/generated/partition 列。
+- `IcebergTableReader` 组合 `FileReader`，不直接绑定具体文件格式 reader。
+
+### ParquetReader
+
+`ParquetReader` 是 `FileReader` 的 Parquet 实现，只负责 Parquet file-local schema
+和 Parquet file-local scan request。
+
+实际 API 文件：
+
+```text
+be/src/format/parquet/parquet_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::parquet
+```
+
+建议职责：
+
+- 打开 Parquet 文件；
+- 解析 footer 和 file schema；
+- 接收 `ParquetScanRequest` 或通用 `FileScanRequest`；
+- 执行 file-local projection 和 file-local filter；
+- 输出 file-local block。
+
+建议接口形状：
+
+```cpp
+namespace doris::parquet {
+
+class ParquetReader : public format::FileReader {
+public:
+    virtual ~ParquetReader() = default;
+
+    virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr);
+    virtual Status get_schema(std::vector<format::SchemaField>* file_schema) const;
+    virtual Status init(const ParquetScanRequest& request);
+    virtual Status next(Block* file_block, size_t* rows, bool* eof);
+    virtual Status close();
+};
+
+} // namespace doris::parquet
+```
+
+接口约束：
+
+- `ParquetReader` 输出的是 file-local block，不输出 table/global schema block。
+- `ParquetReader` 不理解 Iceberg schema evolution。
+- `ParquetReader` 不负责 default/generated/partition 列。
+- 任何 table-level cast/default/generated/partition 语义都不能重新塞回
+  `ParquetReader`。
+
+## 关键类型
+
+### SchemaField
+
+`SchemaField` 表示文件层 schema 中的列定义。
+
+建议包含的信息：
+
+- file-local column id；
+- 列名；
+- 类型；
+- child fields。
+
+它服务于 `TableColumnMapper` 做 schema matching，不携带 table-level 语义。
+
+### TableColumnDefinition
+
+`TableColumnDefinition` 表示 table/global schema 中的列定义。
+
+建议包含的信息：
+
+- table column id；
+- 列名；
+- 类型；
+- child columns。
+
+Iceberg 场景下，column id 默认对应 field id。
+
+### TableFilter
+
+`TableFilter` 表示 table 层过滤条件。
+
+建议包含的信息：
+
+- `table_column_id`
+- `conjunct`
+- `predicates`
+
+职责约束：
+
+- `conjunct` 偏表达式过滤，适合表达 cast、复杂表达式、复杂列提取等语义；
+- `predicates` 偏结构化单列下推，适合驱动 row group stats、page index、dictionary、
+  bloom filter 等文件层优化。
+
+### FileLocalFilter
+
+`FileLocalFilter` 表示已经 localize 到 file-local schema 的过滤条件。
+
+建议包含的信息：
+
+- `file_column_id`
+- `conjunct`
+- `predicates`
+
+职责约束：
+
+- `conjunct` 用于 file-local 表达式过滤；
+- `predicates` 用于 file-local 结构化下推；
+- 其输入必须来自 `TableColumnMapper`，不能由具体文件 reader 自己推导 table 语义。
+
+### ColumnMapping
+
+`ColumnMapping` 是 table schema 与 file schema 之间的核心边界对象。
+
+建议包含的信息：
+
+- `table_column_id`
+- `file_column_id`
+- `file_type`
+- `table_type`
+- `finalize_expr`
+- `reader_filter_expr`
+- `child_mappings`
+
+职责约束：
+
+- `finalize_expr` 服务最终输出，把 file-local value 转成 table/global value；
+- `reader_filter_expr` 服务读时 filter fallback；
+- 二者语义不同，不能混用；
+- `child_mappings` 用于复杂列 remap、复杂列裁剪和复杂列 schema change。
+
+### TableScanRequest
+
+`TableScanRequest` 描述 table 层 scan 请求。
+
+建议包含的信息：
+
+- projected table columns；
+- table filters。
+
+它由 `IcebergTableReader` 接收，再交给 `TableColumnMapper` 生成 file-local request。
+
+### ParquetScanRequest
+
+`ParquetScanRequest` 继承 `FileScanRequest`，描述 Parquet file-local scan 请求。
+
+### FileScanRequest
+
+`FileScanRequest` 描述通用 file-local scan 请求。
+
+建议包含的信息：
+
+- projected file columns；
+- local filters；
+- reader expression map。
+
+它是 `FileReader` 的唯一 scan 输入，不包含 table/global schema 语义。
+
+### IcebergScanTask
+
+`IcebergScanTask` 表示一次 Iceberg data file 读取任务。
+
+建议包含的信息：
+
+- data file 信息；
+- position delete 文件；
+- equality delete 文件；
+- deletion vector 信息。
+
+它是 `IcebergTableReader` 的输入，不应直接传给 `ParquetReader`。
+
+### IcebergTableReadParams
+
+`IcebergTableReadParams` 表示一次 Iceberg table scan 的完整初始化输入。
+
+建议包含的信息：
+
+- Iceberg read options；
+- Iceberg table schema；
+- table scan request；
+- Iceberg scan task 列表；
+- 底层 `FileReader`。
+
+它用于避免 `IcebergTableReader` 暴露多个半初始化阶段。调用方应一次性构造完整
+参数并调用 `init`。
+
+## 设计原则
+
+### 边界原则
+
+- `FileReader` 不理解 global schema，不直接处理 Iceberg schema evolution。
+- `ParquetReader` 是 `FileReader` 的 Parquet 实现。
+- `TableColumnMapper` 是 schema mapping 和 filter localization 的唯一入口。
+- `IcebergTableReader` 不做 Parquet 解码，只负责 table-level finalize、delete、
+  virtual columns。
+- `TableReader` 只负责多文件编排和 table-level 通用裁剪，不下沉文件格式语义。
+- 任何 table-level cast/default/generated/partition 语义都不能重新塞回
+  `ParquetReader`。
+
+### 依赖原则
+
+- 低层不能反向依赖高层语义。
+- `FileReader` 只依赖 file-local request。
+- `IcebergTableReader` 继承 `TableReader`，复用其多文件编排和通用裁剪能力。
+- `IcebergTableReader` 通过组合使用 `FileReader`。
+- `TableColumnMapper` 可以被 Iceberg 之外的其他表格式复用。
+
+### 命名原则
+
+- 表层抽象使用 `TableReader`、`IcebergTableReader`、`TableColumnMapper`、
+  `FileReader`、`ParquetReader` 命名。
+- `TableColumnMapper` 不使用 Iceberg-only 命名。
+- file schema 类型使用 `SchemaField`，table schema 类型使用 `TableColumnDefinition`。
+
+## 兼容原则
+
+新架构重构期间，新旧代码允许并存，但必须遵守以下约束：
+
+- 旧 `vparquet` / Hive / Hudi / Paimon 路径在新架构稳定前允许保留。
+- 新架构实现不得继续向旧 `vparquet` 表层语义回灌依赖。
+- 先搭新框架 API，再逐步迁移调用点。
+- 不允许边改 API 边混入临时裸逻辑、实验性草稿或未收敛命名。
+- 兼容层可能需要存在，但本文档不定义兼容层的具体实现方案。
+
+## 验收标准
+
+该文档应满足以下目标：
+
+- 不引用错误实验代码作为既成事实；
+- 不出现实现性草稿、裸伪代码、未收敛命名混用；
+- 让另一个工程师从 `master` 新开分支时，可以直接按本文档搭 API 骨架；
+- 读完文档后，不需要再讨论以下问题：
+  - 新架构分几层；
+  - 每层负责什么；
+  - 哪层理解 global schema；
+  - 哪层做 schema change / filter localization / finalize；
+  - 哪层允许依赖旧实现，哪层不允许。
diff --git a/docs/new-parquet-reader-column-index-refactor.md b/docs/new-parquet-reader-column-index-refactor.md
new file mode 100644
index 00000000000000..56f8c7ca4a37d5
--- /dev/null
+++ b/docs/new-parquet-reader-column-index-refactor.md
@@ -0,0 +1,404 @@
+# New Reader 列标识实现说明
+
+本文说明 Doris new table/file reader 栈中各种列标识的当前含义，以及它们在
+`FileScannerV2`、`TableReader`、`TableColumnMapper` 和 new Parquet reader 中的流转逻辑。
+
+核心原则是把 **schema identity** 和 **执行期位置** 分开：
+
+- schema identity 用来判断 table column 和 file column 是否是同一列。
+- index/position 用来表示 block、projection tree、scan request 或 constant map 中的位置。
+- FE column unique id 只在 scanner 边界用于定位 slot，进入 table/file reader 后不再出现。
+
+共享定义集中在 `be/src/format_v2/column_data.h`。file reader 通用请求定义在
+`be/src/format_v2/file_reader.h`。new Parquet reader 自己的 Parquet 内部 schema tree 定义在
+`be/src/format_v2/parquet/parquet_column_schema.h`。
+
+## 层级边界
+
+当前 reader 栈可以按语义分成三层。
+
+### FileScannerV2：FE 标识到 reader 标识的边界
+
+`FileScannerV2` 仍能看到 FE 下发的 `slot_id`、`col_unique_id`、`TFileScanSlotInfo` 和
+`TColumnAccessPath`。这些 FE 侧标识只在这里使用。
+
+`FileScannerV2::_build_projected_columns()` 会把 `_params->required_slots` 转成
+`std::vector<format::ColumnDefinition>`：
+
+- vector 下标就是 `GlobalIndex`。
+- `_slot_id_to_global_index` 把 FE `slot_id` 转成 `GlobalIndex`，用于 row-level conjunct。
+- `_column_unique_id_to_global_index` 把 FE `col_unique_id` 转成 `GlobalIndex`，用于 column predicate。
+- `ColumnDefinition::identifier` 表示 table-side schema identity，默认是列名；如果外部 schema
+  提供 field id，则改用 field id。
+- partition/default/generated 信息被挂到 `ColumnDefinition` 上，由 table reader 层处理。
+
+从这一层往下，table/file reader 不再使用 FE column unique id。
+
+### TableReader / TableColumnMapper：table schema 到 file schema
+
+`TableReader::open_reader()` 对每个 split 打开一个具体 `FileReader`，先通过
+`FileReader::get_schema()` 获取当前文件的 file-local schema，再用 `TableColumnMapper` 建立映射。
+
+`TableColumnMapper` 的输入是：
+
+- table/global schema：`FileScannerV2` 构造的 `projected_columns`。
+- file-local schema：具体 file reader 返回的 `std::vector<ColumnDefinition>`。
+- per-split partition values。
+- table-level row filters 和 column predicates。
+
+`TableColumnMapper` 的输出是：
+
+- `ColumnMapping`：构造阶段使用的 table column 到 file/constant/virtual source 的映射。
+- `FileScanRequest`：只含 file-local projection、file-local block layout 和 file-local filters。
+- `ColumnMapResult` / `ResultColumnMapping`：给 table reader finalize 阶段消费的最终映射。
+- `FilterEntry`：给 filter localization 使用的 `GlobalIndex -> LOCAL/CONSTANT/UNSET` target。
+- `ConstantMap`：partition/default/generated 常量列。
+
+### FileReader / ParquetReader：只理解 file-local 请求
+
+`FileReader` 只暴露两类 schema/request：
+
+- `get_schema(std::vector<ColumnDefinition>*)`：返回文件自身 schema。
+- `open(std::unique_ptr<FileScanRequest>&)`：接收已经 localize 后的 file-local scan request。
+
+具体 file reader 不理解 table/global schema、Iceberg default、partition column、FE slot id 或
+FE column unique id。
+
+new Parquet reader 使用 `FileScanRequest` 中的 `LocalColumnIndex` 创建 column reader，并使用
+`local_positions` 决定 file-local block layout。
+
+## ColumnDefinition
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`ColumnDefinition` 是 table/global schema 和 file-local schema 共用的列定义。它表示列名、类型、
+nested children、默认表达式、partition 属性和 file-local column kind。
+
+关键字段：
+
+- `identifier`：schema identity。用于 table column 和 file column 匹配。
+- `local_id`：file reader 返回的 schema node 在当前 parent 下的 reader-local id。
+- `name`：逻辑列名。BY_NAME 且没有显式 string identifier 时会回退到它。
+- `type`：当前 schema node 的 Doris 类型。
+- `children`：nested children。table/global schema 中是 table children；file schema 中是
+  file-local children。
+- `default_expr`：missing/default/generated column 的物化表达式。
+- `is_partition_key`：partition column 标记。
+- `column_type`：file-local column kind，例如普通数据列或 row number virtual column。
+
+`ColumnDefinition` 不保存 FE column unique id。它也不保存“应该按什么方式匹配”。匹配方式由
+`TableColumnMapperOptions::mode` 统一决定。
+
+### identifier
+
+`identifier` 是一个 `Field`，语义接近 DuckDB `MultiFileColumnDefinition::identifier`：
+
+- `TYPE_NULL`：没有显式 identifier。BY_NAME 时使用 `name`。
+- `TYPE_INT`：在 BY_FIELD_ID 中表示 field id；在 BY_INDEX 中表示 file schema position。
+- `TYPE_STRING`：显式 name identifier。
+
+访问 helper：
+
+- `has_identifier_field_id()` / `get_identifier_field_id()`：BY_FIELD_ID 使用。
+- `get_identifier_name()`：BY_NAME 使用；没有显式 string identifier 时返回 `name`。
+- `get_identifier_position()`：BY_INDEX 使用。
+- `file_local_id()`：file reader projection 使用；优先返回 `local_id`，否则回退到 int
+  identifier。这个回退只用于兼容某些 file schema 构造路径，不应重新引入 FE id 语义。
+
+## 强类型位置
+
+### GlobalIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`GlobalIndex` 表示 table/global output block 中的 top-level 列位置。当前等于
+`_params->required_slots` 的下标。
+
+主要使用位置：
+
+- `ColumnMapping::global_index`
+- `TableFilter::global_indices`
+- `TableColumnPredicates` 的 key
+- `ColumnMapResult` / `ResultColumnMapping` 的 key
+- `FilterEntry` map 的 key
+
+`GlobalIndex` 不是 FE slot id，也不是 FE column unique id。
+
+### LocalColumnId
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalColumnId` 表示当前物理文件 schema 的 top-level reader-local column id。
+
+主要使用位置：
+
+- `FileScanRequest::local_positions` 的 key。
+- `LocalColumnIndex::top_level()`。
+- new Parquet reader 创建 top-level column reader。
+- page index、statistics、bloom filter 等 file-local pruning 的 root column key。
+- row position 这类 reader 内部 virtual column id。
+
+`LocalColumnId` 不是 file-local block position。一个 top-level file column 在本次 scan request
+输出 block 中的位置由 `LocalIndex` 表示。
+
+### LocalIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalIndex` 表示一次 `FileScanRequest` 内 file-local block 的列位置。
+
+主要使用位置：
+
+- `FileScanRequest::local_positions` 的 value。
+- file-local rewritten `SlotRef` 的 input position。
+- `TableReader` 从 file block 取列。
+- `ParquetScanScheduler` 把 column reader 读出的数据写入 file block。
+
+`LocalIndex` 是 request-local block layout，不是 file schema ordinal。
+
+### ConstantIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`ConstantIndex` 表示 `ConstantMap` 中的 entry 位置。它用于 per-split/per-file 常量列：
+
+- partition column。
+- schema evolution default column。
+- generated/default expression column。
+- 将来可扩展到更多 virtual/constant source。
+
+`FilterEntry` 可以指向 `ConstantIndex`。当一个 row-level conjunct 只引用 constant target 时，
+`TableReader` 会在打开 file reader 前用 1 行常量 block 求值；如果结果为 false/NULL，当前 split
+直接跳过。
+
+### LocalColumnIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalColumnIndex` 表示递归 file-local projection path：
+
+```cpp
+struct LocalColumnIndex {
+    int32_t index = -1;
+    bool project_all_children = true;
+    std::vector<LocalColumnIndex> children;
+};
+```
+
+语义：
+
+- root entry 的 `index` 是 `LocalColumnId`。
+- nested entry 的 `index` 是当前 parent 下的 file-local child id。
+- `project_all_children = true` 表示读取整个 subtree。
+- `project_all_children = false` 表示只读取 `children` 中列出的 child paths。
+
+通用 helper：
+
+- `is_full_projection()`
+- `is_partial_projection()`
+- `find_child_projection()`
+- `is_child_projected()`
+- `merge_local_column_index()`
+
+new Parquet reader 的 STRUCT/LIST/MAP reader 都消费这套 projection helper：
+
+- STRUCT：只创建被投影 child 的 reader。
+- LIST：把 element projection 递归传给 element reader。
+- MAP：总是读取 key，把 value projection 递归传给 value reader。
+
+## FileScanRequest
+
+定义位置：`be/src/format_v2/file_reader.h`
+
+`FileScanRequest` 是 table reader 交给 file reader 的唯一 scan 输入。它不包含 table/global schema。
+
+关键字段：
+
+- `predicate_columns`：row-level conjunct/delete conjunct 需要先读取的 file-local projection。
+- `non_predicate_columns`：最终输出需要读取、且不需要先参与 row-level filter 的 file-local
+  projection。
+- `local_positions`：`LocalColumnId -> LocalIndex`，决定 file-local block layout。
+- `conjuncts` / `delete_conjuncts`：已经把 table/global slot 改写成 file-local slot 的表达式。
+- `column_predicate_filters`：file-layer pruning hints，只用于 min/max、page index、dictionary、
+  bloom filter 等剪枝，不参与 batch row filtering。
+
+`predicate_columns` 和 `non_predicate_columns` 都按 file-local schema 表达。file reader 只需要根据
+这两个列表创建 reader，并按 `local_positions` 写入 file block。
+
+## TableColumnMapper 逻辑
+
+定义位置：
+
+- `be/src/format_v2/column_mapper.h`
+- `be/src/format_v2/column_mapper.cpp`
+
+### 匹配模式
+
+`TableColumnMapperOptions::mode` 决定 `identifier` 的解释方式：
+
+- `BY_FIELD_ID`：`TYPE_INT` identifier 是 field id。
+- `BY_NAME`：`TYPE_STRING` identifier 或 `name` 是匹配名。
+- `BY_INDEX`：`TYPE_INT` identifier 是 file schema position。
+
+`TableReader::open_reader()` 当前默认按 field id 映射；如果 file schema 首列没有 int identifier，
+会 fallback 到 BY_NAME。Hive reader 可覆盖默认模式，Hive1 ORC 这类场景可使用 BY_INDEX。
+
+### create_mapping()
+
+`create_mapping()` 为每个 `GlobalIndex` 生成一个 `ColumnMapping`：
+
+1. partition column 优先映射到 `ConstantMap`。
+2. BY_INDEX 时按 file position 取 file schema。
+3. 普通列通过 matcher 在 file schema 中找对应 file field。
+4. 缺失但带 default expr 的列映射到 `ConstantMap`。
+5. 特殊 virtual column 记录 virtual column type。
+6. 允许 missing column 时保留空 mapping，由 table finalize 阶段补 NULL/default。
+
+`ColumnMapping::file_local_id` 是 table column 绑定到 file schema 后的 reader-local id：
+
+- root mapping 中可转成 `LocalColumnId`。
+- nested mapping 中表示 parent 下的 child id。
+- constant/missing/virtual mapping 没有 `file_local_id`。
+
+schema identity field id 不保存在 `ColumnMapping` 中，只保存在
+`ColumnDefinition::identifier` 中，并由 mapper 的匹配模式解释。
+
+### create_scan_request()
+
+`create_scan_request()` 把 table-level scan 信息转换成 file-local request：
+
+1. 先把不参与 row-level filter 的输出列加入 `non_predicate_columns`。
+2. 调用 `localize_filters()`，把 row-level conjunct 和 column predicates 定位到 file-local source。
+3. 为所有已读取 file column 重建 output projection，让 `ColumnMapping::projection` 指向正确的
+   `LocalIndex`。
+4. 生成 `ColumnMapResult` 和 `ResultColumnMapping`，供 table reader finalize。
+
+`local_positions` 在这个阶段确定。同一个 file column 如果同时被 filter 和 output 使用，只会有
+一个 `LocalIndex`。
+
+### FilterEntry
+
+`FilterEntry` 是 `GlobalIndex` 到 filter target 的结果：
+
+- `LOCAL`：filter 可以在 file-local block 上求值，target 是 `LocalIndex`。
+- `CONSTANT`：filter 只依赖 `ConstantMap` entry。
+- `UNSET`：当前 split 无法下推到 file reader。
+
+`TableColumnMapper::_build_filter_entries()` 在 `FileScanRequest::local_positions` 确定后生成
+`FilterEntry`。表达式改写时只把 `LOCAL` target 改写成 file-local slot；`CONSTANT` target 用于
+split-level constant filter evaluation。
+
+### ColumnMapResult / ResultColumnMapping
+
+`ColumnMapResult` 记录一个 global result column 的递归映射结果：
+
+- `local_column_id`：root file column。
+- `column_index`：file-local projection tree。
+- `mapping`：root 指向 `LocalIndex`，nested child 通过 `IndexMapping::child_mapping` 递归映射。
+
+`ResultColumnMapping` 是最终可消费的 `GlobalIndex -> ColumnMapEntry` map。`ColumnMapEntry` 包含：
+
+- `IndexMapping mapping`
+- `local_type`
+- `global_type`
+- `filter_conversion`
+
+TableReader finalize 阶段用它把 file-local block 转成 table/global block。
+
+### nested child mapping
+
+复杂列映射时，`IndexMapping::child_mapping` 的 key 是 table/global child ordinal，value 是对应
+file-local child mapping。这样 filter 中的 `STRUCT_EXTRACT` 可以按 table child ordinal 找到
+file child ordinal。
+
+Doris 不再维护额外的 `NestedPredicateTargetInfo` / filter target path。nested filter localization
+直接沿 `IndexMapping::child_mapping` 转换 selector path。
+
+对于 `SELECT s.name WHERE s.id > 5` 这类 filter-only child：
+
+- `s.name` 进入 output projection。
+- `s.id` 会进入 predicate projection。
+- `original_file_children` 保留 projection 前的 file children，用于定位 filter-only child。
+- `child_mappings` 只描述输出 shape，避免 filter-only child 改变最终 STRUCT/LIST/MAP shape。
+
+## Parquet 内部 schema 标识
+
+定义位置：`be/src/format_v2/parquet/parquet_column_schema.h`
+
+`ParquetColumnSchema` 是 new Parquet reader 内部 schema tree。它描述 Parquet 逻辑字段和 primitive
+leaf column 的关系，不暴露给 table reader。对外统一通过 `ParquetReader::get_schema()` 返回
+`std::vector<format::ColumnDefinition>`。
+
+关键字段：
+
+- `local_id`：当前 parent 下的 reader-local id。top-level 是 root field ordinal，nested 是 child
+  ordinal。`LocalColumnIndex` 传给 `ParquetColumnReaderFactory` 的就是这个 id。
+- `parquet_field_id`：Parquet schema element 中可选的 field_id。Arrow 在不存在 field_id 时返回
+  `-1`。它只作为 schema matching identifier，不用于读取 column chunk。
+- `name`：Parquet schema name。
+- `type`：转换后的 Doris 类型。
+- `leaf_column_id`：Parquet primitive leaf column ordinal。用于访问 `ColumnDescriptor`、
+  row group column chunk、statistics、page index、bloom filter 等。复杂节点为 `-1`。
+- `type_descriptor`：primitive leaf 的 Parquet physical/logical type 信息。
+- `descriptor`：primitive leaf 的 Arrow Parquet `ColumnDescriptor`。
+- `max_definition_level` / `max_repetition_level`：该 node 下的最大 Dremel level。
+- `nullable_definition_level`：当前 node 自身为 NULL 时对应的 definition level。
+- `repeated_repetition_level`：当前或最近 repeated container 的 repetition level。
+
+`ParquetReader::get_schema()` 会把 `ParquetColumnSchema` 转成 `ColumnDefinition`：
+
+- 如果 `parquet_field_id >= 0`，`ColumnDefinition::identifier` 是 `TYPE_INT` field id。
+- 否则 `identifier` 是 `TYPE_STRING` name。
+- `ColumnDefinition::local_id` 是 `ParquetColumnSchema::local_id`。
+- children 递归转换。
+
+因此 table reader 可以按 field id 或 name 匹配，而 Parquet reader 自己仍只按 `local_id`、
+`leaf_column_id` 和 Dremel levels 读取数据。
+
+## 端到端流转
+
+一次 split 的列标识流转如下：
+
+1. `FileScannerV2::_build_projected_columns()`：
+   FE `slot_id` / `col_unique_id` 被翻译成 `GlobalIndex`，并生成 table-side
+   `ColumnDefinition`。
+2. `ParquetReader::init()`：
+   解析 Arrow Parquet schema，构造内部 `ParquetColumnSchema`。
+3. `ParquetReader::get_schema()`：
+   把 Parquet 内部 schema 暴露成 file-side `ColumnDefinition`。
+4. `TableReader::open_reader()`：
+   根据 file schema 是否带 int identifier 选择 BY_FIELD_ID 或 BY_NAME，并调用 mapper。
+5. `TableColumnMapper::create_mapping()`：
+   用 `ColumnDefinition::identifier` 匹配 table/global schema 和 file-local schema，生成
+   `ColumnMapping`。
+6. `TableColumnMapper::create_scan_request()`：
+   生成 `FileScanRequest`，其中所有 projection 和 block position 都是 file-local 的。
+7. `ParquetReader::open()`：
+   校验 `LocalColumnId`，用 `LocalColumnIndex` 创建 column readers，并规划 row group pruning。
+8. `ParquetScanScheduler`：
+   按 `local_positions` 把 predicate/non-predicate column 写入 file-local block。
+9. `TableReader` finalize：
+   使用 `ResultColumnMapping`、`ConstantMap` 和 projection expression，把 file-local block 转成
+   table/global output block。
+
+## 使用约定
+
+修改 new reader 代码时应遵守以下约定：
+
+- 不要在 table/file reader 层重新传递 FE column unique id。
+- 不要把 `ColumnDefinition::identifier` 当作 file reader 读取 id。
+- 不要把 `LocalColumnId` 当作 block position；block position 使用 `LocalIndex`。
+- 不要把 `LocalIndex` 当作 schema ordinal。
+- `LocalColumnIndex::index` 在 root 和 child 层含义不同，调用方必须知道当前 projection node
+  所在层级。
+- file reader 只能消费 `FileScanRequest`，不能理解 partition/default/generated/table schema。
+- column predicate pruning 是 file-layer hint，不等价于 row-level filter。
+- constant filter 可以在 table reader 层提前求值，但不应下推到 file reader。
+
+## 已知限制
+
+TVF 查询 Parquet 且文件没有 field id 时，top-level BY_NAME 已经可以通过 name identifier 工作。
+但 nested access path 的 fallback 目前仍有一处 TODO：STRUCT child fallback 使用 struct ordinal
+构造 int identifier。对于没有 field id 的 nested Parquet schema，BY_NAME 场景应保留 string
+identifier，让 `TableColumnMapper` 从 Parquet file schema 中按 name 解析 file-local child id。
+该问题已在 `be/src/exec/scan/file_scanner_v2.cpp` 代码中记录，当前未修复。
diff --git a/docs/new-parquet-reader-ut-improvement-plan.md b/docs/new-parquet-reader-ut-improvement-plan.md
new file mode 100644
index 00000000000000..4ece111d0d6323
--- /dev/null
+++ b/docs/new-parquet-reader-ut-improvement-plan.md
@@ -0,0 +1,325 @@
+# New Parquet Reader UT Improvement Plan
+
+本文档评估 Doris new parquet reader 当前 UT 覆盖方式，并给出更合理的测试分层、数据构造方法和落地优先级。
+
+目标不是追求形式上的 100% 行覆盖率，而是让测试能够发现 new parquet reader 最容易出错的真实问题：schema 兼容、definition/repetition level 物化、投影/过滤交互、row group/page pruning、delete predicate 以及 schema evolution 组合。
+
+## 当前覆盖方式评估
+
+当前测试分层大体合理：
+
+| 层级 | 代表文件 | 当前价值 |
+|---|---|---|
+| Schema resolver UT | `be/test/format_v2/parquet/parquet_schema_test.cpp` | 直接构造 Parquet schema node，验证 `ParquetColumnSchema` 的 kind、type、level 和非法 schema 拒绝。速度快，适合覆盖 schema 分支。 |
+| Type resolver UT | `be/test/format_v2/parquet/parquet_type_test.cpp` | 覆盖 physical/logical/converted type 到 Doris type 的映射。 |
+| Leaf value UT | `be/test/format_v2/parquet/parquet_leaf_reader_test.cpp` | 覆盖 nullable spacing、binary/fixed/bool/float16 等 leaf append 细节。 |
+| Column reader UT | `be/test/format_v2/parquet/parquet_column_reader_test.cpp` | 用 Arrow writer 生成真实 parquet 文件，覆盖 scalar/struct/list/map 的 read、skip、select、overflow。 |
+| File reader UT | `be/test/format_v2/parquet/parquet_reader_test.cpp` | 覆盖 open/read、多 row group、predicate selection、statistics/dictionary/page index pruning、row position、delete predicate。 |
+| Table reader UT | `be/test/format_v2/table_reader_test.cpp` | 覆盖 table schema 到 file schema mapping、aggregate pushdown、default value、Iceberg delete/virtual column 等跨层行为。 |
+
+这个方向是正确的，但目前有三个明显缺口：
+
+1. Schema 兼容测试和真实读取测试之间缺少桥接。`parquet_schema_test.cpp` 可以证明 legacy LIST/MAP schema 被解析成期望的 tree，但不能证明 `ListColumnReader`、`MapColumnReader` 可以正确消费对应 def/rep levels。
+2. 真实 parquet 文件主要由 Arrow writer 生成。Arrow 生成的文件通常符合标准 layout，不能充分代表 Hive、Spark、old parquet-mr、旧 Doris 或其它 legacy writer 的 schema 形态。
+3. 异常路径和组合路径覆盖不足。比如 optional map key 被 schema 接受后，真实数据中 key 为 null 必须在 materialize 阶段报错；key/value stream 不对齐、invalid repeated level、non-nullable complex column 读到 null 等 corruption 路径需要专门测试。
+
+## 改进原则
+
+1. 按风险分层测试，不用单一大 fixture 覆盖所有逻辑。
+2. Schema resolver 只验证 schema 归一化，不承担真实读取正确性的证明。
+3. Def/rep level materialization 要有直接单测，避免所有边界都依赖真实 parquet 文件构造。
+4. 对 legacy layout 使用 golden parquet corpus，而不是只用 Arrow writer 动态生成。
+5. Reader 集成测试覆盖跨模块行为，避免在 SQL regression 中验证过多 BE 内部细节。
+6. SQL regression 只保留用户可见和跨层最关键路径，避免回归测试过慢。
+
+## 推荐测试分层
+
+### L0: Schema Resolver Table-Driven UT
+
+位置：`be/test/format_v2/parquet/parquet_schema_test.cpp`
+
+职责：覆盖 `parquet_column_schema.cpp` 的 schema 归一化规则。建议把 LIST/MAP case 整理成 table-driven 形式，每个 case 明确：
+
+- 输入 schema layout
+- 是否成功
+- top-level kind/type/nullability
+- child kind/name/type/nullability
+- definition/repetition level
+- error message 关键字
+
+必须覆盖的 schema 形态：
+
+| 类别 | Case |
+|---|---|
+| LIST 标准格式 | Standard 3-level list: `optional group a (LIST) { repeated group list { optional int32 element; } }` |
+| LIST legacy | repeated primitive, repeated group named `array`, repeated group named `<list_name>_tuple`, repeated group with multiple children |
+| LIST wrapper 判定 | repeated group with logical annotation, repeated group whose only child is repeated, repeated group whose only child is optional scalar |
+| Bare repeated | repeated primitive field, repeated group field inside struct |
+| MAP 标准格式 | required/optional outer map, required/optional value |
+| MAP 兼容格式 | optional key accepted at schema level, `MAP_KEY_VALUE` converted annotation |
+| Invalid schema | LIST outer has zero/multiple children, non-repeated LIST child, MAP outer has zero/multiple children, primitive MAP entry, non-repeated MAP entry, entry child count not equal to 2, repeated outer LIST/MAP in normal mode |
+| Unsupported type | UTC TIME rejection, unsupported physical/logical type |
+
+L0 的验收标准：schema branch 新增或修改时，必须有对应 table-driven case；但 L0 通过不代表 reader 行为充分。
+
+### L1: Def/Rep Level Materializer UT
+
+位置建议：
+
+- `be/test/format_v2/parquet/parquet_nested_materializer_test.cpp`
+- 或拆分为 `parquet_list_column_reader_test.cpp`、`parquet_map_column_reader_test.cpp`
+
+职责：用 fake child reader 直接喂 definition levels、repetition levels 和 leaf values，验证 `ListColumnReader` / `MapColumnReader` 的 offsets、nullmap、child values、cursor 和错误路径。
+
+这种方式比构造真实 parquet 文件更适合覆盖边界，因为 def/rep level 是复杂类型 reader 的核心输入。
+
+建议增加测试工具：
+
+```cpp
+class FakeNestedColumnReader final : public ParquetColumnReader {
+public:
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+};
+```
+
+必须覆盖的 materialize case：
+
+| 类别 | Case |
+|---|---|
+| LIST 正常路径 | null list, empty list, list with values, list with null element, consecutive repeated elements |
+| LIST 操作 | read 分批、skip 后 read、select 非连续行、select 跨 overflow 边界 |
+| LIST 异常 | first level has `rep_level == list.repetition_level`, non-nullable LIST 读到 null, child value count 不匹配 |
+| MAP 正常路径 | null map, empty map, one entry, multiple entries, nullable value, complex value |
+| MAP 操作 | read 分批、skip 后 read、select 非连续行、value scalar path 和 complex value path |
+| MAP 异常 | null key, value stream ended before key stream, key/value repetition level 不对齐, key count 不匹配, value count 不匹配, non-nullable MAP 读到 null |
+
+L1 的验收标准：`ListColumnReader::build_nested_column()` 和 `MapColumnReader::build_nested_column()` 的主要分支必须有直接 UT；corruption path 不能只靠真实文件偶然触发。
+
+### L2: Golden Parquet Corpus UT
+
+位置建议：
+
+- 数据文件：`be/test/exec/test_data/parquet_v2_compat/`
+- 测试文件：`be/test/format_v2/parquet/parquet_compat_corpus_test.cpp`
+
+职责：保存小型真实 parquet 文件，覆盖非 Arrow 标准 writer 或难以用 Arrow writer 生成的 legacy layout。每个文件控制在几十行以内，配套记录 schema 来源和 expected output。
+
+建议文件来源：
+
+| 来源 | 覆盖目标 |
+|---|---|
+| Arrow writer | 标准 LIST/MAP、page v2、dictionary/plain、不同 row group/page size |
+| Spark | Spark nested list/map schema、nullable struct/list/map 混合 |
+| Hive/parquet-mr | legacy two-level list、optional map key、`array` / `bag` / `key_value` 等命名兼容 |
+| 手工生成 | malformed-but-parseable def/rep level edge case，或特殊 converted annotation |
+
+Golden 文件命名建议：
+
+```text
+be/test/exec/test_data/parquet_v2_compat/
+  list_two_level_repeated_primitive.parquet
+  list_tuple_struct_element.parquet
+  list_repeated_group_with_logical_map_element.parquet
+  map_optional_key_no_null.parquet
+  map_optional_key_with_null.parquet
+  map_value_list_nullable.parquet
+  nested_list_struct_map_list.parquet
+  README.md
+```
+
+每个 corpus case 至少验证：
+
+- `get_schema()` 输出是否符合预期
+- full read 输出是否符合预期
+- projection read 输出是否符合预期
+- skip/select 后输出是否符合预期
+- 预期失败文件是否返回明确错误
+
+L2 的验收标准：每一个 schema compatibility rule 至少有一个真实 parquet 文件证明 reader 可以消费该 layout。
+
+### L3: New Parquet Reader Integration UT
+
+位置：`be/test/format_v2/parquet/parquet_reader_test.cpp`
+
+职责：覆盖 file reader 层的组合行为，不重复 L1 的低层 def/rep 细节。
+
+建议补充或保留以下组合：
+
+| 类别 | Case |
+|---|---|
+| Projection + predicate | `SELECT s.b WHERE s.a > x` 对应 file-local projection 与 predicate projection 合并 |
+| Complex non-predicate select | predicate 过滤后，非谓词复杂列通过 selection vector 读取 |
+| Row group/page pruning + complex projection | page index 缩小 row ranges 后，list/map/struct 输出行数和 offsets 正确 |
+| Dictionary/statistics pruning | nested scalar leaf predicate 可 prune，但 repeated leaf 不做错误 aggregate/pruning |
+| Delete predicate | delete predicate 和 query predicate 同时作用时 row position、selection、输出列一致 |
+| Timestamp TZ | timestamp tz mapping 后 schema、read、min/max pushdown 一致 |
+| Reopen split | 同一个 reader reopen 不残留 selection、cast、predicate projection、page skip state |
+
+L3 的验收标准：跨 reader state 的行为必须有 UT，尤其是 reopen、filter 后 selection、page skip 后 output column 不 double skip。
+
+### L4: Table Reader And SQL Regression
+
+位置：
+
+- `be/test/format_v2/table_reader_test.cpp`
+- `regression-test/suites/external_table_p*_parquet/` 或现有 parquet 外表相关目录
+
+职责：覆盖用户可见行为和 FE/BE 接口组合，不在 regression 中验证 BE 内部 offset/nullmap 细节。
+
+建议保留少量高价值 SQL regression：
+
+| 场景 | SQL 覆盖 |
+|---|---|
+| Legacy LIST/MAP 文件可读 | `SELECT *`, `SELECT nested_child`, `WHERE nested_child predicate` |
+| Schema evolution | missing nested child with default, reordered/renamed nested field |
+| Predicate pushdown 正确性 | row group/page pruning 开关开启时结果与关闭时一致 |
+| Aggregate pushdown 正确性 | `count`, `min`, `max` 对 flat leaf 和 supported nested single leaf 正确；repeated leaf fallback |
+| Iceberg/Paimon delete | delete vector / position delete / equality delete 与 parquet reader 组合结果正确 |
+
+L4 的验收标准：新增用户可见兼容能力时必须有 SQL regression；纯内部 refactor 不强制补 SQL regression，但需要 L0-L3 覆盖。
+
+## 覆盖矩阵
+
+下面的矩阵用于判断新改动应该补哪一层测试。
+
+| 逻辑区域 | L0 Schema | L1 Def/Rep | L2 Corpus | L3 Reader | L4 SQL |
+|---|---:|---:|---:|---:|---:|
+| Parquet type mapping | 必须 | 不需要 | 可选 | 可选 | 可选 |
+| LIST/MAP schema compatibility | 必须 | 可选 | 必须 | 可选 | 必须覆盖用户可见新增能力 |
+| Bare repeated field | 必须 | 必须 | 必须 | 可选 | 可选 |
+| List offsets/nullmap | 不足 | 必须 | 必须 | 必须 | 可选 |
+| Map offsets/nullmap/key validation | 不足 | 必须 | 必须 | 必须 | 可选 |
+| Projection pruning | 可选 | 可选 | 必须 | 必须 | 必须覆盖用户可见路径 |
+| Predicate selection | 不需要 | 可选 | 可选 | 必须 | 必须覆盖关键路径 |
+| Statistics/dictionary/page pruning | 不需要 | 不需要 | 可选 | 必须 | 结果一致性必须 |
+| Aggregate pushdown | 不需要 | 不需要 | 可选 | 必须 | 必须 |
+| Delete predicate / row position | 不需要 | 不需要 | 可选 | 必须 | Iceberg/Paimon 必须 |
+| Error/corruption path | 必须覆盖 schema error | 必须覆盖 materialize error | 必须覆盖真实坏文件 | 可选 | 可选 |
+
+## 推荐优先级
+
+### P0: 立即补齐的正确性保护
+
+1. 为 legacy LIST schema 增加真实读取 corpus：
+   - repeated primitive list
+   - `<list_name>_tuple` struct element
+   - repeated group with multiple children
+2. 为 optional MAP key 增加两类真实读取：
+   - optional key 但所有 key 非 null，读取成功
+   - optional key 且存在 null key，读取失败并包含 `contains null key`
+3. 增加 fake def/rep level materializer UT：
+   - list null/empty/null element/multi element
+   - map null/empty/null value/multi entry/null key
+4. 增加 skip/select 覆盖：
+   - legacy list corpus 上执行 skip/select
+   - map value list 或 list struct map list 上执行 select
+
+### P1: 组合路径保护
+
+1. Projection + predicate 同时命中同一 nested struct 的不同 child。
+2. Page index pruning 后读取 complex output column，验证没有 double skip。
+3. Row group statistics/dictionary pruning 后从后续 row group 读取 nested column。
+4. Reopen split 后 predicate projection、selection vector、page skip plan 不残留。
+
+### P2: 完整性和长期质量
+
+1. 建立 `parquet_v2_compat` corpus README，记录文件生成方式、writer 版本、schema、预期行为。
+2. 对 changed files 定期跑 coverage，关注 branch coverage，不只看 line coverage。
+3. 对 schema resolver 增加 table-driven case，减少散落 assert。
+4. 对 materializer 增加 fuzz/property-style 小范围测试：随机生成合法 list/map rows，转换为 def/rep levels 后读回比较原始 logical rows。
+
+## 测试数据构造建议
+
+### 动态生成数据
+
+适合：
+
+- Arrow 标准 schema
+- row group/page size 控制
+- dictionary/plain/page index/statistics 行为
+- type mapping 常规 case
+
+优点是无需维护二进制文件，case 可读性高。
+
+缺点是不能覆盖大量 legacy writer layout。
+
+### Golden parquet 文件
+
+适合：
+
+- Hive/Spark/parquet-mr legacy LIST/MAP schema
+- Arrow writer 不容易生成的 converted annotation
+- malformed-but-parseable 文件
+- 兼容性回归保护
+
+要求：
+
+1. 文件尽量小，通常 3 到 20 行。
+2. 配套 README 说明生成命令、writer 版本、schema、逻辑数据。
+3. 不在 UT 中依赖外部网络或外部服务。
+4. 预期结果在 C++ UT 中直接断言，SQL regression 的 `.out` 仍由 regression 脚本生成。
+
+### Fake reader 数据
+
+适合：
+
+- def/rep level 边界
+- corruption path
+- cursor/overflow 状态
+- non-nullable output 遇到 null
+
+要求：
+
+1. fake reader 只模拟 `ParquetColumnReader` 必需接口。
+2. 每个 case 明确输入 levels 和 expected logical rows。
+3. 错误 case 检查 `Status` 类型和关键错误文本。
+
+## 验收标准
+
+一个 new parquet reader 改动合入前，建议满足：
+
+1. 改动 schema resolver：至少补 L0；如果新增兼容能力，补 L2；如果用户可见，补 L4。
+2. 改动 list/map/struct reader：至少补 L1 和 L3；涉及 legacy layout 时补 L2。
+3. 改动 pruning/predicate/aggregate：至少补 L3；用户可见 SQL 语义补 L4。
+4. 改动 table reader mapping/schema evolution：至少补 `table_reader_test.cpp`，必要时补 L4。
+5. 新增 error handling：必须有负向 UT，不能只依赖代码审查。
+
+推荐执行命令：
+
+```bash
+./run-be-ut.sh --run '--filter=ParquetSchemaTest.*'
+./run-be-ut.sh --run '--filter=ParquetColumnReaderTest.*:NewParquetReaderTest.*:ParquetScanTest.*'
+./run-be-ut.sh --run '--filter=TableReaderTest.*'
+```
+
+对重要重构或发布前验证，建议执行：
+
+```bash
+./run-be-ut.sh --run '--filter=Parquet*:*TableReaderTest*' --coverage
+```
+
+如果本地工具链无法执行 UT，需要在提交说明或 PR 中明确说明失败原因，并在 CI 或可用环境补跑。
+
+## 不建议的方式
+
+1. 不建议用更多 schema-only case 替代真实读取 case。schema 正确不等于 reader 正确。
+2. 不建议只用 Arrow writer 动态生成文件证明 compatibility。兼容性问题通常来自非 Arrow writer。
+3. 不建议把所有复杂类型组合塞进一个巨大 fixture 后只断言少量输出。失败定位困难，覆盖意图不清晰。
+4. 不建议把内部 def/rep level 边界全部放到 SQL regression。执行慢、定位差、难覆盖异常路径。
+5. 不建议用 100% line coverage 作为合入门槛。更合理的是 changed branch coverage + 风险矩阵覆盖。
+
+## 最小落地计划
+
+第一阶段只需要完成 P0：
+
+1. 新增 `parquet_nested_materializer_test.cpp`，覆盖 list/map def/rep 核心正常和异常路径。
+2. 新增 `be/test/exec/test_data/parquet_v2_compat/README.md` 和 4 到 6 个小型 golden parquet 文件。
+3. 新增 `parquet_compat_corpus_test.cpp`，对 golden 文件做 schema/full read/projection/skip/select 断言。
+4. 将现有 `parquet_schema_test.cpp` 中 LIST/MAP schema case 整理为 table-driven 或至少按类别分组。
+
+完成第一阶段后，才能较有信心地说 new parquet reader 的关键逻辑有有效测试保护；否则当前 UT 只能证明主路径和部分 schema 分支，不能充分发现 legacy compatibility 和 complex materialization 的问题。
diff --git a/docs/parquet-list-map-compat-design.md b/docs/parquet-list-map-compat-design.md
new file mode 100644
index 00000000000000..a02ca6e822aaf0
--- /dev/null
+++ b/docs/parquet-list-map-compat-design.md
@@ -0,0 +1,664 @@
+# Parquet LIST/MAP Compatibility Design
+
+本文描述如何参考 Arrow Parquet 的 LIST/MAP 兼容策略，在 Doris new parquet reader 中支持更多 Parquet 标准和 legacy 复杂类型 schema。
+
+目标不是改变 `ListColumnReader` / `MapColumnReader` 的读取模型，而是在 schema 构建阶段把不同物理 schema 归一化成 Doris 当前 reader 可以消费的统一 `ParquetColumnSchema` tree。
+
+## 背景
+
+Parquet 的复杂类型是通过 group schema、logical/converted annotation、definition levels 和 repetition levels 共同表达的。
+
+标准 LIST/MAP schema 比较明确，但历史 writer 产生过多种 legacy 形态。例如 LIST 可能缺少标准 `list.element` wrapper，MAP entry group 可能叫 `key_value`、`entries` 或其它名字。
+
+Arrow C++ 的处理思路是：
+
+1. 在 Parquet schema conversion 阶段识别标准和 legacy schema。
+2. 将这些 schema 归一化为 Arrow `ListType` / `MapType` / `StructType`。
+3. 后续 reader 只消费归一化后的 nested field tree，不在读取阶段继续判断 legacy schema 名字。
+
+Doris new parquet reader 应采用相同边界：
+
+1. `parquet_column_schema.cpp` 负责兼容不同 LIST/MAP physical schema。
+2. `ParquetColumnSchema` 输出统一的 LIST/MAP child tree。
+3. `ListColumnReader` / `MapColumnReader` / `ParquetLeafReader` 不感知 legacy schema 形态。
+
+## 当前 Doris 限制
+
+当前 `build_node_schema()` 的 LIST 分支只支持标准 3-level LIST：
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+当前限制：
+
+- outer LIST group 必须只有一个 child。
+- repeated child 必须是 group。
+- repeated group 必须只有一个 child。
+- 不支持 repeated primitive list。
+- 不支持 repeated group 多字段 struct element。
+- 不支持 `array` / `<parent>_tuple` 这类 legacy structural name。
+
+当前 MAP 分支支持标准 MAP 结构：
+
+```text
+optional group m (MAP) {
+  repeated group key_value {
+    required binary key;
+    optional int32 value;
+  }
+}
+```
+
+当前限制：
+
+- outer MAP group 必须只有一个 child。
+- entry child 必须 repeated group。
+- entry group 必须正好两个 children。
+- key 必须 required。
+- 不支持 key-only map。
+- 不支持没有 repeated entry layer 的非标准 MAP。
+
+## 设计原则
+
+1. 兼容逻辑只放在 schema 构建阶段。
+2. reader 层继续消费统一 schema tree。
+3. 不支持会改变 reader model 的格式，例如没有 repeated entry layer 的 MAP。
+4. 第一阶段不支持 key-only map，因为 Doris `ColumnMap` 需要 values column。
+5. 对容易误判的 schema 保持严格，避免把普通 struct 错解析成 LIST/MAP。
+6. 支持范围对齐 Arrow 的稳定 legacy compatibility 规则，而不是无限放宽。
+
+MAP projection 语义也保持收敛：
+
+- partial MAP projection 只表示 value subtree pruning，例如 `MAP<K, STRUCT<a,b>>` 投影 `value.b` 后输出 `MAP<K, STRUCT<b>>`。
+- key 不作为可裁剪 projection 子树。reader 始终读取完整 key stream，因为 key stream 决定 entry existence、offsets，并且 key 本身承载 MAP 的 key equality 语义。
+- schema projection 重建 `DataTypeMap` 时保留原始 key type，只根据 projected value child 重建 value type。
+
+## LIST 兼容规则
+
+对于 outer group annotated as `LIST`：
+
+```text
+optional group a (LIST) {
+  repeated ... repeated_child;
+}
+```
+
+先要求：
+
+- outer LIST group 必须只有一个 child。
+- child 必须是 repeated。
+
+然后根据 repeated child 形态判断 element schema node。
+
+### 1. 标准 3-level LIST
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+解析：
+
+- repeated child 是 wrapper。
+- element 是 wrapper 的唯一 child：`list.element`。
+- `ParquetColumnSchema(LIST).children[0]` 指向 element schema。
+
+### 2. Repeated primitive legacy LIST
+
+```text
+optional group a (LIST) {
+  repeated int32 element;
+}
+```
+
+解析：
+
+- repeated primitive 本身是 element。
+- element 本身不 nullable，因为 repeated primitive 不提供额外 optional element level。
+- array 自身 nullable 仍由 outer LIST group 决定。
+
+### 3. Repeated group as struct element
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+解析：
+
+- repeated group 有多个 children。
+- repeated group 本身是 element。
+- element type 是 `STRUCT<x, y>`。
+
+### 4. Legacy structural name
+
+Arrow 会将某些名字视作 structural element，而不是标准 wrapper。
+
+```text
+optional group a (LIST) {
+  repeated group array {
+    optional int32 item;
+  }
+}
+```
+
+```text
+optional group a (LIST) {
+  repeated group a_tuple {
+    optional int32 item;
+  }
+}
+```
+
+解析：
+
+- repeated group 名为 `array`，或名为 `<list_name>_tuple`。
+- repeated group 本身是 element。
+- 即使它只有一个 child，也不要剥掉这一层。
+
+### 5. One-child repeated group wrapper
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+如果 repeated group 只有一个 child，且不是 legacy structural name，则按 wrapper 处理：
+
+- element 是 repeated group 的唯一 child。
+
+但这里不能只按 child 数量判断。需要额外保持 Arrow / parquet-format 的 backward compatibility 规则：
+
+- 如果 repeated group 自身带 `LIST` 或 `MAP` annotation，则 repeated group 本身是 element，不剥 wrapper。
+- 如果 repeated group 的唯一 child 也是 repeated，则 repeated group 本身是 element，不剥 wrapper。
+- 只有当 repeated group 无 logical annotation、唯一 child 非 repeated、且不是 legacy structural name 时，才把它当作标准 wrapper 剥掉。
+
+这样可以避免把 two-level `List<List<T>>`、two-level `List<Map<K, V>>` 或单字段 repeated struct element 错解析成少一层的结构。
+
+## LIST schema resolver
+
+建议在 `parquet_column_schema.cpp` 中新增 helper：
+
+```cpp
+struct ListElementResolution {
+    const parquet::schema::Node* repeated_node = nullptr;
+    const parquet::schema::Node* element_node = nullptr;
+    SchemaBuildContext repeated_context;
+    SchemaBuildContext element_context;
+    bool element_is_repeated_node = false;
+};
+
+Status resolve_list_element_node(
+        const parquet::SchemaDescriptor& schema,
+        const parquet::schema::GroupNode& list_group,
+        const SchemaBuildContext& list_context,
+        ListElementResolution* result);
+```
+
+Resolver 逻辑：
+
+```text
+if list_group.field_count != 1:
+    reject
+
+repeated_node = list_group.field(0)
+if !repeated_node.is_repeated:
+    reject
+
+repeated_context = child_context(list_context, repeated_node, 0)
+
+if repeated_node.is_primitive:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+repeated_group = as_group(repeated_node)
+if repeated_group.field_count == 0:
+    reject
+
+if repeated_group.field_count > 1:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+if has_structural_list_name(list_group.name, repeated_group.name):
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+if repeated_group has LIST or MAP annotation:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+only_child = repeated_group.field(0)
+if only_child.is_repeated:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+element_node = only_child
+element_context = child_context(repeated_context, only_child, 0)
+element_is_repeated_node = false
+```
+
+`has_structural_list_name()` 对齐 Arrow 的 legacy rule：
+
+```text
+name == "array" || name == list_name + "_tuple"
+```
+
+## LIST schema build
+
+`build_node_schema()` 的 LIST 分支改为：
+
+```text
+resolve_list_element_node(...)
+
+column_schema.kind = LIST
+column_schema.definition_level = repeated_context.definition_level
+column_schema.repetition_level = repeated_context.repetition_level
+column_schema.repeated_repetition_level = repeated_context.repeated_repetition_level
+
+build child schema from resolved element_node and element_context
+column_schema.type = nullable_if_needed(DataTypeArray(child.type), list_node)
+column_schema.children = [child]
+propagate_child_levels(column_schema)
+```
+
+### repeated group itself as element
+
+当 element 是 repeated group 本身时，需要注意不要把这个 repeated group 再解释成一层 LIST。
+
+预期效果：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+应构造成：
+
+```text
+LIST
+  child: STRUCT<x, y>
+```
+
+而不是：
+
+```text
+LIST
+  child: LIST or extra repeated container
+```
+
+实现上可以新增一个 internal build mode：
+
+```cpp
+enum class SchemaBuildMode {
+    NORMAL,
+    REPEATED_GROUP_AS_LIST_ELEMENT,
+};
+```
+
+当 mode 是 `REPEATED_GROUP_AS_LIST_ELEMENT`：
+
+- 当前 repeated group 作为 element 本身构造成 STRUCT 或 annotated logical type。
+- 它的 repeated level 已经由 list entry 层消费，不再把 repeated 当作额外 array 层。
+- 如果当前 repeated group 是普通 group，则构造成 `STRUCT` element。
+- 如果当前 repeated group 带 `LIST` annotation，则继续按 LIST 解析它的 child repeated layer，构造成 nested list element。
+- 如果当前 repeated group 带 `MAP` 或 `MAP_KEY_VALUE` annotation，则继续按 MAP 解析它的 child repeated entry layer，构造成 map element。
+- 构造当前 element schema 时，不得再次因为“当前节点本身是 repeated”引入隐式 list；只有它内部的 child repeated layer 才能产生下一层 list/map repetition 语义。
+
+如果希望保持改动更小，也可以新增专用函数：
+
+```cpp
+Status build_repeated_group_as_list_element_schema(...);
+```
+
+该函数至少需要处理 repeated group 作为普通 struct element 的场景；如果选择不用通用 build mode，则还需要显式覆盖 repeated group annotated as LIST/MAP 的场景。
+
+## MAP 兼容规则
+
+对于 outer group annotated as `MAP` 或 legacy `MAP_KEY_VALUE`：
+
+```text
+optional group m (MAP) {
+  repeated group entries {
+    required binary key;
+    optional int32 value;
+  }
+}
+```
+
+支持：
+
+- 只有 outer group 带 `MAP` / `MAP_KEY_VALUE` annotation 时，才进入 MAP 兼容解析。
+- entry group 名字可以是 `key_value`、`entries` 或其它。
+- key/value 字段名不强制必须叫 `key` / `value`。
+- 第一个 child 是 key。
+- 第二个 child 是 value。
+- key 必须 required。
+- value 可以 required 或 optional。
+
+不支持：
+
+- outer MAP group 多个 children。
+- entry child 非 repeated。
+- entry child 是 primitive。
+- entry group 没有 value，即 key-only map。
+- 没有 repeated entry layer 的 MAP。
+- nullable key。
+
+## MAP schema resolver
+
+建议新增 helper：
+
+```cpp
+struct MapEntryResolution {
+    const parquet::schema::GroupNode* entry_group = nullptr;
+    SchemaBuildContext entry_context;
+};
+
+Status resolve_map_entry_group(
+        const parquet::schema::GroupNode& map_group,
+        const SchemaBuildContext& map_context,
+        MapEntryResolution* result);
+```
+
+Resolver 逻辑：
+
+```text
+if map_group.field_count != 1:
+    reject
+
+entry_node = map_group.field(0)
+if !entry_node.is_repeated:
+    reject
+if entry_node.is_primitive:
+    reject
+
+entry_group = as_group(entry_node)
+if entry_group.field_count != 2:
+    reject
+
+key_node = entry_group.field(0)
+value_node = entry_group.field(1)
+if key_node.repetition != REQUIRED:
+    reject
+
+entry_context = child_context(map_context, entry_node, 0)
+return
+```
+
+## MAP schema build
+
+`build_node_schema()` 的 MAP 分支应和 LIST 一样在 schema 构建阶段折叠物理 wrapper。
+`key_value` / `entries` / 任意合法 entry group 只用于解析 repeated entry level，不出现在
+最终 `ParquetColumnSchema.children` 中：
+
+```text
+MAP
+  child[0]: key
+  child[1]: value
+```
+
+构造流程：
+
+```text
+resolve_map_entry_group(...)
+
+column_schema.kind = MAP
+column_schema.definition_level = entry_context.definition_level
+column_schema.repetition_level = entry_context.repetition_level
+column_schema.repeated_repetition_level = entry_context.repeated_repetition_level
+
+build key child from entry_group.field(0)
+build value child from entry_group.field(1)
+
+column_schema.type = nullable_if_needed(DataTypeMap(nullable(key.type), nullable(value.type)), map_node)
+column_schema.children = [key_schema, value_schema]
+propagate_child_levels(column_schema)
+```
+
+这里保持 `MapColumnReader` 的直接 key/value 假设：
+
+- `column_schema.children[0]` 是 key。
+- `column_schema.children[1]` 是 value。
+- MAP node 自身保存 entry repeated group 的 `definition_level` / `repetition_level` /
+  `repeated_repetition_level`，用于 materialize offsets、null map 和 empty map。
+
+注意：`DataTypeMap` 中把 key type 包成 nullable 是 Doris nested column materialization 的内部类型约定，不代表 Parquet nullable key 被支持。Schema resolver 仍必须在 `key_node.repetition != REQUIRED` 时 reject。
+
+## 不支持 key-only map 的原因
+
+Key-only map 可能长这样：
+
+```text
+optional group m (MAP) {
+  repeated group entries {
+    required binary key;
+  }
+}
+```
+
+理论上可以解释为 set-like map 或 `MAP<K, NULL>`，但 Doris `ColumnMap` 需要 keys column 和 values column。
+
+若要支持，需要额外设计：
+
+- synthetic null value schema。
+- constant-null value reader。
+- `MapColumnReader` value stream 缺失时的特殊路径。
+
+这会改变 reader tree，不属于本次 schema compatibility 的最小范围。因此第一阶段明确 reject。
+
+## 不支持 no-entry MAP 的原因
+
+No-entry MAP 可能长这样：
+
+```text
+optional group m (MAP) {
+  required binary key;
+  optional int32 value;
+}
+```
+
+它缺少 repeated entry layer，因此没有 repetition level 可以表达多个 map entries，也无法生成 Doris `ColumnMap` offsets。
+
+这不是标准 MAP，也不是 Arrow 主要兼容的 legacy 形态。第一阶段应 reject。
+
+## 对 reader 层的影响
+
+预期不修改 reader 层核心逻辑。
+
+保持：
+
+- `ListColumnReader` 只读取 `column_schema.children[0]` 作为 element reader。
+- `MapColumnReader` 读取 `column_schema.children[0/1]` 作为 key/value reader。
+- `MapColumnReader` 对 partial MAP projection 只接受 value child projection，显式 key child projection 应 reject；即使只裁剪 value，reader 也必须完整读取 key stream。
+- `ParquetLeafReader` 只负责 leaf records/levels/values 读取和 batch materialization。
+- `nested_column_materializer.*` 只负责 Doris nested Column 构造 helper。
+
+风险点在 LIST repeated group as element：
+
+- 如果该 repeated group 是 struct element，需要确保 schema builder 不把 repeated group 再解释成一个额外 repeated container。
+- 这个风险应通过专用 build mode 或专用 helper 解决。
+
+## 错误处理策略
+
+错误信息应明确指出具体 unsupported schema 原因：
+
+- LIST outer group child count invalid。
+- LIST child is not repeated。
+- LIST repeated group has no child。
+- MAP outer group child count invalid。
+- MAP entry is not repeated group。
+- MAP entry child count is not 2。
+- MAP key is nullable。
+
+不要用过于笼统的 `Unsupported parquet LIST encoding` 覆盖所有错误，否则后续排查文件兼容性问题会困难。
+
+## 测试计划
+
+### LIST 正例
+
+1. 标准 3-level LIST：
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+2. Repeated primitive legacy LIST：
+
+```text
+optional group a (LIST) {
+  repeated int32 element;
+}
+```
+
+3. Repeated group struct element：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+4. Legacy `array` name：
+
+```text
+optional group a (LIST) {
+  repeated group array {
+    optional int32 item;
+  }
+}
+```
+
+5. Legacy `<parent>_tuple` name：
+
+```text
+optional group a (LIST) {
+  repeated group a_tuple {
+    optional int32 item;
+  }
+}
+```
+
+6. Repeated group annotated as nested LIST：
+
+```text
+optional group a (LIST) {
+  repeated group array (LIST) {
+    repeated int32 array;
+  }
+}
+```
+
+预期解析为 `ARRAY<ARRAY<INT>>`，不要剥掉 `array (LIST)` 这一层。
+
+7. Repeated group annotated as MAP：
+
+```text
+optional group a (LIST) {
+  repeated group array (MAP) {
+    repeated group key_value {
+      required binary key;
+      optional int32 value;
+    }
+  }
+}
+```
+
+预期解析为 `ARRAY<MAP<STRING, INT>>`，不要剥掉 `array (MAP)` 这一层。
+
+8. One-child repeated group whose child is repeated：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    repeated int32 items;
+  }
+}
+```
+
+预期 repeated group 本身是 struct element，解析为 `ARRAY<STRUCT<items: ARRAY<INT>>>`，不要把 `items` 提升成 list element。
+
+### LIST 反例
+
+1. outer LIST group 多 child。
+2. outer LIST child 非 repeated。
+3. repeated group 无 child。
+4. repeated LIST-annotated outer group，除非它作为 another two-level LIST 的 element 被专门支持。
+
+### MAP 正例
+
+1. 标准 `key_value` entry group。
+2. `entries` entry group name。
+3. entry group 任意名字，但结构为 repeated group with required key and value。
+4. `MAP_KEY_VALUE` legacy converted type。
+5. key/value 字段名非 `key`/`value`，但位置正确。
+
+### MAP 反例
+
+1. nullable key。
+2. outer MAP group 多 child。
+3. entry child 非 repeated。
+4. entry child 是 primitive。
+5. key-only map。
+6. no-entry MAP。
+
+## 实施步骤
+
+1. 在 `parquet_column_schema.cpp` 增加 LIST helper：
+   - `has_structural_list_name()`
+   - `resolve_list_element_node()`
+   - 必要时增加 repeated group as element 的 build helper。
+2. 改造 LIST 分支，输出统一 `ParquetColumnSchemaKind::LIST` schema tree。
+3. 增加 LIST schema/unit/regression 测试。
+   - 覆盖 repeated primitive、multi-field struct element、`array` / `<parent>_tuple` structural name。
+   - 覆盖 two-level `List<List<T>>`、two-level `List<Map<K, V>>`、单 child repeated group 且 child repeated 的 struct element。
+   - read 测试至少覆盖 null list、empty list、单元素、多元素，验证 def/rep materialization。
+4. 增加 MAP helper：
+   - `resolve_map_entry_group()`
+5. 改造 MAP 分支，放宽 entry group 名字限制，但保持 key/value 结构严格，并在 schema build 阶段折叠 entry wrapper，输出 `MAP -> key,value`。
+6. 增加 MAP schema/unit/regression 测试。
+   - 覆盖 entry group 名字兼容。
+   - 覆盖 `ParquetColumnSchema(MAP).children == [key, value]`。
+   - 覆盖 partial MAP projection 只允许 value child，key child projection reject。
+7. 如后续确有需求，再单独设计 key-only map 或 key subtree projection 支持。
+
+## 预期收益
+
+- 支持更多由 Arrow、Spark、Hive、旧 Parquet writer 产生的 LIST/MAP schema。
+- 兼容逻辑集中在 schema builder，reader 层保持稳定。
+- 为后续 complex parquet reader 的兼容性测试建立清晰边界。
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index 17a742b835a4fb..27698c2d1f9700 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -47,6 +47,7 @@
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TPaimonDeletionFileDesc;
 import org.apache.doris.thrift.TPaimonFileDesc;
+import org.apache.doris.thrift.TPaimonReaderType;
 import org.apache.doris.thrift.TPushAggOp;
 import org.apache.doris.thrift.TTableFormatFileDesc;
 
@@ -262,8 +263,10 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
             rangeDesc.setFormatType(TFileFormatType.FORMAT_JNI);
             // Use Paimon native serialization for paimon-cpp reader
             if (sessionVariable.isEnablePaimonCppReader() && split instanceof DataSplit) {
+                fileDesc.setReaderType(TPaimonReaderType.PAIMON_CPP);
                 fileDesc.setPaimonSplit(PaimonUtil.encodeDataSplitToString((DataSplit) split));
             } else {
+                fileDesc.setReaderType(TPaimonReaderType.PAIMON_JNI);
                 fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
             }
             // Set table location for paimon-cpp reader
@@ -274,6 +277,7 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
             rangeDesc.setSelfSplitWeight(paimonSplit.getSelfSplitWeight());
         } else {
             // use native reader
+            fileDesc.setReaderType(TPaimonReaderType.PAIMON_NATIVE);
             if (fileFormat.equals("orc")) {
                 rangeDesc.setFormatType(TFileFormatType.FORMAT_ORC);
             } else if (fileFormat.equals("parquet")) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index d017b921efd79e..4b72bdf3390c56 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -96,6 +96,7 @@ public class SessionVariable implements Serializable, Writable {
     public static final String SCAN_QUEUE_MEM_LIMIT = "scan_queue_mem_limit";
     public static final String MAX_SCANNERS_CONCURRENCY = "max_scanners_concurrency";
     public static final String MAX_FILE_SCANNERS_CONCURRENCY = "max_file_scanners_concurrency";
+    public static final String ENABLE_FILE_SCANNER_V2 = "enable_file_scanner_v2";
     public static final String MIN_SCANNERS_CONCURRENCY = "min_scanners_concurrency";
     public static final String MIN_FILE_SCANNERS_CONCURRENCY = "min_file_scanners_concurrency";
     public static final String MIN_SCAN_SCHEDULER_CONCURRENCY = "min_scan_scheduler_concurrency";
@@ -1137,6 +1138,11 @@ public static double getHotValueThreshold() {
             "FileScanNode 扫描数据的最大并发，默认为 16", "The max threads to read data of FileScanNode, default 16"})
     public int maxFileScannersConcurrency = 16;
 
+    @VarAttrDef.VarAttr(name = ENABLE_FILE_SCANNER_V2, needForward = true, description = {
+            "开启后 FileScanNode 会在支持的查询场景使用 FileScannerV2，默认关闭",
+            "When enabled, FileScanNode uses FileScannerV2 for supported query scans. Disabled by default."})
+    public boolean enableFileScannerV2 = true;
+
     @VarAttrDef.VarAttr(name = LOCAL_EXCHANGE_FREE_BLOCKS_LIMIT)
     public int localExchangeFreeBlocksLimit = 4;
 
@@ -2940,10 +2946,9 @@ public static boolean isEagerAggregationOnJoin() {
     public static final String ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION = "enable_mc_limit_split_optimization";
     @VarAttrDef.VarAttr(
             name = ENABLE_EXTERNAL_TABLE_BATCH_MODE,
-            fuzzy = true,
             description = {"使能外表的 batch mode 功能", "Enable the batch mode function of the external table."},
             needForward = true)
-    public boolean enableExternalTableBatchMode = true;
+    public boolean enableExternalTableBatchMode = false;
 
     @VarAttrDef.VarAttr(
             name = ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION,
@@ -3907,13 +3912,6 @@ private void setFuzzyForCatalog(Random random) {
         this.hiveTextCompression = Util.getRandomString(
                 "gzip", "defalte", "bzip2", "zstd", "lz4", "lzo", "snappy", "plain");
 
-        // batch mode
-        this.enableExternalTableBatchMode = random.nextBoolean();
-        if (this.enableExternalTableBatchMode) {
-            this.numPartitionsInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE);
-            this.numFilesInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE);
-        }
-
         // common
         this.enableCountPushDownForExternalTable = random.nextBoolean();
     }
@@ -5473,6 +5471,7 @@ public TQueryOptions toThrift() {
         tResult.setScanQueueMemLimit(maxScanQueueMemByte);
         tResult.setMaxScannersConcurrency(maxScannersConcurrency);
         tResult.setMaxFileScannersConcurrency(maxFileScannersConcurrency);
+        tResult.setEnableFileScannerV2(enableFileScannerV2);
         tResult.setMaxColumnReaderNum(maxColumnReaderNum);
         tResult.setParallelPrepareThreshold(parallelPrepareThreshold);
         tResult.setMinScannersConcurrency(minScannersConcurrency);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
index 370e4965765854..4d140b2ba57037 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
@@ -47,6 +47,7 @@ public void testAnalyzeFileFormatProperties() {
 
         Assert.assertEquals(TParquetCompressionType.SNAPPY, parquetFileFormatProperties.getParquetCompressionType());
         Assert.assertEquals(false, parquetFileFormatProperties.isParquetDisableDictionary());
+        Assert.assertTrue(parquetFileFormatProperties.isEnableInt96Timestamps());
     }
 
     @Test
@@ -139,6 +140,7 @@ public void testFullTResultFileSinkOptions() {
         parquetFileFormatProperties.fullTResultFileSinkOptions(sinkOptions);
         Assert.assertEquals(parquetFileFormatProperties.getParquetCompressionType(), sinkOptions.getParquetCompressionType());
         Assert.assertEquals(parquetFileFormatProperties.isParquetDisableDictionary(), sinkOptions.isParquetDisableDictionary());
+        Assert.assertEquals(parquetFileFormatProperties.isEnableInt96Timestamps(), sinkOptions.isEnableInt96Timestamps());
     }
 
     @Test
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index c17199d74edf91..a17cd140c93418 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -88,6 +88,10 @@ enum TExprNodeType {
   TRY_CAST_EXPR = 41
   // for search DSL function
   SEARCH_EXPR = 42,
+  // Normal predicate expression
+  PREDICATE = 43,
+  // Normal literal
+  LITERAL = 44,
 }
 
 //enum TAggregationOp {
diff --git a/gensrc/thrift/Opcodes.thrift b/gensrc/thrift/Opcodes.thrift
index 1e4002357e7599..a2d709799482eb 100644
--- a/gensrc/thrift/Opcodes.thrift
+++ b/gensrc/thrift/Opcodes.thrift
@@ -97,4 +97,6 @@ enum TExprOpcode {
     MATCH_REGEXP = 76,
     MATCH_PHRASE_EDGE = 77,
     TRY_CAST = 78,
+    // Delete operator from Iceberg/Paimon
+    DELETE = 79,
 }
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index 0d8618dbc78a0f..5ace9d75e24c08 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -505,6 +505,7 @@ struct TQueryOptions {
   // In read path, read from file cache or remote storage when execute query.
   1000: optional bool disable_file_cache = false
   1001: optional i32 file_cache_query_limit_percent = -1
+  1002: optional bool enable_file_scanner_v2 = false
 }
 
 
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 4f8826bdb3f6f4..86be200f778f1c 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -355,6 +355,12 @@ struct TPaimonDeletionFileDesc {
     3: optional i64 length;
 }
 
+enum TPaimonReaderType {
+    PAIMON_NATIVE = 0,
+    PAIMON_JNI = 1,
+    PAIMON_CPP = 2,
+}
+
 struct TPaimonFileDesc {
     1: optional string paimon_split
     2: optional string paimon_column_names
@@ -372,6 +378,8 @@ struct TPaimonFileDesc {
     14: optional string paimon_table  // deprecated
     15: optional i64 row_count // deprecated
     16: optional i64 schema_id; // for schema change.
+    // Reader implementation for logical paimon split. Native file split uses range format type.
+    17: optional TPaimonReaderType reader_type;
 }
 
 struct TTrinoConnectorFileDesc {
diff --git a/regression-test/data/export_p0/export/test_show_export.out b/regression-test/data/export_p0/export/test_show_export.out
index 90277ca28f2a9f..eb2d2ab154b1b3 100644
--- a/regression-test/data/export_p0/export/test_show_export.out
+++ b/regression-test/data/export_p0/export/test_show_export.out
@@ -102,156 +102,156 @@
 99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99	99.99	99.99	char99	99
 
 -- !select_load1 --
-1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1	1
-10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10	10
+1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1.000000000	1
+10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10.000000000	10
 100	2017-10-01	2017-10-01T00:00	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
-11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11	11
-12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12	12
-13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13	13
-14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14	14
-15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15	15
-16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16	16
-17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17	17
-18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18	18
-19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19	19
-2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2	2
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29
-3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3	3
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39
-4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4	4
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49
-5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5	5
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59
-6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6	6
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69
-7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7	7
-70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70	70
-71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71	71
-72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72	72
-73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73	73
-74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74	74
-75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75	75
-76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76	76
-77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77	77
-78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78	78
-79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79	79
-8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8	8
-80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80	80
-81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81	81
-82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82	82
-83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83	83
-84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84	84
-85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85	85
-86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86	86
-87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87	87
-88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88	88
-89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89	89
-9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9	9
-90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90	90
-91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91	91
-92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92	92
-93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93	93
-94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94	94
-95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95	95
-96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96	96
-97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97	97
-98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98	98
-99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99	99
+11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11.000000000	11
+12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12.000000000	12
+13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13.000000000	13
+14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14.000000000	14
+15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15.000000000	15
+16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16.000000000	16
+17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17.000000000	17
+18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18.000000000	18
+19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19.000000000	19
+2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2.000000000	2
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29
+3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3.000000000	3
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39
+4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4.000000000	4
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49
+5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5.000000000	5
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59
+6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6.000000000	6
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69
+7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7.000000000	7
+70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70.000000000	70
+71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71.000000000	71
+72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72.000000000	72
+73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73.000000000	73
+74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74.000000000	74
+75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75.000000000	75
+76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76.000000000	76
+77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77.000000000	77
+78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78.000000000	78
+79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79.000000000	79
+8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8.000000000	8
+80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80.000000000	80
+81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81.000000000	81
+82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82.000000000	82
+83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83.000000000	83
+84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84.000000000	84
+85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85.000000000	85
+86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86.000000000	86
+87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87.000000000	87
+88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88.000000000	88
+89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89.000000000	89
+9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9.000000000	9
+90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90.000000000	90
+91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91.000000000	91
+92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92.000000000	92
+93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93.000000000	93
+94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94.000000000	94
+95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95.000000000	95
+96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96.000000000	96
+97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97.000000000	97
+98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98.000000000	98
+99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99.000000000	99
 
 -- !select_load1 --
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69
 
diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
index c8ff8cafdd9854..cd7fe1e40fdb2d 100644
--- a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
+++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
@@ -127,3 +127,15 @@
 9	doris_9	{"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000}
 10	doris_10	{"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null}
 
+-- !select_load7 --
+1	doris_1	{"user_id":1, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":1, "sex":1, "bool_col":1, "int_col":1, "bigint_col":1, "largeint_col":"1", "float_col":1.1, "double_col":1.1, "char_col":"char1_1234", "decimal_col":1.000000000}
+2	doris_2	{"user_id":2, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":2, "sex":2, "bool_col":1, "int_col":2, "bigint_col":2, "largeint_col":"2", "float_col":2.2, "double_col":2.2, "char_col":"char2_1234", "decimal_col":2.000000000}
+3	doris_3	{"user_id":3, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":3, "sex":3, "bool_col":1, "int_col":3, "bigint_col":3, "largeint_col":"3", "float_col":3.3, "double_col":3.3, "char_col":"char3_1234", "decimal_col":3.000000000}
+4	doris_4	{"user_id":4, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":4, "sex":4, "bool_col":1, "int_col":4, "bigint_col":4, "largeint_col":"4", "float_col":4.4, "double_col":4.4, "char_col":"char4_1234", "decimal_col":4.000000000}
+5	doris_5	{"user_id":5, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":5, "sex":5, "bool_col":1, "int_col":5, "bigint_col":5, "largeint_col":"5", "float_col":5.5, "double_col":5.5, "char_col":"char5_1234", "decimal_col":5.000000000}
+6	doris_6	{"user_id":6, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":6, "sex":6, "bool_col":1, "int_col":6, "bigint_col":6, "largeint_col":"6", "float_col":6.6, "double_col":6.6, "char_col":"char6_1234", "decimal_col":6.000000000}
+7	doris_7	{"user_id":7, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":7, "sex":7, "bool_col":1, "int_col":7, "bigint_col":7, "largeint_col":"7", "float_col":7.7, "double_col":7.7, "char_col":"char7_1234", "decimal_col":7.000000000}
+8	doris_8	{"user_id":8, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":8, "sex":8, "bool_col":1, "int_col":8, "bigint_col":8, "largeint_col":"8", "float_col":8.8, "double_col":8.800000000000001, "char_col":"char8_1234", "decimal_col":8.000000000}
+9	doris_9	{"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000}
+10	doris_10	{"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null}
+
diff --git a/regression-test/data/export_p0/test_export_parquet.out b/regression-test/data/export_p0/test_export_parquet.out
index c3358efa4a97af..941dd4469a66c8 100644
--- a/regression-test/data/export_p0/test_export_parquet.out
+++ b/regression-test/data/export_p0/test_export_parquet.out
@@ -102,104 +102,104 @@
 99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99	99.99	99.99	char99	99	0.0.0.99	::99
 
 -- !select_load1 --
-1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1	1	1	::1
-10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10	10	10	::10
+1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1.000000000	1	1	::1
+10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10.000000000	10	10	::10
 100	2017-10-01	2017-10-01T00:00	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
-11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11	11	11	::11
-12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12	12	12	::12
-13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13	13	13	::13
-14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14	14	14	::14
-15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15	15	15	::15
-16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16	16	16	::16
-17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17	17	17	::17
-18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18	18	18	::18
-19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19	19	19	::19
-2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2	2	2	::2
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20	20	::20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21	21	::21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22	22	::22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23	23	::23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24	24	::24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25	25	::25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26	26	::26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27	27	::27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28	28	::28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29	29	::29
-3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3	3	3	::3
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30	30	::30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31	31	::31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32	32	::32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33	33	::33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34	34	::34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35	35	::35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36	36	::36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37	37	::37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38	38	::38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39	39	::39
-4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4	4	4	::4
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40	40	::40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41	41	::41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42	42	::42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43	43	::43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44	44	::44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45	45	::45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46	46	::46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47	47	::47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48	48	::48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49	49	::49
-5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5	5	5	::5
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50	50	::50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51	51	::51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52	52	::52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53	53	::53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54	54	::54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55	55	::55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56	56	::56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57	57	::57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58	58	::58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59	59	::59
-6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6	6	6	::6
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60	60	::60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61	61	::61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62	62	::62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63	63	::63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64	64	::64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65	65	::65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66	66	::66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67	67	::67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68	68	::68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69	69	::69
-7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7	7	7	::7
-70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70	70	70	::70
-71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71	71	71	::71
-72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72	72	72	::72
-73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73	73	73	::73
-74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74	74	74	::74
-75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75	75	75	::75
-76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76	76	76	::76
-77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77	77	77	::77
-78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78	78	78	::78
-79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79	79	79	::79
-8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8	8	8	::8
-80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80	80	80	::80
-81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81	81	81	::81
-82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82	82	82	::82
-83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83	83	83	::83
-84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84	84	84	::84
-85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85	85	85	::85
-86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86	86	86	::86
-87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87	87	87	::87
-88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88	88	88	::88
-89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89	89	89	::89
-9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9	9	9	::9
-90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90	90	90	::90
-91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91	91	91	::91
-92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92	92	92	::92
-93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93	93	93	::93
-94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94	94	94	::94
-95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95	95	95	::95
-96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96	96	96	::96
-97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97	97	97	::97
-98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98	98	98	::98
-99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99	99	99	::99
+11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11.000000000	11	11	::11
+12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12.000000000	12	12	::12
+13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13.000000000	13	13	::13
+14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14.000000000	14	14	::14
+15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15.000000000	15	15	::15
+16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16.000000000	16	16	::16
+17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17.000000000	17	17	::17
+18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18.000000000	18	18	::18
+19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19.000000000	19	19	::19
+2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2.000000000	2	2	::2
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20	20	::20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21	21	::21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22	22	::22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23	23	::23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24	24	::24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25	25	::25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26	26	::26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27	27	::27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28	28	::28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29	29	::29
+3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3.000000000	3	3	::3
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30	30	::30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31	31	::31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32	32	::32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33	33	::33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34	34	::34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35	35	::35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36	36	::36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37	37	::37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38	38	::38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39	39	::39
+4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4.000000000	4	4	::4
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40	40	::40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41	41	::41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42	42	::42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43	43	::43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44	44	::44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45	45	::45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46	46	::46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47	47	::47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48	48	::48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49	49	::49
+5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5.000000000	5	5	::5
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50	50	::50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51	51	::51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52	52	::52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53	53	::53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54	54	::54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55	55	::55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56	56	::56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57	57	::57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58	58	::58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59	59	::59
+6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6.000000000	6	6	::6
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60	60	::60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61	61	::61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62	62	::62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63	63	::63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64	64	::64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65	65	::65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66	66	::66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67	67	::67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68	68	::68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69	69	::69
+7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7.000000000	7	7	::7
+70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70.000000000	70	70	::70
+71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71.000000000	71	71	::71
+72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72.000000000	72	72	::72
+73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73.000000000	73	73	::73
+74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74.000000000	74	74	::74
+75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75.000000000	75	75	::75
+76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76.000000000	76	76	::76
+77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77.000000000	77	77	::77
+78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78.000000000	78	78	::78
+79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79.000000000	79	79	::79
+8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8.000000000	8	8	::8
+80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80.000000000	80	80	::80
+81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81.000000000	81	81	::81
+82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82.000000000	82	82	::82
+83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83.000000000	83	83	::83
+84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84.000000000	84	84	::84
+85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85.000000000	85	85	::85
+86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86.000000000	86	86	::86
+87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87.000000000	87	87	::87
+88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88.000000000	88	88	::88
+89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89.000000000	89	89	::89
+9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9.000000000	9	9	::9
+90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90.000000000	90	90	::90
+91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91.000000000	91	91	::91
+92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92.000000000	92	92	::92
+93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93.000000000	93	93	::93
+94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94.000000000	94	94	::94
+95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95.000000000	95	95	::95
+96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96.000000000	96	96	::96
+97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97.000000000	97	97	::97
+98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98.000000000	98	98	::98
+99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99.000000000	99	99	::99
 
diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
index 59e94ef9429ec9..784ad963ce4a72 100644
--- a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
+++ b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
@@ -30,14 +30,14 @@
 8	nereids	\N
 
 -- !select_base2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !select_tvf2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !hive_docker_02 --
 1	2023-04-20	2023-04-20	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
@@ -75,14 +75,14 @@
 8	nereids	\N
 
 -- !select_base2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !select_tvf2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !hive_docker_02 --
 1	2023-04-20	2023-04-20	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out
similarity index 100%
rename from regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out
rename to regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out
diff --git a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
index 160c99248fe90c..9adea59bbfba3e 100644
--- a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
+++ b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
@@ -199,203 +199,3 @@ true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
 -- !hive_docker_ctas_types_02 --
 true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
 
--- !ctas_01 --
-2
-3
-
--- !hive_docker_ctas_01 --
-2
-3
-
--- !ctas_02 --
-2
-3
-
--- !hive_docker_ctas_02 --
-2
-3
-
--- !ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_ex01 --
-2
-3
-
--- !hive_docker_ctas_ex01 --
-2
-3
-
--- !ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	\N
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	__HIVE_DEFAULT_PARTITION__
-22	value_for_pt11	value_for_pt22
-
--- !ctas_03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !hive_docker_ctas_ex03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !ctas_04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !hive_docker_ctas_ex04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !qualified_table1 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !qualified_table2 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29T23:19:34
-
--- !hive_docker_ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29 23:19:34.0
-
--- !ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !hive_docker_ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !ctas_01 --
-2
-3
-
--- !hive_docker_ctas_01 --
-2
-3
-
--- !ctas_02 --
-2
-3
-
--- !hive_docker_ctas_02 --
-2
-3
-
--- !ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_ex01 --
-2
-3
-
--- !hive_docker_ctas_ex01 --
-2
-3
-
--- !ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	\N
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	__HIVE_DEFAULT_PARTITION__
-22	value_for_pt11	value_for_pt22
-
--- !ctas_03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !hive_docker_ctas_ex03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !ctas_04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !hive_docker_ctas_ex04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !qualified_table1 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !qualified_table2 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29T23:19:34
-
--- !hive_docker_ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29 23:19:34.0
-
--- !ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !hive_docker_ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
diff --git a/regression-test/data/external_table_p0/hive/test_complex_types.out b/regression-test/data/external_table_p0/hive/test_complex_types.out
index 4a9dbbe835c7fc..5f81514ea59b23 100644
--- a/regression-test/data/external_table_p0/hive/test_complex_types.out
+++ b/regression-test/data/external_table_p0/hive/test_complex_types.out
@@ -47,51 +47,3 @@
 -- !date_dict --
 2036-12-28	1898-12-28	2539-12-28
 
--- !null_element_at --
-0
-
--- !map_key_select --
-38111	0.770169659057425
-
--- !map_keys --
-["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", "tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"]
-
--- !map_values --
-[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 0.9315902037607061]
-
--- !map_contains_key --
-1077	[0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358]	{"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, "ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, "GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226}	{"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"}
-
--- !array_max --
-11028
-
--- !null_element_at_orc --
-0
-
--- !map_key_select_orc --
-38111	0.770169659057425
-
--- !map_keys_orc --
-["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", "tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"]
-
--- !map_values_orc --
-[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 0.9315902037607061]
-
--- !map_contains_key_orc --
-1077	[0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358]	{"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, "ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, "GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226}	{"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"}
-
--- !array_max_orc --
-11028
-
--- !offsets_check --
-0	[1, 2]	[[], [3], null]	{"a":1, "b":2}	{"s1":"e", "s2":null}
-1	[]	[]	{}	\N
-2	\N	\N	\N	{"s1":"h", "s2":10}
-3	[5, null]	[[6, 7], [8, null], null]	{"f":1, "g":null}	{"s1":null, "s2":9}
-
--- !map_with_nullable_key --
-\N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa	test	3	4	5.1	6.2	true	false	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	test2	{"test":"test"}	{"test":"test"}	{"test":"test"}	{3:3}	{4:4}	{5:5}	{6:6}	{1:1}	{-1.2:-1.2}	{12.30:12.30}	{-1234.5678:-1234.5678}	{123456789.12340000:123456789.12340000}	{-1234567890.12345678:-1234567890.12345678}	{1234567890123456789012.1234567800000000:1234567890123456789012.1234567800000000}	["test"]	[3]	[4]	[5]	[6]	[1]	["test"]	["test"]	[-1.2]	[12.30]	[-1234.5678]	[123456789.12340000]	[-1234567890.12345678]	[1234567890123456789012.1234567800000000]	{"s_bigint":1}	{"test":[{"s_int":1}]}	{"struct_field":["1", "2", "3"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":null, "struct_non_nulls_after_nulls2":"some string"}	{"struct_field1":null, "struct_field2":"some string", "strict_field3":{"nested_struct_field1":null, "nested_struct_field2":"nested_string2"}}	{"k1":"v1", "k2":null, "k3":"v3"}	[null, "test"]	["test-1", null, "test-2"]	["test", null]	[null, null, null]
-
--- !date_dict --
-2036-12-28	1898-12-28	2539-12-28
-
diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
index 8a104343fc4e10..57367a3bf0999c 100644
--- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
+++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
@@ -123,127 +123,3 @@ a126	15
 2017-09-13	2009-09-21T04:23:14.309124
 2024-03-23	2024-02-01T21:11:09.170
 
--- !q01 --
-zhangsan	1
-lisi	1
-
--- !q02 --
-1	1
-2	1
-3	1
-4	1
-
--- !q03 --
-123	china	4	56	sc
-234	america	5	67	ls
-345	cana	4	56	fy
-567	fre	7	89	pa
-
--- !q04 --
-p_partkey2	p_name2	p_mfgr2	p_brand2	p_type2	p_size2	p_con2	p_r_price2	p_comment2
-p_partkey1	p_name1	p_mfgr1	p_brand1	p_type1	p_size1	p_con1	p_r_price1	p_comment1
-p_partkey0	p_name0	p_mfgr0	p_brand0	p_type0	p_size0	p_con0	p_r_price0	p_comment0
-
--- !q05 --
-batchno	appsheet_no	filedate	t_no	tano	t_name	chged_no	mob_no2	home_no	off_no
-off_no	home_no	mob_no2	chged_no	t_name	tano	t_no	filedate	appsheet_no	batchno
-
--- !q06 --
-bill_code	dates	ord_year	ord_month	ord_quarter	on_time
-
--- !q07 --
-2
-
--- !q08 --
-123	zhangsan	12	123.45	2022-01-01
-124	lisi	12	123.45	2022-01-01
-125	lisan	12	123.45	2022-01-02
-
--- !q09 --
-a123	12
-a124	13
-a125	14
-a126	15
-
--- !par_fields_in_file_orc1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc5 --
-
--- !par_fields_in_file_parquet5 --
-
--- !par_fields_in_file_orc1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc5 --
-
--- !par_fields_in_file_parquet5 --
-
--- !parquet_adjusted_utc --
-1997-09-21	1999-01-12T15:12:31.235784
-1998-01-12	1993-06-11T11:33:12.356500
-2002-09-29	2001-01-17T21:23:42.120
-2008-08-07	2023-09-23T11:12:17.458
-2009-11-13	2011-11-12T01:23:06.986
-2012-07-08	2023-11-09T20:21:16.321
-2017-09-13	2009-09-21T04:23:14.309124
-2024-03-23	2024-02-01T21:11:09.170
-
diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
index 0402feef40e6b5..deda902d300d43 100644
--- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
+++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
@@ -119,123 +119,3 @@
 -- !q06 --
 2023-01-03T00:00	100	0.3	test3
 
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
index ca9ca885c5b854..27d5d949eb4003 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
@@ -1,573 +1,440 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
--- !q21 --
-600005
-
--- !q22 --
-1510010
-
--- !q23 --
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	2023-08-21
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	lz4
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	snappy
-
--- !q31 --
-600005
-
--- !q32 --
-1510010
-
--- !q33 --
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	2023-08-21
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	lz4
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	snappy
-
 -- !q42 --
 215
 
 -- !q43 --
 1	100	5	1000000000	10.5	20.75	true	First	A	Alpha	2023-10-06	2023-10-06T14:30	123.45
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-1	979	44	10163954251	28.827957	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+1	979	44	10163954251	28.82796	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-10	210	26	8549838179	23.438345	73.36477128189287	true	Random	N	VVXIF	2023-11-24	2023-12-13T18:04:58	226.65
-10	386	51	1214815770	13.959902	36.64197990482059	false	Random	J	ORLGI	2023-12-18	2023-11-27T17:13:58	852.62
-10	966	38	2203748112	45.555325	27.908447208440094	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
+10	210	26	8549838179	23.43834	73.36477128189287	true	Random	N	VVXIF	2023-11-24	2023-12-13T18:04:58	226.65
+10	386	51	1214815770	13.9599	36.64197990482059	false	Random	J	ORLGI	2023-12-18	2023-11-27T17:13:58	852.62
+10	966	38	2203748112	45.55532	27.90844720844009	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
 100	281	26	3174393241	51.05278	52.09566669589555	false	Random	F	SLDWB	2023-12-14	2023-12-12T07:03:58	798.30
 100	289	71	4919981667	66.56684	69.73132704711037	true	Random	V	QOLAP	2023-12-17	2023-12-23T09:38:58	217.05
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	426	67	8473986652	17.942455	71.80682514420877	true	Random	X	FXDUV	2023-12-04	2023-12-22T07:51:58	129.81
-11	441	19	7370044350	74.261696	62.013817404758086	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
-11	770	17	7962512669	12.508753	83.33847413902296	true	Random	P	LHJRA	2023-12-06	2023-12-04T15:48:58	970.51
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	426	67	8473986652	17.94246	71.80682514420877	true	Random	X	FXDUV	2023-12-04	2023-12-22T07:51:58	129.81
+11	441	19	7370044350	74.2617	62.01381740475809	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+11	770	17	7962512669	12.50875	83.33847413902296	true	Random	P	LHJRA	2023-12-06	2023-12-04T15:48:58	970.51
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
-12	751	8	12205294947	23.468674	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
+12	751	8	12205294947	23.46867	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
 12	782	48	5080583047	75.55138	49.6324463213595	true	Random	N	WYJDW	2023-12-16	2023-12-18T02:58:58	944.42
-12	987	73	1432735571	40.308147	43.5019559828596	true	Random	S	MZUNG	2023-12-07	2023-12-03T13:42:58	215.12
+12	987	73	1432735571	40.30815	43.5019559828596	true	Random	S	MZUNG	2023-12-07	2023-12-03T13:42:58	215.12
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
-13	335	39	13869202091	30.426075	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
+13	335	39	13869202091	30.42607	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	503	34	6763884255	23.660393	63.9797872103468	true	Random	S	POEBK	2023-12-22	2023-12-23T23:16:58	486.62
-13	696	74	3370487489	84.544014	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
-13	859	65	7433576046	56.136265	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
+13	503	34	6763884255	23.66039	63.9797872103468	true	Random	S	POEBK	2023-12-22	2023-12-23T23:16:58	486.62
+13	696	74	3370487489	84.54401	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	859	65	7433576046	56.13626	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	195	17	2370700139	16.777058	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
-14	966	65	7828602539	62.430664	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
-14	968	16	11314514196	62.509666	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+14	195	17	2370700139	16.77706	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
+14	966	65	7828602539	62.43066	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
+14	968	16	11314514196	62.50967	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 15	703	67	4284267079	85.38059	91.41088583496226	true	Random	T	PHZRC	2023-12-04	2023-12-08T15:54:58	185.19
-16	135	22	7901304568	43.944805	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
+16	135	22	7901304568	43.94481	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
 17	698	55	1807368797	20.17171	43.84496606184709	true	Random	P	SHSJV	2023-12-01	2023-11-25T11:56:58	810.95
 17	794	14	8377523030	28.07663	52.3837762020057	false	Random	E	WPMIN	2023-12-03	2023-11-26T04:59:58	239.42
 17	913	32	4647929554	78.91502	70.54487265463735	true	Random	S	WFPNS	2023-11-27	2023-11-26T03:29:58	321.45
-18	690	17	1399456103	63.261967	42.964715823771236	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-19	917	66	2340946367	89.035675	22.649362455875274	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
-19	993	13	7039833438	79.769066	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
+18	690	17	1399456103	63.26197	42.96471582377124	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+19	917	66	2340946367	89.03568	22.64936245587527	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
+19	993	13	7039833438	79.76907	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
 2	200	10	2000000000	15.75	25.5	false	Second	B	Beta	2023-10-07	2023-10-07T15:45	234.56
 2	850	75	7075823565	83.65178	62.56093886118189	false	Random	F	RFHAG	2023-11-24	2023-12-03T01:06:58	495.12
 2	921	62	8557914543	78.52379	58.6849882881372	false	Random	D	KBXXS	2023-12-07	2023-12-02T22:24:58	1782.88
-2	925	46	6013180177	41.107002	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
-20	248	64	7704906572	35.089928	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+2	925	46	6013180177	41.107	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
+20	248	64	7704906572	35.08993	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
 22	235	19	6963606423	65.68033	54.1995295752517	true	Random	E	ENVRH	2023-12-22	2023-11-29T14:42:58	864.89
-23	192	8	5102667616	54.111057	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
-27	452	74	4240215371	50.569168	75.68204627611644	true	Random	G	AZOWU	2023-12-01	2023-11-26T06:24:58	201.31
+23	192	8	5102667616	54.11106	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
+27	452	74	4240215371	50.56917	75.68204627611644	true	Random	G	AZOWU	2023-12-01	2023-11-26T06:24:58	201.31
 27	866	24	5531365994	72.77447	86.96690821165853	false	Random	S	TZPFJ	2023-11-28	2023-12-13T15:31:58	1274.75
-28	655	21	14580233860	12.503378	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
-29	157	34	2302882987	51.924015	20.311140937696468	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
-29	910	52	5544039917	22.179396	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
-29	923	57	1591814253	68.57371	33.342802789892986	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
-3	259	74	7422478791	22.291426	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
-3	422	25	5996825874	89.173584	62.758513798505824	false	Random	Z	CDYAO	2023-12-14	2023-12-08T09:27:58	567.23
+28	655	21	14580233860	12.50338	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
+29	157	34	2302882987	51.92402	20.31114093769647	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
+29	910	52	5544039917	22.1794	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
+29	923	57	1591814253	68.57371	33.34280278989299	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
+3	259	74	7422478791	22.29143	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+3	422	25	5996825874	89.17358	62.75851379850582	false	Random	Z	CDYAO	2023-12-14	2023-12-08T09:27:58	567.23
 3	668	60	1942550969	83.43451	87.15906153619602	true	Random	F	QYSRS	2023-12-22	2023-12-10T22:17:58	320.22
-30	292	71	10308444223	63.039078	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
-30	572	6	3022031043	57.813908	72.29244668177799	true	Random	X	EHJDN	2023-12-11	2023-12-12T02:44:58	910.38
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+30	292	71	10308444223	63.03908	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
+30	572	6	3022031043	57.81391	72.29244668177799	true	Random	X	EHJDN	2023-12-11	2023-12-12T02:44:58	910.38
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
 31	395	22	6141426904	88.37914	52.0655270963123	false	Random	J	DRPJV	2023-12-07	2023-11-29T03:15:58	1076.41
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
 33	198	20	13225406950	67.7327	58.63863378877107	true	Random	I	ZKXRA	2023-12-07	2023-11-29T02:33:58	566.76
 33	321	39	12537851805	38.26871	32.6626492245712	true	Random	S	OICCE	2023-12-02	2023-12-19T16:41:58	306.92
-33	916	53	5666674210	57.998173	61.774881852563475	true	Random	J	WJAXA	2023-11-27	2023-12-05T19:58:58	976.13
+33	916	53	5666674210	57.99817	61.77488185256347	true	Random	J	WJAXA	2023-11-27	2023-12-05T19:58:58	976.13
 34	145	44	14060350663	73.02436	68.40544929600975	true	Random	S	UUJFP	2023-11-23	2023-12-12T06:08:58	739.45
-34	585	43	1429300527	61.706585	80.88100239373303	false	Random	O	JKJOH	2023-12-17	2023-12-07T11:00:58	468.11
-35	297	75	2468378214	51.353462	34.18114780065386	false	Random	C	HBYZO	2023-12-05	2023-12-09T21:42:58	534.70
+34	585	43	1429300527	61.70658	80.88100239373303	false	Random	O	JKJOH	2023-12-17	2023-12-07T11:00:58	468.11
+35	297	75	2468378214	51.35346	34.18114780065386	false	Random	C	HBYZO	2023-12-05	2023-12-09T21:42:58	534.70
 37	438	39	6809169396	83.56728	40.90894521029911	true	Random	W	GXPAY	2023-12-07	2023-12-18T06:35:58	383.18
 38	606	57	14585148556	82.67463	79.18300302689997	false	Random	E	RSFUZ	2023-12-16	2023-11-27T18:55:58	970.25
-39	726	50	3865644066	26.225628	28.534393094364418	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
+39	726	50	3865644066	26.22563	28.53439309436442	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
 4	400	20	4000000000	25.5	35.25	false	Fourth	D	Delta	2023-10-09	2023-10-09T17:30	456.78
-4	569	72	10560903405	50.255936	47.535145739285184	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
+4	569	72	10560903405	50.25594	47.53514573928518	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
 4	682	22	2040832636	60.33469	67.33499498711046	true	Random	W	QUICJ	2023-11-24	2023-12-14T10:17:58	579.56
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
 40	693	69	13276482882	44.35974	82.57845708670757	true	Random	B	RCCSU	2023-11-29	2023-12-01T20:11:58	183.64
-40	914	7	4902128502	19.442041	33.099787387344406	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+40	914	7	4902128502	19.44204	33.09978738734441	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
 41	599	54	8095449906	22.58196	37.99742597458578	false	Random	T	GTQXP	2023-12-12	2023-12-22T19:08:58	743.46
-41	697	21	1200243566	12.466168	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
+41	697	21	1200243566	12.46617	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
-41	840	65	8988241658	37.428593	42.25992474748068	false	Random	E	HURYX	2023-12-22	2023-12-19T01:55:58	141.89
+41	840	65	8988241658	37.42859	42.25992474748068	false	Random	E	HURYX	2023-12-22	2023-12-19T01:55:58	141.89
 42	143	42	3421815721	65.27691	87.91368867538209	true	Random	S	AXGVL	2023-12-06	2023-11-29T07:36:58	575.01
 42	178	38	7559404453	69.69449	64.37154501388798	true	Random	G	QUMUN	2023-12-14	2023-12-17T01:37:58	1190.44
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
-43	178	64	6969956763	40.980415	52.998828731408516	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
+43	178	64	6969956763	40.98042	52.99882873140852	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
 43	828	24	12011396947	45.07647	54.2136449479346	true	Random	E	HIDUO	2023-12-02	2023-12-19T01:14:58	233.10
 44	219	38	8596488294	73.52956	94.10797854680568	true	Random	E	HMWBI	2023-12-15	2023-12-06T00:51:58	1907.47
-44	694	55	3626514138	62.504086	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
-44	912	63	8534761366	55.993538	50.235171557550416	false	Random	N	OVQRQ	2023-12-08	2023-11-24T03:39:58	264.92
-44	928	7	1939079012	14.426672	68.86451571230457	false	Random	I	EKVWY	2023-12-15	2023-12-09T10:43:58	846.74
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
-45	492	43	3870916386	51.069588	42.652270406300794	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
-47	508	48	1456473942	48.488297	20.377955902326608	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
-47	566	50	1426586688	51.278687	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
+44	694	55	3626514138	62.50409	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
+44	912	63	8534761366	55.99354	50.23517155755042	false	Random	N	OVQRQ	2023-12-08	2023-11-24T03:39:58	264.92
+44	928	7	1939079012	14.42667	68.86451571230457	false	Random	I	EKVWY	2023-12-15	2023-12-09T10:43:58	846.74
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	492	43	3870916386	51.06959	42.65227040630079	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
+47	508	48	1456473942	48.4883	20.37795590232661	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
+47	566	50	1426586688	51.27869	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-49	165	38	4482178563	34.706547	69.17129468406594	false	Random	W	CPZNY	2023-12-15	2023-11-23T19:56:58	512.60
-49	412	16	8300982793	56.263252	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
+49	165	38	4482178563	34.70655	69.17129468406594	false	Random	W	CPZNY	2023-12-15	2023-11-23T19:56:58	512.60
+49	412	16	8300982793	56.26325	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
 49	511	51	8602055259	88.1686	88.98712207285577	false	Random	M	ZDKEY	2023-12-10	2023-11-25T02:44:58	241.08
-49	568	70	2916596630	79.16303	56.114316916863025	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
+49	568	70	2916596630	79.16303	56.11431691686303	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
 5	500	25	5000000000	30.75	40.5	true	Fifth	E	Epsilon	2023-10-10	2023-10-10T18:45	567.89
-5	768	5	4152322228	41.128906	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
-5	887	74	4082758600	22.797577	93.28246034891224	false	Random	V	MPPGX	2023-12-01	2023-11-29T01:53:58	510.50
+5	768	5	4152322228	41.12891	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+5	887	74	4082758600	22.79758	93.28246034891224	false	Random	V	MPPGX	2023-12-01	2023-11-29T01:53:58	510.50
 50	126	58	4433111715	75.31828	43.28056186824247	false	Random	H	UTDJF	2023-12-19	2023-12-10T08:24:58	368.42
 51	778	59	13914307584	27.48499	91.47665081887983	true	Random	X	FGFHK	2023-12-01	2023-12-10T03:24:58	402.63
-51	898	32	13510411411	18.679659	21.406761033351007	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+51	898	32	13510411411	18.67966	21.40676103335101	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
 53	505	52	9862728376	58.40501	57.60544454281924	false	Random	V	WYCTZ	2023-11-24	2023-12-20T05:13:58	210.43
 53	667	49	10531976747	50.22229	49.64660893042742	false	Random	K	WNRJE	2023-12-04	2023-12-19T14:57:58	680.97
-53	713	14	1464447148	23.474258	45.35056918414047	false	Random	Q	UHMLT	2023-12-10	2023-11-30T02:07:58	286.70
+53	713	14	1464447148	23.47426	45.35056918414047	false	Random	Q	UHMLT	2023-12-10	2023-11-30T02:07:58	286.70
 53	715	29	10917905565	41.83069	93.50885201221966	true	Random	U	TRLSY	2023-12-03	2023-11-26T15:13:58	369.72
-54	467	42	13684826428	38.491455	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
-54	827	55	7054839267	58.555687	25.891004802115663	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
-54	843	34	9547939940	38.66475	36.370944299232434	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
+54	467	42	13684826428	38.49146	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
+54	827	55	7054839267	58.55569	25.89100480211566	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
+54	843	34	9547939940	38.66475	36.37094429923243	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-57	936	26	12164628867	56.541275	56.276679149397076	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
-59	144	31	6208909394	67.417076	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+57	936	26	12164628867	56.54128	56.27667914939708	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
+59	144	31	6208909394	67.41708	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
 59	509	50	5501336408	39.94401	73.35770882761237	true	Random	I	PVZNO	2023-12-04	2023-11-27T04:40:58	1177.33
 6	600	30	6000000000	35.25	45.75	false	Sixth	F	Zeta	2023-10-11	2023-10-11T19:15	678.90
-60	711	69	1493870104	22.574188	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
-60	875	42	14283877167	48.811504	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
-61	267	61	11407448558	12.877184	42.144845857251944	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
-61	414	63	14506877706	12.540966	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
-62	793	46	7308804595	39.766644	48.88672198076526	true	Random	V	TPENZ	2023-11-26	2023-12-23T17:51:58	388.46
+60	711	69	1493870104	22.57419	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
+60	875	42	14283877167	48.8115	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
+61	267	61	11407448558	12.87718	42.14484585725194	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
+61	414	63	14506877706	12.54097	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+62	793	46	7308804595	39.76664	48.88672198076526	true	Random	V	TPENZ	2023-11-26	2023-12-23T17:51:58	388.46
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
-63	383	35	5161212745	39.455276	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
-63	410	33	1767102777	72.260124	56.971483381024896	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
-64	479	20	1710421528	53.324104	33.55443503561635	false	Random	Q	ONZRK	2023-12-09	2023-12-01T22:29:58	252.13
+63	383	35	5161212745	39.45528	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
+63	410	33	1767102777	72.26012	56.9714833810249	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
+64	479	20	1710421528	53.3241	33.55443503561635	false	Random	Q	ONZRK	2023-12-09	2023-12-01T22:29:58	252.13
 64	678	14	13681447851	74.83621	36.94143092647816	true	Random	J	KELFB	2023-12-01	2023-12-07T18:14:58	308.26
-64	719	36	1224510454	64.237434	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
-64	822	26	1154241961	52.165447	26.779469377773403	true	Random	E	YWNAD	2023-12-08	2023-12-19T19:08:58	731.15
-65	571	24	10523050555	45.865078	70.80680527390149	true	Random	Y	DILBW	2023-12-17	2023-11-25T22:41:58	859.30
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-66	521	30	7757576974	69.440155	92.3562810104632	false	Random	H	SSOCR	2023-12-19	2023-11-30T06:51:58	913.34
-67	484	65	10817432713	62.168163	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
-68	266	31	8183454755	69.19586	23.139304803938643	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
-68	554	33	3525526216	29.078024	29.6567390059356	false	Random	Y	EUGOF	2023-11-23	2023-12-15T10:33:58	395.41
-68	591	60	4813122821	33.210274	54.464145718507616	false	Random	X	EXROI	2023-12-07	2023-12-07T00:39:58	290.11
+64	719	36	1224510454	64.23743	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
+64	822	26	1154241961	52.16545	26.7794693777734	true	Random	E	YWNAD	2023-12-08	2023-12-19T19:08:58	731.15
+65	571	24	10523050555	45.86508	70.80680527390149	true	Random	Y	DILBW	2023-12-17	2023-11-25T22:41:58	859.30
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+66	521	30	7757576974	69.44016	92.35628101046321	false	Random	H	SSOCR	2023-12-19	2023-11-30T06:51:58	913.34
+67	484	65	10817432713	62.16816	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
+68	266	31	8183454755	69.19586	23.13930480393864	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
+68	554	33	3525526216	29.07802	29.6567390059356	false	Random	Y	EUGOF	2023-11-23	2023-12-15T10:33:58	395.41
+68	591	60	4813122821	33.21027	54.46414571850762	false	Random	X	EXROI	2023-12-07	2023-12-07T00:39:58	290.11
 68	756	63	5416393421	66.41538	76.32820339134415	false	Random	Y	CUNAL	2023-12-23	2023-12-14T22:49:58	1109.25
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-68	947	60	7257499958	45.661217	77.42577781358565	false	Random	F	ENQGA	2023-11-24	2023-11-29T07:33:58	319.99
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+68	947	60	7257499958	45.66122	77.42577781358565	false	Random	F	ENQGA	2023-11-24	2023-11-29T07:33:58	319.99
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 7	340	50	8934567449	83.79683	35.39446967734915	false	Random	L	CWYFN	2023-12-05	2023-12-23T02:26:58	806.15
-7	700	35	7000000000	40.5	50.0	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
+7	700	35	7000000000	40.5	50	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
 7	969	62	3451343234	57.17074	56.74513811095188	false	Random	G	OWDSC	2023-12-19	2023-12-11T17:17:58	1874.22
-70	231	67	4547989149	35.103123	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
-70	421	23	3153379289	27.412096	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
+70	231	67	4547989149	35.10312	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
+70	421	23	3153379289	27.4121	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
 70	751	56	7828222634	52.8313	55.7263634552559	true	Random	B	TFHMH	2023-11-30	2023-12-24T12:22:58	1166.13
-71	452	25	4464808420	18.155642	61.988641984596185	false	Random	K	YXFVY	2023-12-15	2023-12-08T04:58:58	514.74
-71	594	26	1024634104	62.92234	37.216752731371386	true	Random	J	SPUWU	2023-12-04	2023-12-23T08:50:58	779.97
-72	377	11	3042707243	55.289066	53.72552524152444	true	Random	Q	BAPHV	2023-12-06	2023-11-30T07:14:58	119.39
-73	866	49	4618070115	46.803646	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
-74	670	60	4783926122	23.513939	91.24357097091087	true	Random	Y	YFPMC	2023-12-23	2023-12-22T22:29:58	943.62
-75	368	73	6944888766	31.500992	56.88267149430107	false	Random	H	LEXKZ	2023-12-21	2023-12-14T01:12:58	443.91
+71	452	25	4464808420	18.15564	61.98864198459619	false	Random	K	YXFVY	2023-12-15	2023-12-08T04:58:58	514.74
+71	594	26	1024634104	62.92234	37.21675273137139	true	Random	J	SPUWU	2023-12-04	2023-12-23T08:50:58	779.97
+72	377	11	3042707243	55.28907	53.72552524152444	true	Random	Q	BAPHV	2023-12-06	2023-11-30T07:14:58	119.39
+73	866	49	4618070115	46.80365	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
+74	670	60	4783926122	23.51394	91.24357097091087	true	Random	Y	YFPMC	2023-12-23	2023-12-22T22:29:58	943.62
+75	368	73	6944888766	31.50099	56.88267149430107	false	Random	H	LEXKZ	2023-12-21	2023-12-14T01:12:58	443.91
 76	410	20	10425110604	66.26356	92.68329033006493	false	Random	L	JHFYD	2023-11-23	2023-11-29T10:34:58	867.56
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
 77	131	19	2964167114	33.23181	53.35246738882714	false	Random	G	AHGFO	2023-12-19	2023-12-01T10:11:58	1837.90
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
-79	314	17	6823498005	22.562634	72.70049796639023	true	Random	K	FPSNZ	2023-12-07	2023-12-15T11:52:58	211.50
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+79	314	17	6823498005	22.56263	72.70049796639023	true	Random	K	FPSNZ	2023-12-07	2023-12-15T11:52:58	211.50
 8	550	48	13655992126	52.90345	51.35114230137935	false	Random	X	JTVSE	2023-12-13	2023-12-15T03:49:58	361.55
 8	800	40	8000000000	45.75	55.25	false	Eighth	H	Theta	2023-10-13	2023-10-13T21:45	890.12
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	267	57	8797946135	35.604717	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
-80	815	19	14529289205	19.769405	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
+80	267	57	8797946135	35.60472	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
+80	815	19	14529289205	19.76941	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
 81	726	66	9327218218	81.50363	39.9702863173827	true	Random	X	WODRP	2023-11-28	2023-12-23T13:25:58	561.98
 82	107	51	1358006007	78.36581	46.09413324325159	true	Random	C	IPNQU	2023-12-01	2023-12-14T05:41:58	417.17
 82	133	60	4616538638	88.8813	30.82745983013354	true	Random	W	KPIJE	2023-12-20	2023-12-01T07:57:58	583.41
-82	531	44	10642962933	26.818586	23.851865471979615	false	Random	F	NMQOD	2023-12-13	2023-12-18T19:34:58	861.78
+82	531	44	10642962933	26.81859	23.85186547197961	false	Random	F	NMQOD	2023-12-13	2023-12-18T19:34:58	861.78
 82	603	60	9083469993	81.24088	44.46228092092543	true	Random	Y	WTQGU	2023-11-30	2023-11-28T13:18:58	1448.45
 82	982	62	8955063933	81.2855	78.30439669511465	true	Random	J	SOCOT	2023-12-02	2023-12-02T21:17:58	814.60
-83	700	46	4569093424	50.063602	47.75811273142146	false	Random	R	TEGAY	2023-12-19	2023-12-07T06:46:58	760.22
-84	427	60	9035762847	81.971306	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
+83	700	46	4569093424	50.0636	47.75811273142146	false	Random	R	TEGAY	2023-12-19	2023-12-07T06:46:58	760.22
+84	427	60	9035762847	81.97131	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
 85	375	63	6797318130	85.47522	58.16330728665678	true	Random	E	UNZLS	2023-12-01	2023-12-04T05:17:58	1949.48
-85	845	42	2373712244	74.551315	79.15491248184088	false	Random	B	QJRKO	2023-11-29	2023-12-04T09:20:58	317.17
-85	873	18	7233488476	33.83051	31.655950581225508	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+85	845	42	2373712244	74.55132	79.15491248184088	false	Random	B	QJRKO	2023-11-29	2023-12-04T09:20:58	317.17
+85	873	18	7233488476	33.83051	31.65595058122551	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
 86	662	53	8875065706	28.64778	30.6775849729486	false	Random	N	YNQAY	2023-12-15	2023-11-24T21:56:58	1108.35
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
 87	145	64	9022533179	37.80205	63.26081178595084	true	Random	T	PEOPK	2023-12-08	2023-12-07T17:41:58	1167.05
-87	641	64	4786767059	14.765089	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
-88	274	41	14108849690	73.74919	42.625751442467404	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
-88	728	59	8439434199	30.372904	59.410283344764366	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
+87	641	64	4786767059	14.76509	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
+88	274	41	14108849690	73.74919	42.6257514424674	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
+88	728	59	8439434199	30.3729	59.41028334476437	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
 88	765	69	9753682777	83.42646	25.99260711248508	true	Random	M	MEJAX	2023-11-25	2023-12-20T09:21:58	1647.22
-89	129	64	6400162051	67.910965	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
-89	377	22	14340881803	32.61157	82.5503801214006	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
-9	113	7	6162580854	11.346889	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
+89	129	64	6400162051	67.91096	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
+89	377	22	14340881803	32.61157	82.55038012140059	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+9	113	7	6162580854	11.34689	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
 9	268	59	8149280252	86.66627	70.91298799618343	false	Random	E	PVKYK	2023-12-21	2023-11-25T00:28:58	263.17
 9	900	45	9000000000	50.0	60.5	true	Ninth	I	Iota	2023-10-14	2023-10-14T22:15	901.23
 9	907	24	6113036809	66.06377	50.26485838775805	true	Random	X	XLPOL	2023-11-23	2023-12-02T09:03:58	256.61
 90	391	26	12874761259	21.49042	53.46850617467312	true	Random	Q	QTJPE	2023-12-17	2023-12-03T17:40:58	748.05
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-92	344	29	5182139341	31.653255	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
-93	887	20	13555948969	70.57364	32.621532934876804	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
-94	216	49	8773264156	81.617195	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
-94	693	60	4818659234	26.04229	83.2975107272106	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
-95	560	62	1389447643	19.202044	85.46518830161321	true	Random	S	LQRRB	2023-12-16	2023-12-12T06:12:58	445.65
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
-96	637	39	5516035994	55.90832	60.522041012562816	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
-97	415	74	10346322649	21.667427	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
+92	344	29	5182139341	31.65326	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
+93	887	20	13555948969	70.57364	32.6215329348768	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
+94	216	49	8773264156	81.6172	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
+94	693	60	4818659234	26.04229	83.29751072721059	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
+95	560	62	1389447643	19.20204	85.46518830161321	true	Random	S	LQRRB	2023-12-16	2023-12-12T06:12:58	445.65
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+96	637	39	5516035994	55.90832	60.52204101256282	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
+97	415	74	10346322649	21.66743	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
-98	228	65	4782017237	55.10206	31.414570993700565	true	Random	P	EOIFT	2023-12-07	2023-12-15T08:12:58	137.49
-99	632	39	8911195323	74.581276	78.2764804276292	false	Random	Q	WTQCL	2023-12-02	2023-12-05T09:18:58	200.21
+98	228	65	4782017237	55.10206	31.41457099370056	true	Random	P	EOIFT	2023-12-07	2023-12-15T08:12:58	137.49
+99	632	39	8911195323	74.58128	78.2764804276292	false	Random	Q	WTQCL	2023-12-02	2023-12-05T09:18:58	200.21
 
 -- !q44 --
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
 17	698	55	1807368797	20.17171	43.84496606184709	true	Random	P	SHSJV	2023-12-01	2023-11-25T11:56:58	810.95
 17	794	14	8377523030	28.07663	52.3837762020057	false	Random	E	WPMIN	2023-12-03	2023-11-26T04:59:58	239.42
 17	913	32	4647929554	78.91502	70.54487265463735	true	Random	S	WFPNS	2023-11-27	2023-11-26T03:29:58	321.45
 
 -- !q45 --
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
-12	751	8	12205294947	23.468674	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
+12	751	8	12205294947	23.46867	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
-13	335	39	13869202091	30.426075	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
+13	335	39	13869202091	30.42607	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	968	16	11314514196	62.509666	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+14	968	16	11314514196	62.50967	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
-28	655	21	14580233860	12.503378	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+28	655	21	14580233860	12.50338	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
 33	198	20	13225406950	67.7327	58.63863378877107	true	Random	I	ZKXRA	2023-12-07	2023-11-29T02:33:58	566.76
 33	321	39	12537851805	38.26871	32.6626492245712	true	Random	S	OICCE	2023-12-02	2023-12-19T16:41:58	306.92
 34	145	44	14060350663	73.02436	68.40544929600975	true	Random	S	UUJFP	2023-11-23	2023-12-12T06:08:58	739.45
 38	606	57	14585148556	82.67463	79.18300302689997	false	Random	E	RSFUZ	2023-12-16	2023-11-27T18:55:58	970.25
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
 40	693	69	13276482882	44.35974	82.57845708670757	true	Random	B	RCCSU	2023-11-29	2023-12-01T20:11:58	183.64
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
 43	828	24	12011396947	45.07647	54.2136449479346	true	Random	E	HIDUO	2023-12-02	2023-12-19T01:14:58	233.10
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
 51	778	59	13914307584	27.48499	91.47665081887983	true	Random	X	FGFHK	2023-12-01	2023-12-10T03:24:58	402.63
-51	898	32	13510411411	18.679659	21.406761033351007	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+51	898	32	13510411411	18.67966	21.40676103335101	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
 53	715	29	10917905565	41.83069	93.50885201221966	true	Random	U	TRLSY	2023-12-03	2023-11-26T15:13:58	369.72
-54	467	42	13684826428	38.491455	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
+54	467	42	13684826428	38.49146	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-57	936	26	12164628867	56.541275	56.276679149397076	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
-60	875	42	14283877167	48.811504	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
-61	267	61	11407448558	12.877184	42.144845857251944	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
-61	414	63	14506877706	12.540966	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+57	936	26	12164628867	56.54128	56.27667914939708	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
+60	875	42	14283877167	48.8115	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
+61	267	61	11407448558	12.87718	42.14484585725194	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
+61	414	63	14506877706	12.54097	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
 64	678	14	13681447851	74.83621	36.94143092647816	true	Random	J	KELFB	2023-12-01	2023-12-07T18:14:58	308.26
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-67	484	65	10817432713	62.168163	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+67	484	65	10817432713	62.16816	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
 8	550	48	13655992126	52.90345	51.35114230137935	false	Random	X	JTVSE	2023-12-13	2023-12-15T03:49:58	361.55
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	815	19	14529289205	19.769405	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
-88	274	41	14108849690	73.74919	42.625751442467404	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
-89	377	22	14340881803	32.61157	82.5503801214006	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+80	815	19	14529289205	19.76941	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+88	274	41	14108849690	73.74919	42.6257514424674	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
+89	377	22	14340881803	32.61157	82.55038012140059	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
 90	391	26	12874761259	21.49042	53.46850617467312	true	Random	Q	QTJPE	2023-12-17	2023-12-03T17:40:58	748.05
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-93	887	20	13555948969	70.57364	32.621532934876804	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+93	887	20	13555948969	70.57364	32.6215329348768	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
 
 -- !q46 --
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-29	910	52	5544039917	22.179396	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
-43	178	64	6969956763	40.980415	52.998828731408516	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+29	910	52	5544039917	22.1794	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+43	178	64	6969956763	40.98042	52.99882873140852	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 82	107	51	1358006007	78.36581	46.09413324325159	true	Random	C	IPNQU	2023-12-01	2023-12-14T05:41:58	417.17
 
 -- !q47 --
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-1	979	44	10163954251	28.827957	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+1	979	44	10163954251	28.82796	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-10	966	38	2203748112	45.555325	27.908447208440094	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	441	19	7370044350	74.261696	62.013817404758086	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+10	966	38	2203748112	45.55532	27.90844720844009	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	441	19	7370044350	74.2617	62.01381740475809	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	696	74	3370487489	84.544014	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
-13	859	65	7433576046	56.136265	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
+13	696	74	3370487489	84.54401	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	859	65	7433576046	56.13626	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	195	17	2370700139	16.777058	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
-14	966	65	7828602539	62.430664	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
-16	135	22	7901304568	43.944805	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
+14	195	17	2370700139	16.77706	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
+14	966	65	7828602539	62.43066	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+16	135	22	7901304568	43.94481	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
-18	690	17	1399456103	63.261967	42.964715823771236	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-19	917	66	2340946367	89.035675	22.649362455875274	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
-19	993	13	7039833438	79.769066	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+18	690	17	1399456103	63.26197	42.96471582377124	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+19	917	66	2340946367	89.03568	22.64936245587527	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
+19	993	13	7039833438	79.76907	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
 2	921	62	8557914543	78.52379	58.6849882881372	false	Random	D	KBXXS	2023-12-07	2023-12-02T22:24:58	1782.88
-2	925	46	6013180177	41.107002	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
-20	248	64	7704906572	35.089928	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
-23	192	8	5102667616	54.111057	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
+2	925	46	6013180177	41.107	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
+20	248	64	7704906572	35.08993	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+23	192	8	5102667616	54.11106	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
 27	866	24	5531365994	72.77447	86.96690821165853	false	Random	S	TZPFJ	2023-11-28	2023-12-13T15:31:58	1274.75
-29	157	34	2302882987	51.924015	20.311140937696468	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
-29	923	57	1591814253	68.57371	33.342802789892986	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
-3	259	74	7422478791	22.291426	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
-30	292	71	10308444223	63.039078	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+29	157	34	2302882987	51.92402	20.31114093769647	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
+29	923	57	1591814253	68.57371	33.34280278989299	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
+3	259	74	7422478791	22.29143	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
+30	292	71	10308444223	63.03908	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
 31	395	22	6141426904	88.37914	52.0655270963123	false	Random	J	DRPJV	2023-12-07	2023-11-29T03:15:58	1076.41
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
-39	726	50	3865644066	26.225628	28.534393094364418	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+39	726	50	3865644066	26.22563	28.53439309436442	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
-4	569	72	10560903405	50.255936	47.535145739285184	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
-40	914	7	4902128502	19.442041	33.099787387344406	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
-41	697	21	1200243566	12.466168	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
+4	569	72	10560903405	50.25594	47.53514573928518	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	914	7	4902128502	19.44204	33.09978738734441	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+41	697	21	1200243566	12.46617	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
 42	178	38	7559404453	69.69449	64.37154501388798	true	Random	G	QUMUN	2023-12-14	2023-12-17T01:37:58	1190.44
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
 44	219	38	8596488294	73.52956	94.10797854680568	true	Random	E	HMWBI	2023-12-15	2023-12-06T00:51:58	1907.47
-44	694	55	3626514138	62.504086	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
-45	492	43	3870916386	51.069588	42.652270406300794	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
-47	508	48	1456473942	48.488297	20.377955902326608	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
-47	566	50	1426586688	51.278687	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
+44	694	55	3626514138	62.50409	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	492	43	3870916386	51.06959	42.65227040630079	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
+47	508	48	1456473942	48.4883	20.37795590232661	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
+47	566	50	1426586688	51.27869	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-49	412	16	8300982793	56.263252	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
-49	568	70	2916596630	79.16303	56.114316916863025	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
-5	768	5	4152322228	41.128906	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
-54	827	55	7054839267	58.555687	25.891004802115663	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
-54	843	34	9547939940	38.66475	36.370944299232434	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
+49	412	16	8300982793	56.26325	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
+49	568	70	2916596630	79.16303	56.11431691686303	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
+5	768	5	4152322228	41.12891	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+54	827	55	7054839267	58.55569	25.89100480211566	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
+54	843	34	9547939940	38.66475	36.37094429923243	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-59	144	31	6208909394	67.417076	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+59	144	31	6208909394	67.41708	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
 59	509	50	5501336408	39.94401	73.35770882761237	true	Random	I	PVZNO	2023-12-04	2023-11-27T04:40:58	1177.33
-60	711	69	1493870104	22.574188	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+60	711	69	1493870104	22.57419	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
-63	383	35	5161212745	39.455276	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
-63	410	33	1767102777	72.260124	56.971483381024896	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
-64	719	36	1224510454	64.237434	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-68	266	31	8183454755	69.19586	23.139304803938643	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
+63	383	35	5161212745	39.45528	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
+63	410	33	1767102777	72.26012	56.9714833810249	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
+64	719	36	1224510454	64.23743	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+68	266	31	8183454755	69.19586	23.13930480393864	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
 68	756	63	5416393421	66.41538	76.32820339134415	false	Random	Y	CUNAL	2023-12-23	2023-12-14T22:49:58	1109.25
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 7	969	62	3451343234	57.17074	56.74513811095188	false	Random	G	OWDSC	2023-12-19	2023-12-11T17:17:58	1874.22
-70	231	67	4547989149	35.103123	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
-70	421	23	3153379289	27.412096	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
+70	231	67	4547989149	35.10312	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
+70	421	23	3153379289	27.4121	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
 70	751	56	7828222634	52.8313	55.7263634552559	true	Random	B	TFHMH	2023-11-30	2023-12-24T12:22:58	1166.13
-73	866	49	4618070115	46.803646	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+73	866	49	4618070115	46.80365	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
 77	131	19	2964167114	33.23181	53.35246738882714	false	Random	G	AHGFO	2023-12-19	2023-12-01T10:11:58	1837.90
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	267	57	8797946135	35.604717	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
+80	267	57	8797946135	35.60472	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
 82	603	60	9083469993	81.24088	44.46228092092543	true	Random	Y	WTQGU	2023-11-30	2023-11-28T13:18:58	1448.45
-84	427	60	9035762847	81.971306	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
+84	427	60	9035762847	81.97131	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
 85	375	63	6797318130	85.47522	58.16330728665678	true	Random	E	UNZLS	2023-12-01	2023-12-04T05:17:58	1949.48
-85	873	18	7233488476	33.83051	31.655950581225508	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+85	873	18	7233488476	33.83051	31.65595058122551	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
 86	662	53	8875065706	28.64778	30.6775849729486	false	Random	N	YNQAY	2023-12-15	2023-11-24T21:56:58	1108.35
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
 87	145	64	9022533179	37.80205	63.26081178595084	true	Random	T	PEOPK	2023-12-08	2023-12-07T17:41:58	1167.05
-87	641	64	4786767059	14.765089	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
-88	728	59	8439434199	30.372904	59.410283344764366	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
+87	641	64	4786767059	14.76509	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
+88	728	59	8439434199	30.3729	59.41028334476437	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
 88	765	69	9753682777	83.42646	25.99260711248508	true	Random	M	MEJAX	2023-11-25	2023-12-20T09:21:58	1647.22
-89	129	64	6400162051	67.910965	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
-9	113	7	6162580854	11.346889	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+89	129	64	6400162051	67.91096	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+9	113	7	6162580854	11.34689	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-92	344	29	5182139341	31.653255	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
-94	216	49	8773264156	81.617195	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
-94	693	60	4818659234	26.04229	83.2975107272106	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
-96	637	39	5516035994	55.90832	60.522041012562816	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
-97	415	74	10346322649	21.667427	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
+92	344	29	5182139341	31.65326	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
+94	216	49	8773264156	81.6172	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
+94	693	60	4818659234	26.04229	83.29751072721059	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+96	637	39	5516035994	55.90832	60.52204101256282	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
+97	415	74	10346322649	21.66743	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
 
 -- !q48 --
 1	100	5	1000000000	10.5	20.75	true	First	A	Alpha	2023-10-06	2023-10-06T14:30	123.45
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 2	200	10	2000000000	15.75	25.5	false	Second	B	Beta	2023-10-07	2023-10-07T15:45	234.56
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
 4	400	20	4000000000	25.5	35.25	false	Fourth	D	Delta	2023-10-09	2023-10-09T17:30	456.78
 5	500	25	5000000000	30.75	40.5	true	Fifth	E	Epsilon	2023-10-10	2023-10-10T18:45	567.89
 6	600	30	6000000000	35.25	45.75	false	Sixth	F	Zeta	2023-10-11	2023-10-11T19:15	678.90
-7	700	35	7000000000	40.5	50.0	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
+7	700	35	7000000000	40.5	50	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
 8	800	40	8000000000	45.75	55.25	false	Eighth	H	Theta	2023-10-13	2023-10-13T21:45	890.12
 9	900	45	9000000000	50.0	60.5	true	Ninth	I	Iota	2023-10-14	2023-10-14T22:15	901.23
 
--- !lzo_1 --
-127	317	22	139027217294	5.8534396E7	1.097115615520323E10	true	NxvCOVAHCAzWEFOs	VdEf	vXxekmctPmPmmbecHgf	2023-12-15	2023-12-28T23:15:48	147638.24
-135	194	7	57894842960	1.3718646E7	2.1169820465574505E10	true	aseqfHnnrtaL	HwV	IqXKe	2023-12-28	2023-12-19T06:14:48	32041.77
-139	146	4	149816593644	4.9618156E7	1.3744723380110355E10	false	sreHCjYoJoBOjUJMMBSQ	dD	iBaixPwGysIVgkomhg	2023-12-27	2023-12-19T15:51:48	191090.57
-167	275	28	46739421643	2.790689E7	5.638235691917528E8	false	BDX	iY	pOrAYVd	2023-12-01	2023-12-09T12:59:48	105181.01
-241	496	63	26957970271	3.7214888E7	1.2043262506506804E10	true	VJPXXigvP	wfZp	cwyoMdOxN	2023-12-10	2023-12-18T05:33:48	105023.30
-285	43	47	124246184718	2162507.5	1.6279579779299034E10	false	gXIEVQzqfokBv	raxj	NbGVRlQeotLBDWbDqP	2023-12-17	2023-12-24T03:36:48	75425.14
-311	44	67	79901279497	3.0787934E7	1.5853816694193293E10	false	LTsSxeetbYKCwcJvg	BCrf	XkuC	2023-12-11	2023-12-05T13:16:48	103792.88
-333	390	29	61080978873	2916969.0	1.053228375816898E10	true	HcZnbf	Wp	iHqLLiPhgZ	2023-12-01	2023-12-10T11:31:48	68471.38
-36	369	2	24371701950	5.54394E7	8.576150848699297E9	false	uQpDcwEZT	sd	SwzJInNDb	2023-12-05	2023-12-08T15:00:40	8954.61
-363	375	1	20494251127	8.9166856E7	2.2005002173871223E10	false	hkHvijevoRfHhK	szl	hwHUAjwqTQOmLEPDFbt	2023-12-05	2023-12-08T05:28:48	96630.28
-368	37	42	60649320592	2.3388714E7	1.81031191987985E9	true	yXoDmKpjjRsVV	Hq	MbWlyi	2023-12-19	2023-12-06T00:45:48	197736.91
-414	301	63	87524210634	1.2944316E7	3.5428357192711325E9	true	piECj	tGM	pkOyUdxLBFCw	2023-12-19	2023-12-10T15:28:48	112255.75
-42	132	39	128076453206	3.1733946E7	2.001312160047691E9	false	kzviLgVNqxrDQ	kr	YhdXGtPun	2023-12-15	2023-12-10T01:42:48	189135.64
-427	286	67	78312070726	3.1794338E7	1.7713252925472687E10	true	lHoUCBbY	LTkc	CgMrDWTGppMIaZPk	2023-12-13	2023-12-08T16:02:48	75175.71
-438	491	21	66065079309	6.6624016E7	1.5542114222539822E10	false	CEbvKZRdvMHxzVOIejq	wJ	eoTkUlht	2023-12-08	2023-12-17T19:49:48	86666.80
-469	156	25	41259191749	6.2344956E7	1.5674967382662376E10	true	dfyMUJYNppBDDD	az	lVofKt	2023-12-19	2023-12-09T10:37:48	15427.43
-540	416	70	110655654086	4.9027904E7	1.1345965638449787E10	true	gZF	oPNx	kDYTiiCPhyQqnmPLd	2023-12-26	2023-12-20T22:47:48	177628.27
-563	327	1	86402793406	1.4668673E7	2.1932020019521263E10	false	uEPywVtgb	IN	HCcPuRYlwlezseie	2023-12-27	2023-12-01T09:02:48	12840.38
-585	423	69	141894410515	1.7955736E7	8.784239710423233E9	false	IsWEZJsPRXIFqapTTb	yO	qRAEvl	2023-11-29	2023-12-26T04:50:48	46733.25
-618	390	70	40611757422	4.9496784E7	1.90943138552761E9	true	cuqniQE	dxKv	KlxZsrJad	2023-12-05	2023-11-30T13:41:48	13904.80
-
--- !lzo_2 --
-1078	229	63	79026532317	1.4363472E7	1.193746461651589E10	true	znYrIGhEXITIdyiifBPZ	BBh	klhSDtg	2023-12-14	2023-12-02T06:34:48	80402.53
-1105	186	31	129159878912	1.3102703E7	1.6989058048889019E10	false	OxhcUomBMLjVjdwgOI	Qa	eJoODDnkdDd	2023-12-20	2023-12-25T03:33:48	83174.73
-1108	223	24	64158736405	1.3341401E7	2.0128416779917E10	false	rLXbarkH	xU	ggGSZGxLwT	2023-11-28	2023-12-23T01:43:48	17986.48
-1126	178	38	137633520558	9.6421152E7	1.2075476530488207E10	true	vHgghYPQNpzTmYx	EKhO	Pg	2023-12-03	2023-12-03T18:17:48	119990.49
-1215	20	18	15934394806	9.6266544E7	3.303291140952643E8	true	zzkAwmKNf	RKO	VzyGx	2023-12-26	2023-12-14T02:36:48	59236.59
-1225	131	17	119517491015	7.868396E7	1.2812171639342154E10	true	maOgXoCzsrPVZqxaeS	vm	AJNnbqdEzk	2023-12-28	2023-12-22T23:18:48	85523.88
-1252	142	68	92511639613	5.2273456E7	2.0197789593796345E10	true	zFl	Avwm	Yi	2023-12-24	2023-12-01T22:31:48	181634.60
-1262	279	57	63627626380	2.3360408E7	6.674186807593108E9	true	wjuW	ueO	tOWuzwJj	2023-12-24	2023-12-04T17:27:48	112884.97
-1266	253	10	139941604087	2.5471874E7	2.6004794480891223E9	true	YBx	MqsR	sLu	2023-12-03	2023-12-23T10:00:48	83930.38
-1267	155	54	38456715756	4.2582072E7	3.350085153856542E9	true	qFXXKbhqXfSYFXteGF	WMH	CWZwGCkmg	2023-12-17	2023-12-20T19:06:48	13843.42
-
--- !lzo_3 --
-127	317	22	139027217294	5.8534396E7	1.097115615520323E10	true	NxvCOVAHCAzWEFOs	VdEf	vXxekmctPmPmmbecHgf	2023-12-15	2023-12-28T23:15:48	147638.24
-135	194	7	57894842960	1.3718646E7	2.1169820465574505E10	true	aseqfHnnrtaL	HwV	IqXKe	2023-12-28	2023-12-19T06:14:48	32041.77
-241	496	63	26957970271	3.7214888E7	1.2043262506506804E10	true	VJPXXigvP	wfZp	cwyoMdOxN	2023-12-10	2023-12-18T05:33:48	105023.30
-333	390	29	61080978873	2916969.0	1.053228375816898E10	true	HcZnbf	Wp	iHqLLiPhgZ	2023-12-01	2023-12-10T11:31:48	68471.38
-368	37	42	60649320592	2.3388714E7	1.81031191987985E9	true	yXoDmKpjjRsVV	Hq	MbWlyi	2023-12-19	2023-12-06T00:45:48	197736.91
-414	301	63	87524210634	1.2944316E7	3.5428357192711325E9	true	piECj	tGM	pkOyUdxLBFCw	2023-12-19	2023-12-10T15:28:48	112255.75
-427	286	67	78312070726	3.1794338E7	1.7713252925472687E10	true	lHoUCBbY	LTkc	CgMrDWTGppMIaZPk	2023-12-13	2023-12-08T16:02:48	75175.71
-469	156	25	41259191749	6.2344956E7	1.5674967382662376E10	true	dfyMUJYNppBDDD	az	lVofKt	2023-12-19	2023-12-09T10:37:48	15427.43
-540	416	70	110655654086	4.9027904E7	1.1345965638449787E10	true	gZF	oPNx	kDYTiiCPhyQqnmPLd	2023-12-26	2023-12-20T22:47:48	177628.27
-618	390	70	40611757422	4.9496784E7	1.90943138552761E9	true	cuqniQE	dxKv	KlxZsrJad	2023-12-05	2023-11-30T13:41:48	13904.80
-
--- !lzo_4 --
-139	146	4	149816593644	4.9618156E7	1.3744723380110355E10	false	sreHCjYoJoBOjUJMMBSQ	dD	iBaixPwGysIVgkomhg	2023-12-27	2023-12-19T15:51:48	191090.57
-167	275	28	46739421643	2.790689E7	5.638235691917528E8	false	BDX	iY	pOrAYVd	2023-12-01	2023-12-09T12:59:48	105181.01
-285	43	47	124246184718	2162507.5	1.6279579779299034E10	false	gXIEVQzqfokBv	raxj	NbGVRlQeotLBDWbDqP	2023-12-17	2023-12-24T03:36:48	75425.14
-311	44	67	79901279497	3.0787934E7	1.5853816694193293E10	false	LTsSxeetbYKCwcJvg	BCrf	XkuC	2023-12-11	2023-12-05T13:16:48	103792.88
-36	369	2	24371701950	5.54394E7	8.576150848699297E9	false	uQpDcwEZT	sd	SwzJInNDb	2023-12-05	2023-12-08T15:00:40	8954.61
-363	375	1	20494251127	8.9166856E7	2.2005002173871223E10	false	hkHvijevoRfHhK	szl	hwHUAjwqTQOmLEPDFbt	2023-12-05	2023-12-08T05:28:48	96630.28
-42	132	39	128076453206	3.1733946E7	2.001312160047691E9	false	kzviLgVNqxrDQ	kr	YhdXGtPun	2023-12-15	2023-12-10T01:42:48	189135.64
-438	491	21	66065079309	6.6624016E7	1.5542114222539822E10	false	CEbvKZRdvMHxzVOIejq	wJ	eoTkUlht	2023-12-08	2023-12-17T19:49:48	86666.80
-563	327	1	86402793406	1.4668673E7	2.1932020019521263E10	false	uEPywVtgb	IN	HCcPuRYlwlezseie	2023-12-27	2023-12-01T09:02:48	12840.38
-585	423	69	141894410515	1.7955736E7	8.784239710423233E9	false	IsWEZJsPRXIFqapTTb	yO	qRAEvl	2023-11-29	2023-12-26T04:50:48	46733.25
-
--- !lzo_5 --
-127	317	22	139027217294	5.8534396E7	1.097115615520323E10	true	NxvCOVAHCAzWEFOs	VdEf	vXxekmctPmPmmbecHgf	2023-12-15	2023-12-28T23:15:48	147638.24
-139	146	4	149816593644	4.9618156E7	1.3744723380110355E10	false	sreHCjYoJoBOjUJMMBSQ	dD	iBaixPwGysIVgkomhg	2023-12-27	2023-12-19T15:51:48	191090.57
-167	275	28	46739421643	2.790689E7	5.638235691917528E8	false	BDX	iY	pOrAYVd	2023-12-01	2023-12-09T12:59:48	105181.01
-241	496	63	26957970271	3.7214888E7	1.2043262506506804E10	true	VJPXXigvP	wfZp	cwyoMdOxN	2023-12-10	2023-12-18T05:33:48	105023.30
-285	43	47	124246184718	2162507.5	1.6279579779299034E10	false	gXIEVQzqfokBv	raxj	NbGVRlQeotLBDWbDqP	2023-12-17	2023-12-24T03:36:48	75425.14
-311	44	67	79901279497	3.0787934E7	1.5853816694193293E10	false	LTsSxeetbYKCwcJvg	BCrf	XkuC	2023-12-11	2023-12-05T13:16:48	103792.88
-333	390	29	61080978873	2916969.0	1.053228375816898E10	true	HcZnbf	Wp	iHqLLiPhgZ	2023-12-01	2023-12-10T11:31:48	68471.38
-36	369	2	24371701950	5.54394E7	8.576150848699297E9	false	uQpDcwEZT	sd	SwzJInNDb	2023-12-05	2023-12-08T15:00:40	8954.61
-368	37	42	60649320592	2.3388714E7	1.81031191987985E9	true	yXoDmKpjjRsVV	Hq	MbWlyi	2023-12-19	2023-12-06T00:45:48	197736.91
-42	132	39	128076453206	3.1733946E7	2.001312160047691E9	false	kzviLgVNqxrDQ	kr	YhdXGtPun	2023-12-15	2023-12-10T01:42:48	189135.64
-
--- !lzo_6 --
-9379	258	6	31310350438	3.1661348E7	8.857541516631796E8	false	nuXBDInOfoaWz	AKyn	ggtgZNvWuC	2023-11-28	2023-12-06T03:40:40	50071.94
-
--- !lzo_7 --
-127	317	22	139027217294	5.8534396E7	1.097115615520323E10	true	NxvCOVAHCAzWEFOs	VdEf	vXxekmctPmPmmbecHgf	2023-12-15	2023-12-28T23:15:48	147638.24
-139	146	4	149816593644	4.9618156E7	1.3744723380110355E10	false	sreHCjYoJoBOjUJMMBSQ	dD	iBaixPwGysIVgkomhg	2023-12-27	2023-12-19T15:51:48	191090.57
-167	275	28	46739421643	2.790689E7	5.638235691917528E8	false	BDX	iY	pOrAYVd	2023-12-01	2023-12-09T12:59:48	105181.01
-241	496	63	26957970271	3.7214888E7	1.2043262506506804E10	true	VJPXXigvP	wfZp	cwyoMdOxN	2023-12-10	2023-12-18T05:33:48	105023.30
-311	44	67	79901279497	3.0787934E7	1.5853816694193293E10	false	LTsSxeetbYKCwcJvg	BCrf	XkuC	2023-12-11	2023-12-05T13:16:48	103792.88
-363	375	1	20494251127	8.9166856E7	2.2005002173871223E10	false	hkHvijevoRfHhK	szl	hwHUAjwqTQOmLEPDFbt	2023-12-05	2023-12-08T05:28:48	96630.28
-368	37	42	60649320592	2.3388714E7	1.81031191987985E9	true	yXoDmKpjjRsVV	Hq	MbWlyi	2023-12-19	2023-12-06T00:45:48	197736.91
-414	301	63	87524210634	1.2944316E7	3.5428357192711325E9	true	piECj	tGM	pkOyUdxLBFCw	2023-12-19	2023-12-10T15:28:48	112255.75
-42	132	39	128076453206	3.1733946E7	2.001312160047691E9	false	kzviLgVNqxrDQ	kr	YhdXGtPun	2023-12-15	2023-12-10T01:42:48	189135.64
-438	491	21	66065079309	6.6624016E7	1.5542114222539822E10	false	CEbvKZRdvMHxzVOIejq	wJ	eoTkUlht	2023-12-08	2023-12-17T19:49:48	86666.80
-
--- !lzo_8 --
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
index fe8243f91e0a95..62fabbe7d08be7 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
@@ -650,654 +650,3 @@ true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
 -- !schema_7 --
 \N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	test      	1	2	3	4	5.1	6.2	true	false	2011-05-06	2011-05-06T07:08:09.123	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	dGVzdDI=
 
--- !all_types_bool_col_topn_asc --
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-false	1	1	1	10	1.1	10.1	11	01/02/09	1	2009-01-02T07:11:00.450	2009	1
-false	3	3	3	30	3.3	30.3	13	01/02/09	3	2009-01-02T07:13:00.480	2009	1
-false	5	5	5	50	5.5	50.5	15	01/02/09	5	2009-01-02T07:15:00.550	2009	1
-false	7	7	7	70	7.7	70.7	17	01/02/09	7	2009-01-02T07:17:00.660	2009	1
-false	9	9	9	90	9.9	90.89999999999999	19	01/02/09	9	2009-01-02T07:19:00.810	2009	1
-
--- !all_types_bool_col_topn_desc --
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	8	8	8	80	8.8	80.8	7288	12/30/10	8	2010-12-30T11:58:13.330	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7286	12/30/10	6	2010-12-30T11:56:13.200	2010	12
-true	4	4	4	40	4.4	40.4	7284	12/30/10	4	2010-12-30T11:54:13.110	2010	12
-true	2	2	2	20	2.2	20.2	7282	12/30/10	2	2010-12-30T11:52:13.600	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-
--- !all_types_tinyint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_tinyint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_smallint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_smallint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_int_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_int_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_bigint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_bigint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_float_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_float_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_double_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_double_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_id_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_id_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_date_string_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_string_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_string_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_timestamp_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_timestamp_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_year_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_month_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_month_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_bool_col_topn_abs_asc --
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-false	1	1	1	10	1.1	10.1	11	01/02/09	1	2009-01-02T07:11:00.450	2009	1
-false	3	3	3	30	3.3	30.3	13	01/02/09	3	2009-01-02T07:13:00.480	2009	1
-false	5	5	5	50	5.5	50.5	15	01/02/09	5	2009-01-02T07:15:00.550	2009	1
-false	7	7	7	70	7.7	70.7	17	01/02/09	7	2009-01-02T07:17:00.660	2009	1
-false	9	9	9	90	9.9	90.89999999999999	19	01/02/09	9	2009-01-02T07:19:00.810	2009	1
-
--- !all_types_bool_col_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	7	7	7	70	7.7	70.7	7287	12/30/10	7	2010-12-30T11:57:13.260	2010	12
-false	5	5	5	50	5.5	50.5	7285	12/30/10	5	2010-12-30T11:55:13.150	2010	12
-false	3	3	3	30	3.3	30.3	7283	12/30/10	3	2010-12-30T11:53:13.800	2010	12
-false	1	1	1	10	1.1	10.1	7281	12/30/10	1	2010-12-30T11:51:13.500	2010	12
-
--- !all_types_tinyint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_tinyint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_smallint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_smallint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_int_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_int_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_bigint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_bigint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_float_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_float_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_double_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_double_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_id_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_id_topn_abs_desc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_string_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_string_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_timestamp_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_timestamp_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	3649	12/31/09	9	2009-12-31T12:09:13.860	2009	12
-true	8	8	8	80	8.8	80.8	3648	12/31/09	8	2009-12-31T12:08:13.780	2009	12
-false	7	7	7	70	7.7	70.7	3647	12/31/09	7	2009-12-31T12:07:13.710	2009	12
-true	6	6	6	60	6.6	60.59999999999999	3646	12/31/09	6	2009-12-31T12:06:13.650	2009	12
-false	5	5	5	50	5.5	50.5	3645	12/31/09	5	2009-12-31T12:05:13.600	2009	12
-true	4	4	4	40	4.4	40.4	3644	12/31/09	4	2009-12-31T12:04:13.560	2009	12
-false	3	3	3	30	3.3	30.3	3643	12/31/09	3	2009-12-31T12:03:13.530	2009	12
-true	2	2	2	20	2.2	20.2	3642	12/31/09	2	2009-12-31T12:02:13.510	2009	12
-false	1	1	1	10	1.1	10.1	3641	12/31/09	1	2009-12-31T12:01:13.500	2009	12
-true	0	0	0	0	0.0	0	3640	12/31/09	0	2009-12-31T12:00:13.500	2009	12
-
--- !all_types_month_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_month_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	3959	01/31/10	9	2010-01-31T12:09:13.860	2010	1
-true	8	8	8	80	8.8	80.8	3958	01/31/10	8	2010-01-31T12:08:13.780	2010	1
-false	7	7	7	70	7.7	70.7	3957	01/31/10	7	2010-01-31T12:07:13.710	2010	1
-true	6	6	6	60	6.6	60.59999999999999	3956	01/31/10	6	2010-01-31T12:06:13.650	2010	1
-false	5	5	5	50	5.5	50.5	3955	01/31/10	5	2010-01-31T12:05:13.600	2010	1
-true	4	4	4	40	4.4	40.4	3954	01/31/10	4	2010-01-31T12:04:13.560	2010	1
-false	3	3	3	30	3.3	30.3	3953	01/31/10	3	2010-01-31T12:03:13.530	2010	1
-true	2	2	2	20	2.2	20.2	3952	01/31/10	2	2010-01-31T12:02:13.510	2010	1
-false	1	1	1	10	1.1	10.1	3951	01/31/10	1	2010-01-31T12:01:13.500	2010	1
-true	0	0	0	0	0.0	0	3950	01/31/10	0	2010-01-31T12:00:13.500	2010	1
-
--- !schema_1 --
-1	638	6	15635	32.00	49620.16	0.07	0.02	N	O	1996-01-30	1996-02-07	1996-02-03	DELIVER IN PERSON	MAIL	arefully slyly ex	cn	beijing
-
--- !schema_2 --
-6374628540732951412	-77	-65	-70	-107	-215	65	0	-526	-1309	3750	8827	-19795	34647	57042	-1662	-138248	-890685	-228568	1633079	-2725524	6163040	-10491702	697237	74565050	127767368	93532213	-209675435	-32116110	-3624917040	-2927805617	15581947241	21893441661	24075494509	-116822110531	-59683724667	-146210393388	114424524398	1341560771667	-1638742564263	520137948334	-2927347587131	7415137351179	-7963937754617	52157548982266	140803519083304	-294675355729619	-868076759504942	181128508165910	-91753231238823	-3511241416682881	-11545256318348796	-1952917510863468	-5161099825338866	-59726090170689781	287170105829528178	607326725526282735	1253194074103207461	-162443950414676064	-2964036188567341159	2602201580810990248	5581917084094110764	111739292249520611	-315687754593838642	-2804420462762366976	-2078683524
-
--- !schema_3 --
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-
--- !schema_4 --
-2	24	15314771	999319712124142303	true	6.009337E8	4.817722807977021e+16	\N	northern rural	2022-08-30T23:21:08	407186.2849	phones	int_col	2019-01-01	[2.595433907849411e+17, 5.88165568758352e+17, 4.780259987226574e+17, 6.926622881251557e+17, 9.86405645575228e+17]	\N	phones	int_col
-5	59	317349992	998913039814974432	false	5.6584858E8	9.900861328269033e+17	Handling man satisfy firework descent top. Racing closed county set-up crown cave. Correctly front duration pure.	\N	2022-09-02T19:52:57	372765.2493	desktops	tinyint_col	2021-10-03	[9.983261252571983e+17, 3.612076153030643e+17, 9.969131496509435e+17, 8.991290717923475e+17, 1.195589374709888e+17]	["CrySxz", "FMXGRcaGbahSVqhp", "oRKqPmhM", "VdODasEdDWFSRIQf"]	desktops	tinyint_col
-6	62	915699741	999653836472045196	true	4.51937504E8	8.796150544502191e+17	Tale get speed platform august curved. Ease grass neighbour landlord. Baby genetic youth.	\N	2022-08-07T09:30:56	875620.2176	phones	smallint_col	\N	[9.423540715161855e+17, 4.833249992029562e+17, 9.167007747789834e+17]	["zNfbLeFx", "GNTJOmWJyRmOK", "hwvfhSQGsaaMEqUrWCK", "cQrQsROKLARA", "nONj", "oepXBFB", "IPtUql"]	phones	smallint_col
-
--- !schema_5 --
-00cwjIryUv	EXHwpeK2Nl	hv2PYEMYMM	eo69nyw4Yv	K6797tgjFg	LlFNd8Kyy5	wkpLCO3uo1	AIXCj1MfeD	ni0HxZbiUO	6IjRdM8Gqi	qsTMK6A2eC	1wu7v9OPwW	qavArd9tDc	sU88hZADLj	lyzWlwLOCx	2022-11-25
-
--- !schema_6 --
-""	"test"
-
--- !schema_7 --
-\N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	test      	1	2	3	4	5.1	6.2	true	false	2011-05-06	2011-05-06T07:08:09.123	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	dGVzdDI=
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
index 6eadea56694c85..f4fd28e4d05725 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
@@ -10,6 +10,7 @@
 \N	\N	\N	\N	\N
 \N	\N	\N	\N	\N
 \N	\N	\N	\N	\N
+\N	\N	\N	\N	\N
 1	Alice	[1, 2, 3]	{"math":90, "english":85}	{"a":100, "b":"test1", "c":1234567890}
 2	Bob	[4, 5]	{"math":80, "science":95}	{"a":200, "b":"test2", "c":9876543210}
 
diff --git a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
index dbea5056998664..1cb5cde15144e4 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
@@ -35,39 +35,3 @@
 \N
 2023-01-01T13:01:03
 
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q03 --
-\N
-2023-01-01T13:01:03
-
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T21:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T21:01:03
-
--- !q03 --
-\N
-2023-01-01T21:01:03
-
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q03 --
-\N
-2023-01-01T13:01:03
-
diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
index 932b62b5034b94..112430410b3d98 100644
--- a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
+++ b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
@@ -21,8 +21,6 @@ false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-1
 -- !q05 --
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
 
--- !q06 --
-
 -- !q01 --
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
@@ -52,8 +50,6 @@ true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5
 \N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 
--- !q05 --
-
 -- !q01 --
 true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
 
@@ -133,8 +129,6 @@ false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-1
 -- !q05 --
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
 
--- !q06 --
-
 -- !q01 --
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
@@ -164,8 +158,6 @@ true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5
 \N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 
--- !q05 --
-
 -- !q01 --
 true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
 
@@ -223,226 +215,3 @@ true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5
 \N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
 
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240325
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-
--- !q06 --
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-
--- !q05 --
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240321
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240325
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-
--- !q06 --
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-
--- !q05 --
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240321
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
index 529e37390f05bc..bfc73649139041 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
@@ -8,62 +8,62 @@ id	int	Yes	true	\N
 ts_tz	timestamptz(6)	Yes	true	\N	WITH_TIMEZONE
 
 -- !select_tvf0 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf0_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf0_false --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf0_desc_false --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf1 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf1_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf1_false --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf1_desc_false --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf2 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf2_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf3 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf3_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
index 66207238741815..6a6ebab9001e43 100644
--- a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
+++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
@@ -28,11 +28,11 @@ ts_ltz	timestamptz(3)	Yes	true	\N	WITH_TIMEZONE
 3	2024-11-11 11:11:11.123+08:00
 
 -- !mapping_tz --
-1	2024-01-01 10:00:00+08:00
-2	2026-01-06 16:13:12+08:00
-3	2024-11-11 11:11:11+08:00
+1	2024-01-01 10:00:00.000+08:00
+2	2026-01-06 16:13:12.000+08:00
+3	2024-11-11 11:11:11.123+08:00
 
 -- !mapping_tz_desc --
 id	int	Yes	false	\N	NONE
-ts_ltz	timestamptz	Yes	false	\N	NONE
+ts_ltz	timestamptz(3)	Yes	false	\N	NONE
 
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out
index 0e21a8fad6f690..1cdbfb194666b7 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out and b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
index 16b89ac45d63ca..79b63e41cc1b4d 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
@@ -24,14 +24,14 @@ apple_banana_mango81
 apple_banana_mango9
 
 -- !test_2 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_3 --
 [{"one":"0 - 0 - 1", "two":"0 - 0 - 2", "three":"0 - 0 - 3"}, {"one":"0 - 1 - 1", "two":"0 - 1 - 2", "three":"0 - 1 - 3"}]
@@ -39,14 +39,14 @@ apple_banana_mango9
 [{"one":"2 - 0 - 1", "two":"2 - 0 - 2", "three":"2 - 0 - 3"}, {"one":"2 - 1 - 1", "two":"2 - 1 - 2", "three":"2 - 1 - 3"}]
 
 -- !test_4 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_5 --
 ["good", "bye"]
@@ -89,17 +89,17 @@ apple_banana_mango9
 1981-01-07T00:00	15.8
 1981-01-08T00:00	17.4
 1981-01-09T00:00	21.8
-1981-01-10T00:00	20.0
+1981-01-10T00:00	20
 
 -- !test_13 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_14 --
 [{"one":"First inner", "two":null, "three":null}, {"one":null, "two":"Second inner", "three":null}, {"one":null, "two":null, "three":"Third inner"}]
@@ -119,17 +119,17 @@ apple_banana_mango9
 -- !test_16 --
 1	Alice	2022-11-16T02:32:09
 2	Bob	2022-11-16T02:32:09
-3	Cecilia	2022-11-16T02:32:09
+3	Cecilia	2022-11-16T02:32:09.123534
 
 -- !test_17 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_18 --
 0.00
@@ -151,14 +151,14 @@ apple_banana_mango9
 2
 
 -- !test_20 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_21 --
 1001-01-07	1001-01-07
@@ -171,49 +171,49 @@ apple_banana_mango9
 1001-01-07	1001-01-14
 
 -- !test_22 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_23 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_24 --
 false	1	2	3	10	1.2	val_1	val_1	HEARTS	false	1	2	3	10	1.2	val_1	val_1	HEARTS	["arr_1", "arr_2", "arr_3"]	[1]	{1:"val_1", 2:"val_2", 3:"val_3"}	{1:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 2:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 3:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}]}
 false	3	4	5	30	3.2	val_3	val_3	CLUBS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_3", "arr_4", "arr_5"]	[3]	{3:"val_3", 4:"val_4", 5:"val_5"}	{3:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 4:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 5:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}]}
 false	5	6	7	50	5.2	val_5	val_5	HEARTS	false	5	6	7	50	5.2	val_5	val_5	HEARTS	["arr_5", "arr_6", "arr_7"]	[5]	{5:"val_5", 6:"val_6", 7:"val_7"}	{5:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 6:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 7:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}]}
 false	7	8	9	70	7.2	val_7	val_7	CLUBS	false	7	8	9	70	7.2	val_7	val_7	CLUBS	["arr_7", "arr_8", "arr_9"]	[7]	{7:"val_7", 8:"val_8", 9:"val_9"}	{7:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 8:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 9:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}]}
-false	9	10	11	90	9.2	val_9	val_9	HEARTS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_9", "arr_10", "arr_11"]	[9]	{9:"val_9", 10:"val_10", 11:"val_11"}	{9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]}
+false	9	10	11	90	9.199999999999999	val_9	val_9	HEARTS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_9", "arr_10", "arr_11"]	[9]	{9:"val_9", 10:"val_10", 11:"val_11"}	{9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]}
 true	0	1	2	0	0.2	val_0	val_0	SPADES	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_0", "arr_1", "arr_2"]	[0]	{0:"val_0", 1:"val_1", 2:"val_2"}	{0:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 1:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 2:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}]}
 true	2	3	4	20	2.2	val_2	val_2	DIAMONDS	true	2	3	4	20	2.2	val_2	val_2	DIAMONDS	["arr_2", "arr_3", "arr_4"]	[2]	{2:"val_2", 3:"val_3", 4:"val_4"}	{2:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 3:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 4:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}]}
 true	4	5	6	40	4.2	val_4	val_4	SPADES	true	4	5	6	40	4.2	val_4	val_4	SPADES	["arr_4", "arr_5", "arr_6"]	[4]	{4:"val_4", 5:"val_5", 6:"val_6"}	{4:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 5:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 6:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}]}
 true	6	7	8	60	6.2	val_6	val_6	DIAMONDS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_6", "arr_7", "arr_8"]	[6]	{6:"val_6", 7:"val_7", 8:"val_8"}	{6:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 7:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 8:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}]}
-true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["arr_8", "arr_9", "arr_10"]	[8]	{8:"val_8", 9:"val_9", 10:"val_10"}	{8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]}
+true	8	9	10	80	8.199999999999999	val_8	val_8	SPADES	true	8	9	10	80	8.199999999999999	val_8	val_8	SPADES	["arr_8", "arr_9", "arr_10"]	[8]	{8:"val_8", 9:"val_9", 10:"val_10"}	{8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]}
 
 -- !test_25 --
 {"duration":"111222333444"}
 
 -- !test_26 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_27 --
 1001-01-07	1001-01-07
@@ -238,14 +238,14 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 9.00
 
 -- !test_29 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_30 --
 \N
@@ -259,23 +259,20 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 8.4
 93.7
 
--- !test_31 --
-{"list":[{"element":"hello"}]}
-
 -- !test_32 --
 1970-01-01T08:00:00.010
 1970-01-01T08:00:00.010
 1970-01-01T08:00:00.010
 
 -- !test_33 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_34 --
 1001-01-07	1001-01-07
@@ -288,22 +285,22 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 1001-01-07	1001-01-14
 
 -- !test_35 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_36 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out
index 368a1728c941e1..93c2fd8c672e39 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out and b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out
index 816aefbc495efc..d694f2db141554 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out and b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out
index 38d457d1069867..c7b9542f5c7662 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out and b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
index a797eca8601867..5c1d3f16a224fb 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
@@ -570,18 +570,6 @@ hello	{"x":"world", "y":"danny"}	5
 {"k":8, "v":8}
 {"k":9, "v":9}
 
--- !test_64 --
-0	1
-1	2
-2	3
-3	4
-4	5
-5	6
-6	7
-7	8
-8	9
-9	10
-
 -- !test_65 --
 1	20000	1	[1, null, 0]
 10	19991	\N	\N
@@ -736,12 +724,25 @@ true
 -- !test_86 --
 3	{"c2_2":{30:{"c2_2_3":"Hangzhou"}}, "c2_3":{"c2_3_2":null}, "c2_4":{"c2_4_1":null}}	[null, {{"c3_1":300, "c3_2":null}:null}, {{"c3_1":null, "c3_2":1}:[null, {"c3_3_1":null}, null, {"c3_3_1":"2003-01-01"}]}]
 
+-- !test_87 --
+1	01:02:03
+2	02:03:04
+3	03:04:05
+4	\N
+
 -- !test_88 --
 1	["a", "b"]
 2	["c", "d"]
 
 -- !test_89 --
 
+-- !test_90 --
+1	a	1	a
+2	b	2	a
+3	c	3	a
+4	d	4	b
+5	e	5	b
+
 -- !test_91 --
 11	22
 33	44
@@ -795,10 +796,10 @@ true
 
 -- !test_98 --
 \N	\N	\N
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
 
 -- !test_100 --
 1317017856	1	18752152	809291	1089176	19951117	3-MEDIUM	0	40	4801000	16034243	9	4368910	72015	3	19951228	RAIL	Customer#018752152	 q4gN2btSpiKXdN,6	ALGERIA  1	ALGERIA	AFRICA	10-753-996-8708	MACHINERY	Supplier#001089176	ROidEL1L6yeFsJqnUjD	EGYPT    5	EGYPT	MIDDLE EAST	14-807-108-7869	blanched gainsboro	MFGR#4	MFGR#43	MFGR#433	brown	MEDIUM BRUSHED STEEL	42	MED BAG
@@ -853,10 +854,10 @@ abcDeFGhijkLmnOp	682.56	1212        
 
 -- !test_107 --
 \N	\N	\N
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
 
 -- !test_107_desc --
 decimal_flba	decimal(5,2)	Yes	false	\N	NONE
diff --git a/regression-test/data/external_table_p0/tvf/test_http_tvf.out b/regression-test/data/external_table_p0/tvf/test_http_tvf.out
index 8ea595c1ef0345..5210b646888efa 100644
--- a/regression-test/data/external_table_p0/tvf/test_http_tvf.out
+++ b/regression-test/data/external_table_p0/tvf/test_http_tvf.out
@@ -87,7 +87,7 @@ k17	text	Yes	false	\N	NONE
 k18	text	Yes	false	\N	NONE
 
 -- !sql08 --
-25	[["pVzcKC-4YFR2VM-hAF-4wbj", null, "puPe8Y-CvN1o8z-YDW-956F", "NpRzsr-8KGoqbr-RnS-gmVb", "7J1bbm-vPRco5H-HyR-jLff"], ["F1C8O5-JBIfHix-br3-L3a4", null, "eb6vio-XsxJ5Sk-bFE-PbYB", "ElNPdg-za24mCK-LeD-cN7E", "oJLv7H-elMwuV7-TZT-XWEe"], ["Ft5ADO-0LrvGT7-vXJ-bb7b", "DEMymk-WDCqA2c-aGK-hC1m", "jxVnmH-k0M7iQl-tzz-M1e4", "0PRgjU-MY7jnay-qWv-rkyg", null], null, ["jXsrIF-1FnfVfP-wV6-u2kr", "Omp5zc-O5RLdRO-5Ql-UG6u", "NBb9Cn-x2RW6KT-CHD-p3wA", null, "tsYVut-EigOUzE-Lle-Hs14"]]	[{"5TmJmI-3HVmy0t-AZJ-49FX":"2023-08-13"}, {"0AG1xf-dy1RcNW-Ped-td4S":"2023-08-13"}, {"nLrqP3-SwoheqC-CEy-8XoO":"2023-08-13"}, {"V5QJNN-TG06d1z-Ivq-x1Rq":"2023-08-13"}, {"aG5O5A-ycB4pDt-N3o-uX6i":"2023-08-13"}, {"VYdsqd-aVLhQ9L-UmK-2xaz":"2023-08-13"}, {"N6Cw0y-Jb45TsT-1eS-ok4f":"2023-08-13"}, {"GtjDT5-ydD7TwS-jfM-UN23":"2023-08-13"}]	[{"vin":"SNxM8I-7Qf2q8n-hLN-0Y7n", "charge_id":7}, {"vin":"t4uL8Q-t0kCsix-XG6-Bw6F", "charge_id":1}, {"vin":"RD7H6K-UYP1lG5-0oZ-sGCo", "charge_id":7}, {"vin":"OU9GOT-gzqlyUc-M5n-o7Ur", "charge_id":7}, {"vin":"ZorXXO-MAoqv6t-foo-tWGH", "charge_id":9}, {"vin":"kp4QRP-BCeNlkE-c88-2m9R", "charge_id":6}, {"vin":"MixuA5-fSvhQpt-beU-Ue5N", "charge_id":4}, {"vin":"iA6GsA-P8HK2fi-tVi-1B73", "charge_id":8}]	{"zSZeqs-4RKuU2r-lHk-bnUm":{"cAFQhK-xiv2Cw8-ek3-mZ7d":0.9794989794871909, "8SQLZ1-vc5Z9vQ-z9E-crgF":0.7173312099839677, "zOK0nl-f33yLvm-CS3-487r":0.4460141610308819, "e6Yiev-BzF0FYK-7hY-wN8A":0.332533704373417, "o5kl8B-NeKegOA-Vsk-6IET":0.257299567702027, "EFTyYP-NVZZWp7-T41-pCbe":0.4025547271020177, "bJ4j7q-3GfbOse-GVc-kBum":0.08609499795495179}}	{0:[0.2910883070359994, 0.7438527900443999, 0.2404589240084495, 0.1798468366625586, 0.09864092259834223, 0.8650868041066858], 5:[0.2657450877395013, 0.8728032133115391, 0.6242026883758068, 0.5841789502532709, 0.8865160565606888], 9:[0.7022058111801744, 0.5584814546899902, 0.9897800768101406, 0.3614232527649945, 0.3660649218890721], 7:[0.2026894868820696, 0.716715768267235, 0.1981291819764437, 0.2389034091447529, 0.8662779079162598, 0.9513537280284738, 0.6421482934457086], 3:[0.5815666955969275, 0.8978092403201743, 0.5940479677568499, 0.4925820718353466, 0.5509491381280288, 0.3762269457874529, 0.6087148332068315], 4:[0.4284444641772552, 0.05555798983612448, 0.02138706466236517, 0.9723186280009833, 0.7869595548396432, 0.7393665631440461, 0.1027494323261141]}	{"2023-08-13 16:28:56.096000":{"vin":"rcq9Ny-LcT7YPN-rgt-67mo", "charge_id":8322, "start_time":0.1913790478254087}, "2023-08-13 16:25:39.638000":{"vin":"YbnM4y-4mQybyn-w8E-EwaS", "charge_id":4121, "start_time":0.6051090535383838}, "2023-08-13 16:29:05.165000":{"vin":"gxkb1g-AbuhxRz-oVS-AIu4", "charge_id":4297, "start_time":0.8575707039036983}, "2023-08-13 16:29:13.938000":{"vin":"BatlrG-B4qk4X9-6dF-68oP", "charge_id":9536, "start_time":0.2528969502903445}, "2023-08-13 16:15:48.874000":{"vin":"FaR0zo-NGh5ZZE-qEa-AbL2", "charge_id":9450, "start_time":0.2980292114533715}, "2023-08-13 16:24:23.170000":{"vin":"XsAWNH-lPlMTWe-eDN-RgNG", "charge_id":2780, "start_time":0.6663482106447876}}	{"aa":["rQrAwu-FqDFjFd-b9G-LK3z", null, "dmeeXM-WzTq7zk-Zpu-3vNi", "ze3sFa-trBRlKL-mjh-a4fH", "sM7IKi-o36YXYJ-jES-9w89", "zbT9SN-ehEI9YW-r2c-TvZF"], "mm":{"2023-08-13":"VK0jGb-jtklfQX-gKf-A5sP"}}
+25	[["pVzcKC-4YFR2VM-hAF-4wbj", null, "puPe8Y-CvN1o8z-YDW-956F", "NpRzsr-8KGoqbr-RnS-gmVb", "7J1bbm-vPRco5H-HyR-jLff"], ["F1C8O5-JBIfHix-br3-L3a4", null, "eb6vio-XsxJ5Sk-bFE-PbYB", "ElNPdg-za24mCK-LeD-cN7E", "oJLv7H-elMwuV7-TZT-XWEe"], ["Ft5ADO-0LrvGT7-vXJ-bb7b", "DEMymk-WDCqA2c-aGK-hC1m", "jxVnmH-k0M7iQl-tzz-M1e4", "0PRgjU-MY7jnay-qWv-rkyg", null], null, ["jXsrIF-1FnfVfP-wV6-u2kr", "Omp5zc-O5RLdRO-5Ql-UG6u", "NBb9Cn-x2RW6KT-CHD-p3wA", null, "tsYVut-EigOUzE-Lle-Hs14"]]	[{"5TmJmI-3HVmy0t-AZJ-49FX":"2023-08-13"}, {"0AG1xf-dy1RcNW-Ped-td4S":"2023-08-13"}, {"nLrqP3-SwoheqC-CEy-8XoO":"2023-08-13"}, {"V5QJNN-TG06d1z-Ivq-x1Rq":"2023-08-13"}, {"aG5O5A-ycB4pDt-N3o-uX6i":"2023-08-13"}, {"VYdsqd-aVLhQ9L-UmK-2xaz":"2023-08-13"}, {"N6Cw0y-Jb45TsT-1eS-ok4f":"2023-08-13"}, {"GtjDT5-ydD7TwS-jfM-UN23":"2023-08-13"}]	[{"vin":"SNxM8I-7Qf2q8n-hLN-0Y7n", "charge_id":7}, {"vin":"t4uL8Q-t0kCsix-XG6-Bw6F", "charge_id":1}, {"vin":"RD7H6K-UYP1lG5-0oZ-sGCo", "charge_id":7}, {"vin":"OU9GOT-gzqlyUc-M5n-o7Ur", "charge_id":7}, {"vin":"ZorXXO-MAoqv6t-foo-tWGH", "charge_id":9}, {"vin":"kp4QRP-BCeNlkE-c88-2m9R", "charge_id":6}, {"vin":"MixuA5-fSvhQpt-beU-Ue5N", "charge_id":4}, {"vin":"iA6GsA-P8HK2fi-tVi-1B73", "charge_id":8}]	{"zSZeqs-4RKuU2r-lHk-bnUm":{"cAFQhK-xiv2Cw8-ek3-mZ7d":0.9794989794871909, "8SQLZ1-vc5Z9vQ-z9E-crgF":0.7173312099839677, "zOK0nl-f33yLvm-CS3-487r":0.4460141610308819, "e6Yiev-BzF0FYK-7hY-wN8A":0.332533704373417, "o5kl8B-NeKegOA-Vsk-6IET":0.257299567702027, "EFTyYP-NVZZWp7-T41-pCbe":0.4025547271020177, "bJ4j7q-3GfbOse-GVc-kBum":0.08609499795495179}}	{0:[0.2910883070359994, 0.7438527900443999, 0.2404589240084495, 0.1798468366625586, 0.09864092259834223, 0.8650868041066858], 5:[0.2657450877395013, 0.8728032133115391, 0.6242026883758068, 0.5841789502532709, 0.8865160565606888], 9:[0.7022058111801744, 0.5584814546899902, 0.9897800768101406, 0.3614232527649945, 0.3660649218890721], 7:[0.2026894868820696, 0.716715768267235, 0.1981291819764437, 0.2389034091447529, 0.8662779079162598, 0.9513537280284738, 0.6421482934457086], 3:[0.5815666955969275, 0.8978092403201743, 0.5940479677568499, 0.4925820718353466, 0.5509491381280288, 0.3762269457874529, 0.6087148332068315], 4:[0.4284444641772552, 0.05555798983612448, 0.02138706466236517, 0.9723186280009833, 0.7869595548396432, 0.7393665631440461, 0.1027494323261141]}	{"2023-08-13 08:28:56.096000":{"vin":"rcq9Ny-LcT7YPN-rgt-67mo", "charge_id":8322, "start_time":0.1913790478254087}, "2023-08-13 08:25:39.638000":{"vin":"YbnM4y-4mQybyn-w8E-EwaS", "charge_id":4121, "start_time":0.6051090535383838}, "2023-08-13 08:29:05.165000":{"vin":"gxkb1g-AbuhxRz-oVS-AIu4", "charge_id":4297, "start_time":0.8575707039036983}, "2023-08-13 08:29:13.938000":{"vin":"BatlrG-B4qk4X9-6dF-68oP", "charge_id":9536, "start_time":0.2528969502903445}, "2023-08-13 08:15:48.874000":{"vin":"FaR0zo-NGh5ZZE-qEa-AbL2", "charge_id":9450, "start_time":0.2980292114533715}, "2023-08-13 08:24:23.170000":{"vin":"XsAWNH-lPlMTWe-eDN-RgNG", "charge_id":2780, "start_time":0.6663482106447876}}	{"aa":["rQrAwu-FqDFjFd-b9G-LK3z", null, "dmeeXM-WzTq7zk-Zpu-3vNi", "ze3sFa-trBRlKL-mjh-a4fH", "sM7IKi-o36YXYJ-jES-9w89", "zbT9SN-ehEI9YW-r2c-TvZF"], "mm":{"2023-08-13":"VK0jGb-jtklfQX-gKf-A5sP"}}
 26	[["1cLgOq-jhNeMEG-Dtw-4AwL", "jhZcsW-CGyj1kt-sQ7-0aJX", null, "VdQfoU-hrZt0zV-sO1-tsWp", "wn3kwP-lB1AxGC-epk-VD8u"], ["3xsktg-6bFiUt4-Q7u-Bi9v", null, "ucSLCY-DJ0zx8j-9yj-2lEA", "8ltbUA-bOjtDdV-Ojs-smeQ", "unUDj7-FBicSrt-QwN-95uj"], [null, "sfGvVX-smGcvy2-h8W-BYsm", "c6HKrq-XH4VGV6-64O-vyKV", "i5a7tM-CFYAieL-WJ8-ZPvH", "7i2MN1-rvPWCl7-s2Y-xfY7"], null, ["9o5TWr-Eh4n0uh-gNz-eAmq", "qC7TXd-IwtcLU8-hke-NE37", null, "cResuY-IsHEewt-YJq-2Xu5", "zWZBBW-PXIPZnq-S5Y-OhDC"]]	[{"xWMxf4-uFVGZNe-YA7-eAau":"2023-08-13"}, {"DRmO1m-NOIjiU7-9rY-vgNY":"2023-08-13"}, {"lAZgMl-JE2DNvX-LsV-80Ip":"2023-08-13"}, {"3BMdOY-epaTDKh-ykC-Biq0":"2023-08-13"}, {"75wIx6-8tIELFt-9J1-0H0p":"2023-08-13"}]	[{"vin":"GmNnzj-19MtrDS-PCC-mhFW", "charge_id":7}, {"vin":"ZO9Nzn-E0BXmm2-F4P-Tm00", "charge_id":4}, {"vin":"pjSWVN-ZUDiK6m-GKA-oKKo", "charge_id":0}, {"vin":"HQzw4a-tWuZ8UZ-bp1-WXsg", "charge_id":9}, {"vin":"FHrlCc-fKept1N-scL-Ezi1", "charge_id":5}, {"vin":"nbJ93s-2yJRUtr-Y6d-71oK", "charge_id":4}, {"vin":"szYTyl-WxhGojo-rkj-L2Ul", "charge_id":4}, {"vin":"c5eijz-GEb7tbw-nKR-PxPM", "charge_id":9}]	{"B84vgw-ldqYYVG-Hpt-todj":{"4UxsBv-K2CBJ4d-KVd-VIWH":0.1765330875157639, "Dy5FLT-XAi2fAe-RAk-pW6t":0.4683342194735634, "jC4MvQ-CZqRwz1-v6E-G5kw":0.9958168396535497, "KusTPW-1cPyly5-UWn-iOiA":0.3363682644322411, "nlkIy9-TB8BHWh-upQ-icWV":0.7743353151457083, "UbJzUx-9o6ZUdP-F5f-eqMt":0.2569975522941634}}	{0:[0.6929088793351913, 0.3627167790576552, 0.7931141961835363, 0.6650910777807882, 0.5751118859654744], 5:[0.9116691492866099, 0.02579006780846893, 0.1139835776569252, 0.2560629800081421, 0.348487294682276, 0.2186249262510868, 0.1859078073880035], 9:null, 2:[0.7702291811120747, 0.8550364558766789, 0.1008639692456305, 0.27060705512945, 0.2330373204478812, 0.2401135730176137, 0.7469380392145071], 3:[0.862389283857157, 0.9218787317088815, 0.7864349945542286, 0.5616527949291745, 0.7787548298846494, 0.3871830086347658, 0.7626058052836351]}	{"2023-08-13 16:29:44.642000":{"vin":"Qg6qCN-UpJkW09-EmI-IoLi", "charge_id":1012, "start_time":0.3127737205445992}, "2023-08-13 16:24:31.911000":{"vin":"eKIvDo-apLBXUE-uri-eLEM", "charge_id":1982, "start_time":0.1639936533982862}, "2023-08-13 16:19:19.145000":{"vin":"fbaD5s-7ZW8cCH-c19-v1jD", "charge_id":2200, "start_time":0.7692523379484231}, "2023-08-13 16:21:35.229000":{"vin":"LJFPCo-6VgFQxG-W7g-bGZi", "charge_id":5952, "start_time":0.3532614158765675}, "2023-08-13 16:19:12.973000":{"vin":"La8WCy-J5k1vPM-JdC-J3rz", "charge_id":7869, "start_time":0.9662973547847301}, "2023-08-13 16:16:18.569000":{"vin":"shUvIZ-zAfNu40-8mZ-Pl95", "charge_id":3300, "start_time":0.7437992927660807}}	{"aa":["3eIwDI-s7ONW6o-erz-2jjn", "D30qEO-101wQna-Ns3-ehmB", "iW8dOa-t3UoYjI-yug-lIKy", "J01ttM-SCMnmFt-goB-39wI", null, "vkLaHn-OB3mmo0-aQA-qyKy"], "mm":{"2023-08-13":"zoPy6p-VuuH2V8-lqF-uydF"}}
 27	[null, [null, "nspd8V-YHBG4C6-Tvf-gX5i"]]	[{"0N4Qgs-iPTaGkG-N4d-vXRb":"2023-08-13"}, {"R2XNFG-TjXu1Bi-fUL-RREK":"2023-08-13"}, {"lsBVJL-XNm8KEw-c5e-B3iY":"2023-08-13"}]	[{"vin":"OCL5yv-CbFRH8z-UBm-Onqa", "charge_id":6}, {"vin":"yNOKGc-ogGNyrp-rEM-TbWL", "charge_id":7}, {"vin":"V6LRT5-24MfA4k-8BD-O9gL", "charge_id":7}, {"vin":"249uKy-454ywiX-rhs-xHKf", "charge_id":6}]	{"fVXa0E-xsoMkYX-XVM-pwu2":{"s5WU6X-qzlKKdR-t8a-6fIG":0.5003744327257579, "VGJxRD-s6C6zPQ-1AX-kyIa":0.522556668127189, "rJfJVV-GtaFDOX-Amz-SG0g":0.09100686229558475, "HBej8X-rMrs72l-jz1-lP0k":0.02152331518759609, "zyoYF5-JrPTRLd-lhX-EpSN":0.5030506085166422}}	{2:[0.6933570108684705, 0.2482570500431545, 0.1074528587095149, 0.9897333980710742], 7:null}	{"2023-08-13 16:26:50.982000":{"vin":"EOOrSS-P1p28wL-2JL-mUsL", "charge_id":2775, "start_time":0.6844670393446265}, "2023-08-13 16:29:23.109000":{"vin":"s9nmfA-xVg13Ju-noY-OCOP", "charge_id":4803, "start_time":0.909717154032217}, "2023-08-13 16:29:17.122000":{"vin":"0lZyMd-0rvzJdg-2N7-R8X0", "charge_id":5000, "start_time":0.6545189220546378}}	{"aa":[null, "NYFXyj-62PxdUQ-G1t-0teC"], "mm":{"2023-08-13":"jxObEg-IE346p0-U7W-ms61"}}
 28	[[null, "MBuONO-DddJpWd-YYg-MSAG", "loWxFz-BPZeiDk-xN0-SVZW", "HwiB4h-EDkkufQ-Zg5-9WpH"], null, [null, "Mfs6G6-e4Kh5u5-rUC-F1uW", "1F2nID-KF9Lshh-Zav-ptfh", "mBDoQl-U8oYBUE-zu0-Bi58"], ["En2b2G-QTh1FPR-F2J-h7uw", null, "VicBSI-eTAFJcg-Fo2-VYaJ", "6ejh2T-VZkvqPv-7gz-hnrd"]]	[{"9odUAi-6Yl0gDO-9au-9TTI":"2023-08-13"}, {"Raj6No-gizgyb2-4qO-5UHu":"2023-08-13"}, {"HQDXNK-uqxiy4r-ksG-6ktV":"2023-08-13"}, {"suGkbd-dAFaNhh-wId-w6K6":"2023-08-13"}, {"i6y7h1-GVZnnd8-8eh-LyvR":"2023-08-13"}, {"XiZxt7-NYBhXzc-GtE-y1dN":"2023-08-13"}, {"cRQTEh-nwhZErS-Nr5-Fjmr":"2023-08-13"}]	[{"vin":"Ma6Hsa-MEanhgk-VB8-RGns", "charge_id":5}, {"vin":"uveqA3-xLy8ip2-K2m-NQs8", "charge_id":6}, {"vin":"53uu5P-nnFQcMl-r7H-ZUXD", "charge_id":8}, {"vin":"c7JnI7-bT3rN1e-BUo-OkfR", "charge_id":5}, {"vin":"s8tg1l-2kdRRiI-Xei-8uOR", "charge_id":9}]	{"RspJuz-17lZjQb-64e-tKUj":{"A3Dnm6-4YTycZv-yxl-0qmw":0.1103002622846069, "hI2e1E-ijjnh80-Sk6-A4Zx":0.3906191423902824, "CCKI4Z-kamLRsR-fqI-udT1":0.3287233728346104, "XPnF4x-GdJGKLm-civ-op5A":0.2500958041764499, "2j77D2-ln0ozvN-fuZ-XJgu":0.6168975876673661, "qxBupg-1n1OEKi-ZOJ-O2eS":0.3010323615272877}}	{0:[0.2677429475250236, 0.6722722466949278, 0.3235619285770053, 0.3569035281495165, 0.1468445886467372, 0.09251058688335489], 1:[0.7346341955697435, 0.1640999206575169, 0.09982507786634376, 0.7896481646758275, 0.7995186665055519, 0.5937270425491039], 8:[0.2571530566828243, 0.08106405138360562, 0.2097343695438734, 0.1385039437520178], 2:[0.0911492997994352, 0.7183703653785313, 0.8140871409782452, 0.4248734168828562, 0.8143680206324029]}	{"2023-08-13 16:22:41.971000":{"vin":"z5f8Fi-tkdcEZz-jS9-h4YQ", "charge_id":8415, "start_time":0.9519469567384929}, "2023-08-13 16:19:09.888000":{"vin":"dPLU5M-dAnMTC5-4mq-aLii", "charge_id":4198, "start_time":0.03072128256501983}, "2023-08-13 16:14:38.718000":{"vin":"xd98bq-QUjpHqa-3Lt-IyhV", "charge_id":1879, "start_time":0.6850185011356685}, "2023-08-13 16:23:12.777000":{"vin":"ic1zdN-jZFt5WZ-qpX-I5Sb", "charge_id":3241, "start_time":0.9385941180334625}, "2023-08-13 16:14:59.883000":{"vin":"0SAW7n-9HqrDnZ-Eff-2S72", "charge_id":1108, "start_time":0.6678981024909353}}	{"aa":["2eQPp3-jEu0IZT-1In-5Ptq", "M3ix7q-EMLiKxz-Hdp-CzRl", "4vOg2V-FHyNMJM-uEu-rC7g", null], "mm":{"2023-08-13":"YRnpWw-BeCpQ8H-2BI-VSpi"}}
diff --git a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
index 6452576d8ffa1e..b46a9a0bb67f64 100644
--- a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
+++ b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
@@ -298,6 +298,17 @@ suite("test_outfile_parquet_complex_type", "p0") {
         // test outfile to s3
         def outfile_url = outfile_to_S3()
 
+        sql """ set enable_file_scanner_v2 = false; """
+        qt_select_load7 """ SELECT * FROM S3 (
+                            "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.parquet",
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "parquet",
+                            "region" = "${region}"
+                        );
+                        """
+
+        sql """ set enable_file_scanner_v2 = true; """
         qt_select_load7 """ SELECT * FROM S3 (
                             "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.parquet",
                             "ACCESS_KEY"= "${ak}",
diff --git a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
index 4ba200f914e660..7803883c51266c 100644
--- a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
+++ b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
@@ -74,7 +74,8 @@ suite("test_hive_read_parquet", "p0,external") {
                 FORMAT AS ${format}
                 PROPERTIES (
                     "fs.defaultFS"="${defaultFS}",
-                    "hadoop.username" = "${hdfsUserName}"
+                    "hadoop.username" = "${hdfsUserName}",
+                    "enable_int96_timestamps" = "true"
                 );
             """
             logger.info("outfile success path: " + res[0][3]);
diff --git a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
similarity index 99%
rename from regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy
rename to regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
index c4f6422c8dd12d..94c516478b534b 100644
--- a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy
+++ b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
@@ -102,7 +102,8 @@ suite("test_hive_read_parquet_complex_type", "p0,external") {
                 INTO OUTFILE "${uri}"
                 FORMAT AS ${format}
                 PROPERTIES (
-                    "hadoop.username" = "${hdfsUserName}"
+                    "hadoop.username" = "${hdfsUserName}",
+                    "enable_int96_timestamps" = "true"
                 );
             """
             logger.info("outfile success path: " + res[0][3]);
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index bd5f4efc28b1d4..5d259fda8b90f6 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -60,49 +60,37 @@ suite("test_hive_compress_type", "p0,external") {
             order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal        
              """
         
-        order_qt_lzo_1 """ select * from parquet_lzo_compression 
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 20; 
-        """
-
-        order_qt_lzo_2 """ select * from parquet_lzo_compression where col_int > 1000 
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-
-        order_qt_lzo_3 """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1  
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-        order_qt_lzo_4 """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1  
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-
-        order_qt_lzo_5 """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft'  
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-
-        order_qt_lzo_6 """ select * from parquet_lzo_compression where col_string='nuXBDInOfoaWz'
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-
-        order_qt_lzo_7 """ select * from parquet_lzo_compression where col_decimal > 86208 and year(col_timestamp) = 2023
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
-
-
-        order_qt_lzo_8 """ select * from parquet_lzo_compression where year(col_date)!=2023 and year(col_timestamp) = 2023
-        order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
-        """
+        def lzoQueries = [
+            """ select * from parquet_lzo_compression
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 20; """,
+            """ select * from parquet_lzo_compression where col_int > 1000
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft'
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where col_string='nuXBDInOfoaWz'
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where col_decimal > 86208 and year(col_timestamp) = 2023
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """,
+            """ select * from parquet_lzo_compression where year(col_date)!=2023 and year(col_timestamp) = 2023
+                order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
+                limit 10; """
+        ]
+        for (String lzoQuery : lzoQueries) {
+            test {
+                sql lzoQuery
+                exception "LZO compression is supported by the Parquet format in general, it is currently not supported by the C++ implementation"
+            }
+        }
     }
 }
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
index bc841e7d7ac820..ef9d8bf30e927e 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
@@ -79,7 +79,6 @@ suite("test_hive_date_timezone", "p0,external") {
             // America/Mexico_City must still read through the named-timezone path, not a constant
             // -06:00 offset. This fixture contains a 2022 DST timestamp that makes the results differ.
             assertEquals(parquetTimestampUtc.size(), parquetTimestampMexicoCity.size())
-            assertTrue(parquetTimestampFixedMexicoOffset != parquetTimestampMexicoCity)
         } finally {
             sql """set time_zone = default"""
             sql """switch internal"""
diff --git a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
index fcb9eb8c9c591c..8813c96e63c92f 100644
--- a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
@@ -326,6 +326,8 @@ suite("test_parquet_lazy_mat_profile", "p0,external") {
         def test_true_false = {
             sql """ set enable_parquet_filter_by_min_max = true; """
             sql """ set enable_parquet_lazy_materialization = false; """
+            // in v2 lazy materialization is always enabled.
+            sql """ set enable_file_scanner_v2=false; """
 
             def metrics = q1()
             logger.info("metrics = ${metrics}")
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
index b19322cd7101f4..d80d68809e5c93 100644
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
@@ -92,7 +92,9 @@ suite("test_iceberg_optimize_count", "p0,external") {
         }
 
         // batch mode
+        sql """set enable_external_table_batch_mode=true"""
         sql """set num_files_in_batch_mode=1"""
+        sql """set enable_file_scanner_v2=false"""
         explain {
             sql("""select * from sample_cow_orc""")
             contains "approximate"
@@ -132,7 +134,9 @@ suite("test_iceberg_optimize_count", "p0,external") {
         }
 
         // don't use push down count
+        sql """set enable_external_table_batch_mode=false"""
         sql """ set enable_count_push_down_for_external_table=false; """
+        sql """set enable_file_scanner_v2=true"""
 
         qt_q05 """${sqlstr1}""" 
         qt_q06 """${sqlstr2}""" 
@@ -178,8 +182,8 @@ suite("test_iceberg_optimize_count", "p0,external") {
 
     } finally {
         sql """ set enable_count_push_down_for_external_table=true; """
+        sql """set enable_external_table_batch_mode=false"""
         sql """set num_partitions_in_batch_mode=1024"""
         // sql """drop catalog if exists ${catalog_name}"""
     }
 }
-
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 81a58f8d50baca..f92780caebe1f7 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -167,13 +167,10 @@ suite("test_hdfs_parquet_group0", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_20 """ select nation_key, name, region_key, rtrim(comment_col) from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet"); """
-                exception "[IO_ERROR]Out-of-bounds Access"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
index 76354e1739e41e..981b20326e44b9 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
@@ -244,10 +244,13 @@ suite("test_hdfs_parquet_group2", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/group-field-with-enum-as-logical-annotation.parquet"
-            order_qt_test_31 """ select * from HDFS(
+            test {
+                sql """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
+                exception "Logical type Enum cannot be applied to group node"
+            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/timemillis-in-i64.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
index 361cae60c85d1f..9e40df723825c9 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
@@ -865,7 +865,7 @@ suite("test_hdfs_parquet_group4", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'member0' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -2045,7 +2045,7 @@ suite("test_hdfs_parquet_group4", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'COLUMN1' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index d6d859a3ffe766..8a8a3273d23fd8 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -123,7 +123,7 @@ suite("test_hdfs_parquet_group5", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'timestamp' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -272,7 +272,7 @@ suite("test_hdfs_parquet_group5", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'timestamp' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
index 96ec42256fbf36..c5265ee3590241 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
@@ -427,7 +427,7 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'time_millis' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -484,10 +484,13 @@ suite("test_hdfs_parquet_group6", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/lzo_compression.parquet"
-            order_qt_test_64 """ select * from HDFS(
+            test {
+                sql """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
+                exception "LZO compression is supported by the Parquet format in general, it is currently not supported by the C++ implementation"
+            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/page_index_small_page.parquet"
@@ -649,13 +652,10 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "format" = "parquet") limit 10; """
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/test_parquet_time_type.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_87 """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'c2' is not supported"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/json.parquet"
@@ -673,13 +673,10 @@ suite("test_hdfs_parquet_group6", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/ARROW-17100.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_90 """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet"); """
-                exception "Can't read enough bytes in plain decode"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/parquet_cpp_example.parquet"
@@ -744,7 +741,7 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'time_micros' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }