diff --git a/be/src/core/column/column_array_view.h b/be/src/core/column/column_array_view.h index cc74d6e3c7088d..841a339a5abfca 100644 --- a/be/src/core/column/column_array_view.h +++ b/be/src/core/column/column_array_view.h @@ -120,6 +120,8 @@ struct ColumnArrayView { return false; } + bool is_nullable() const { return outer_null_map != nullptr; } + // Index-based access: uses offsets[actual - 1] (PaddedPODArray sentinel guarantees [-1] is valid) ArrayDataView operator[](size_t idx) const { size_t actual = is_const ? 0 : idx; diff --git a/be/src/exprs/function/array/function_array_index.h b/be/src/exprs/function/array/function_array_index.h index 742bb944494510..0984e803792859 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -32,6 +32,7 @@ #include "core/call_on_type_index.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" @@ -218,121 +219,40 @@ class FunctionArrayIndex : public IFunction { } private: - ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - // check array nested column type and get data - const auto& str_offs = reinterpret_cast(nested_column).get_offsets(); - const auto& str_chars = reinterpret_cast(nested_column).get_chars(); - - // check right column type and get data - const auto& right_offs = reinterpret_cast(right_column).get_offsets(); - const auto& right_chars = reinterpret_cast(right_column).get_chars(); - - // prepare return data - auto dst = PrimitiveTypeTraits::ColumnType::create(offsets.size(), 0); - auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); - auto& dst_null_data = dst_null_column->get_data(); - - // process - for (size_t row = 0; row < offsets.size(); ++row) { - if (outer_null_map && outer_null_map[row]) { - dst_null_data[row] = true; - continue; - } - dst_null_data[row] = false; - typename PrimitiveTypeTraits::CppType res = 0; - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - size_t right_off = right_offs[row - 1]; - size_t right_len = right_offs[row] - right_off; - for (size_t pos = 0; pos < len; ++pos) { - // match null value - if (right_nested_null_map && right_nested_null_map[row] && nested_null_map && - nested_null_map[pos + off]) { - ConcreteAction::apply(res, pos); - if constexpr (!ConcreteAction::resume_execution) { - break; - } - } - // some is null while another is not - if (right_nested_null_map && nested_null_map && - right_nested_null_map[row] != nested_null_map[pos + off]) { - continue; - } - if (nested_null_map && nested_null_map[pos + off]) { - continue; - } - size_t str_pos = str_offs[pos + off - 1]; - size_t str_len = str_offs[pos + off] - str_pos; - const char* left_raw_v = reinterpret_cast(&str_chars[str_pos]); - const char* right_raw_v = reinterpret_cast(&right_chars[right_off]); - // StringRef operator == using vec impl - if (StringRef(left_raw_v, str_len) == StringRef(right_raw_v, right_len)) { - ConcreteAction::apply(res, pos); - if constexpr (!ConcreteAction::resume_execution) { - break; - } - } - } - dst_data[row] = res; - } - - if (outer_null_map == nullptr) { - return dst; - } - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } - - template - ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - // check array nested column type and get data - const auto& nested_data = - reinterpret_cast(nested_column).get_data(); - - // check right column type and get data - const auto& right_data = reinterpret_cast(right_column).get_data(); - + template + ColumnPtr _execute_view(const ColumnArrayView& array_view, + const ColumnView& right_view) const { // prepare return data - auto dst = PrimitiveTypeTraits::ColumnType::create(offsets.size(), 0); + auto dst = PrimitiveTypeTraits::ColumnType::create(array_view.size(), 0); auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); + auto dst_null_column = ColumnUInt8::create(array_view.size(), 0); auto& dst_null_data = dst_null_column->get_data(); // process - for (size_t row = 0; row < offsets.size(); ++row) { - if (outer_null_map && outer_null_map[row]) { + for (size_t row = 0; row < array_view.size(); ++row) { + if (array_view.is_null_at(row)) { dst_null_data[row] = true; continue; } dst_null_data[row] = false; typename PrimitiveTypeTraits::CppType res = 0; - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - for (size_t pos = 0; pos < len; ++pos) { + const auto array_data = array_view[row]; + for (size_t pos = 0; pos < array_data.size(); ++pos) { // match null value - if (right_nested_null_map && right_nested_null_map[row] && nested_null_map && - nested_null_map[pos + off]) { + if (right_view.is_null_at(row) && array_data.is_null_at(pos)) { ConcreteAction::apply(res, pos); if constexpr (!ConcreteAction::resume_execution) { break; } } // some is null while another is not - if (right_nested_null_map && nested_null_map && - right_nested_null_map[row] != nested_null_map[pos + off]) { + if (right_view.is_null_at(row) != array_data.is_null_at(pos)) { continue; } - if (nested_null_map && nested_null_map[pos + off]) { + if (array_data.is_null_at(pos)) { continue; } - if (nested_data[pos + off] == right_data[row]) { + if (array_data.value_at(pos) == right_view.value_at(row)) { ConcreteAction::apply(res, pos); if constexpr (!ConcreteAction::resume_execution) { break; @@ -342,89 +262,40 @@ class FunctionArrayIndex : public IFunction { dst_data[row] = res; } - if (outer_null_map == nullptr) { + if (!array_view.is_nullable()) { return dst; } return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } - template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, - const UInt8* nested_null_map, const IColumn& nested_column, - const IColumn& right_column, - const UInt8* right_nested_null_map, - const UInt8* outer_null_map) const { - if (is_column(right_column)) { - return _execute_number( - offsets, nested_null_map, nested_column, right_column, right_nested_null_map, - outer_null_map); - } - return nullptr; - } - Status _execute_dispatch(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { - // extract array offsets and nested data - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); if (block.get_by_position(arguments[0]).type->get_primitive_type() != TYPE_ARRAY) { return Status::InvalidArgument(get_name() + " first argument must be array, but got " + block.get_by_position(arguments[0]).type->get_name()); } - const ColumnArray* array_column = nullptr; - const UInt8* array_null_map = nullptr; - if (const auto* nullable_array = check_and_get_column(left_column.get())) { - array_column = - reinterpret_cast(&nullable_array->get_nested_column()); - array_null_map = nullable_array->get_null_map_column().get_data().data(); - } else { - array_column = reinterpret_cast(left_column.get()); - } - const auto& offsets = array_column->get_offsets(); - const UInt8* nested_null_map = nullptr; - ColumnPtr nested_column = nullptr; - if (const auto* nested_null_column = - check_and_get_column(&array_column->get_data())) { - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = nested_null_column->get_nested_column_ptr(); - } else { - nested_column = array_column->get_data_ptr(); - } - - // get right column - ColumnPtr right_full_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - ColumnPtr right_column = right_full_column; - const UInt8* right_nested_null_map = nullptr; - if (const auto* nested_null_column = - check_and_get_column(right_column.get())) { - right_column = nested_null_column->get_nested_column_ptr(); - right_nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - } // execute auto array_type = remove_nullable(block.get_by_position(arguments[0]).type); auto left_element_type = remove_nullable( assert_cast(array_type.get())->get_nested_type()); auto right_type = remove_nullable(block.get_by_position(arguments[1]).type); + auto left_element_primitive_type = left_element_type->get_primitive_type(); + auto right_primitive_type = right_type->get_primitive_type(); ColumnPtr return_column = nullptr; - if (is_string_type(right_type->get_primitive_type()) && - is_string_type(left_element_type->get_primitive_type())) { - return_column = _execute_string(offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - } else if (right_type->get_primitive_type() == left_element_type->get_primitive_type()) { + if (right_primitive_type == left_element_primitive_type || + (is_string_type(right_primitive_type) && is_string_type(left_element_primitive_type))) { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - auto col = _execute_number_expanded( - offsets, nested_null_map, *nested_column, *right_column, - right_nested_null_map, array_null_map); - if (col) { - return_column = std::move(col); - return true; - } - return false; + constexpr PrimitiveType PType = DispatchType::PType; + auto array_view = + ColumnArrayView::create(block.get_by_position(arguments[0]).column); + auto right_view = + ColumnView::create(block.get_by_position(arguments[1]).column); + return_column = _execute_view(array_view, right_view); + return true; }; - dispatch_switch_scalar(right_type->get_primitive_type(), call); + dispatch_switch_all(left_element_primitive_type, call); } if (return_column) { diff --git a/be/src/exprs/function/array/function_array_remove.h b/be/src/exprs/function/array/function_array_remove.h index fe05b64d0b2222..5f471da86e9947 100644 --- a/be/src/exprs/function/array/function_array_remove.h +++ b/be/src/exprs/function/array/function_array_remove.h @@ -19,12 +19,9 @@ #include #include -#include #include #include -#include -#include #include #include @@ -37,6 +34,7 @@ #include "core/call_on_type_index.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_decimal.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" @@ -92,207 +90,30 @@ class FunctionArrayRemove : public IFunction { } private: - template - ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, - const IColumn& right_column, const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - // check array nested column type and get data - const auto& src_data = reinterpret_cast(nested_column).get_data(); - - // check target column type and get data - const auto& target_data = reinterpret_cast(right_column).get_data(); - - PaddedPODArray* dst_null_map = nullptr; - MutableColumnPtr array_nested_column = nullptr; - IColumn* dst_column = nullptr; - if (nested_null_map) { - auto dst_nested_column = - ColumnNullable::create(nested_column.clone_empty(), ColumnUInt8::create()); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr().get(); - dst_null_map = &dst_nested_column->get_null_map_data(); - dst_null_map->reserve(offsets.back()); - } else { - auto dst_nested_column = nested_column.clone_empty(); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column.get(); - } - - auto& dst_data = reinterpret_cast(*dst_column).get_data(); - dst_data.reserve(offsets.back()); - - auto dst_offsets_column = ColumnArray::ColumnOffsets::create(); - auto& dst_offsets = dst_offsets_column->get_data(); - dst_offsets.reserve(offsets.size()); - - size_t cur = 0; - for (size_t row = 0; row < offsets.size(); ++row) { - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - if (len == 0) { - // case: array:[], target:1 ==> [] - dst_offsets.push_back(cur); - continue; - } - - size_t cur_count = 0; - for (size_t pos = 0; pos < len; ++pos) { - // left is null, right is null - if (nested_null_map && nested_null_map[off + pos] && right_nested_null_map && - right_nested_null_map[row]) { - continue; - } - - // left is null, right is not null - if (nested_null_map && nested_null_map[off + pos]) { - // case: array:[Null], target:1 ==> [Null] - dst_data.push_back(typename NestedColumnType::value_type()); - ++cur_count; - dst_null_map->push_back(1); - continue; - } - - // left is not null, right is null - if (right_nested_null_map && right_nested_null_map[row]) { - dst_data.push_back(src_data[off + pos]); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - continue; - } - - // left is not null, right is not null - if (src_data[off + pos] == target_data[row]) { - continue; - } else { - dst_data.push_back(src_data[off + pos]); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - } - } - - cur += cur_count; - dst_offsets.push_back(cur); - } - - auto dst = - ColumnArray::create(std::move(array_nested_column), std::move(dst_offsets_column)); - if (array_null_map) { - auto dst_null_column = ColumnUInt8::create(); - dst_null_column->insert_range_from(*array_null_map, 0, offsets.size()); - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { - return dst; - } - } - - ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, - const IColumn& right_column, const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - // check array nested column type and get data - const auto& src_offs = reinterpret_cast(nested_column).get_offsets(); - const auto& src_chars = reinterpret_cast(nested_column).get_chars(); - - // check right column type and get data - const auto& target_offs = reinterpret_cast(right_column).get_offsets(); - const auto& target_chars = reinterpret_cast(right_column).get_chars(); - - PaddedPODArray* dst_null_map = nullptr; - MutableColumnPtr array_nested_column = nullptr; - IColumn* dst_column = nullptr; - if (nested_null_map) { - auto dst_nested_column = - ColumnNullable::create(nested_column.clone_empty(), ColumnUInt8::create()); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr().get(); - dst_null_map = &dst_nested_column->get_null_map_data(); - dst_null_map->reserve(offsets.back()); - } else { - auto dst_nested_column = nested_column.clone_empty(); - array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column.get(); - } - - auto& dst_offs = reinterpret_cast(*dst_column).get_offsets(); - auto& dst_chars = reinterpret_cast(*dst_column).get_chars(); - dst_offs.reserve(src_offs.size()); - dst_chars.reserve(src_offs.back()); + template + ColumnPtr _execute_view(const IColumn& array_data_column, + const ColumnArrayView& array_view, + const ColumnView& right_view) const { + auto array_nested_column = array_data_column.clone_empty(); + array_nested_column->reserve(array_data_column.size()); auto dst_offsets_column = ColumnArray::ColumnOffsets::create(); auto& dst_offsets = dst_offsets_column->get_data(); - dst_offsets.reserve(offsets.size()); + dst_offsets.reserve(array_view.size()); size_t cur = 0; - for (size_t row = 0; row < offsets.size(); ++row) { - size_t off = offsets[row - 1]; - size_t len = offsets[row] - off; - - if (len == 0) { - // case: array:[], target:'str' ==> [] - dst_offsets.push_back(cur); - continue; - } - - size_t target_off = target_offs[row - 1]; - size_t target_len = target_offs[row] - target_off; - + for (size_t row = 0; row < array_view.size(); ++row) { + const auto array_data = array_view[row]; size_t cur_count = 0; - for (size_t pos = 0; pos < len; ++pos) { - // left is null, right is null - if (nested_null_map && nested_null_map[off + pos] && right_nested_null_map && - right_nested_null_map[row]) { - continue; - } - - // left is null, right is not null - if (nested_null_map && nested_null_map[off + pos]) { - // case: array:[Null], target:'str' ==> [Null] - // dst_chars.push_back(0); - dst_offs.push_back(dst_offs.back()); - ++cur_count; - dst_null_map->push_back(1); + for (size_t pos = 0; pos < array_data.size(); ++pos) { + // Keep null values unless the remove target is also null. + if (array_data.is_null_at(pos) && right_view.is_null_at(row)) { continue; } - size_t src_pos = src_offs[pos + off - 1]; - size_t src_len = src_offs[pos + off] - src_pos; - - // left is not null, right is null - if (right_nested_null_map && right_nested_null_map[row]) { - const size_t old_size = dst_chars.size(); - const size_t new_size = old_size + src_len; - dst_chars.resize(new_size); - memcpy(&dst_chars[old_size], &src_chars[src_pos], src_len); - dst_offs.push_back(new_size); - ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } - continue; - } - - // left is not null, right is not null - const char* src_raw_v = reinterpret_cast(&src_chars[src_pos]); - const char* target_raw_v = reinterpret_cast(&target_chars[target_off]); - - if (std::string_view(src_raw_v, src_len) == - std::string_view(target_raw_v, target_len)) { - continue; - } else { - const size_t old_size = dst_chars.size(); - const size_t new_size = old_size + src_len; - dst_chars.resize(new_size); - memcpy(&dst_chars[old_size], &src_chars[src_pos], src_len); - dst_offs.push_back(new_size); + if (array_data.is_null_at(pos) || right_view.is_null_at(row) || + !(array_data.value_at(pos) == right_view.value_at(row))) { + array_nested_column->insert_from(array_data_column, array_data.offset + pos); ++cur_count; - if (nested_null_map) { - dst_null_map->push_back(0); - } } } @@ -302,116 +123,53 @@ class FunctionArrayRemove : public IFunction { auto dst = ColumnArray::create(std::move(array_nested_column), std::move(dst_offsets_column)); - if (array_null_map) { - auto dst_null_column = ColumnUInt8::create(); - dst_null_column->insert_range_from(*array_null_map, 0, offsets.size()); - return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); - } else { + if (!array_view.is_nullable()) { return dst; } - } - template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, - const IColumn& nested_column, const IColumn& right_column, - const UInt8* nested_null_map, - const UInt8* right_nested_null_map, - const ColumnUInt8* array_null_map) const { - if (is_column(right_column)) { - return _execute_number( - offsets, nested_column, right_column, nested_null_map, right_nested_null_map, - array_null_map); + auto dst_null_column = ColumnUInt8::create(array_view.size(), 0); + auto& dst_null_map = dst_null_column->get_data(); + for (size_t row = 0; row < array_view.size(); ++row) { + dst_null_map[row] = array_view.is_null_at(row); } - return nullptr; + return ColumnNullable::create(std::move(dst), std::move(dst_null_column)); } ColumnPtr _execute_dispatch(const ColumnsWithTypeAndName& arguments, size_t input_rows_count) const { // check array nested column type and get data - auto left_column = arguments[0].column->convert_to_full_column_if_const(); + const auto& [left_column, is_const] = unpack_if_const(arguments[0].column); const ColumnArray* array_column = nullptr; - const ColumnUInt8* array_null_map = nullptr; if (const auto* nullable_array = check_and_get_column(left_column.get())) { array_column = reinterpret_cast(&nullable_array->get_nested_column()); - array_null_map = &nullable_array->get_null_map_column(); nullable_array->sanity_check(); } else { array_column = reinterpret_cast(left_column.get()); } - const auto& offsets = array_column->get_offsets(); - const UInt8* nested_null_map = nullptr; - ColumnPtr nested_column = nullptr; - if (const auto* nested_null_column = - check_and_get_column(&array_column->get_data())) { - nested_null_column->sanity_check(); - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = nested_null_column->get_nested_column_ptr(); - } else { - nested_column = array_column->get_data_ptr(); - } - - // get right column - ColumnPtr right_full_column = arguments[1].column->convert_to_full_column_if_const(); - ColumnPtr right_column = right_full_column; - const UInt8* right_nested_null_map = nullptr; - if (const auto* nested_null_column = - check_and_get_column(right_column.get())) { - right_column = nested_null_column->get_nested_column_ptr(); - right_nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - } + DCHECK(is_const ? array_column->get_offsets().size() == 1 + : array_column->get_offsets().size() == input_rows_count); // execute auto array_type = remove_nullable(arguments[0].type); auto left_element_type = remove_nullable( assert_cast(array_type.get())->get_nested_type()); auto right_type = remove_nullable(arguments[1].type); + auto left_element_primitive_type = left_element_type->get_primitive_type(); + auto right_primitive_type = right_type->get_primitive_type(); ColumnPtr res = nullptr; - if (is_string_type(right_type->get_primitive_type()) && - is_string_type(left_element_type->get_primitive_type())) { - res = _execute_string(offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); - } else if (is_number(right_type->get_primitive_type()) && - is_number(left_element_type->get_primitive_type())) { + if (right_primitive_type == left_element_primitive_type || + (is_string_type(right_primitive_type) && is_string_type(left_element_primitive_type))) { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); + constexpr PrimitiveType PType = DispatchType::PType; + auto array_view = ColumnArrayView::create(arguments[0].column); + auto right_view = ColumnView::create(arguments[1].column); + res = _execute_view(array_column->get_data(), array_view, right_view); return true; }; - if (!dispatch_switch_number(left_element_type->get_primitive_type(), call)) { - throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, - "not support left type " + left_element_type->get_name()); - } - } else if (is_date_v2_or_datetime_v2(right_type->get_primitive_type()) && - is_date_v2_or_datetime_v2(left_element_type->get_primitive_type())) { - if (left_element_type->get_primitive_type() == PrimitiveType::TYPE_DATEV2) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } else if (left_element_type->get_primitive_type() == PrimitiveType::TYPE_DATETIMEV2) { - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, - right_nested_null_map, array_null_map); - } - } else if (is_timestamptz_type(right_type->get_primitive_type()) && - is_timestamptz_type(left_element_type->get_primitive_type())) { - res = _execute_number_expanded( - offsets, *nested_column, *right_column, nested_null_map, right_nested_null_map, - array_null_map); - } else if (is_ip(right_type->get_primitive_type()) && - is_ip(left_element_type->get_primitive_type())) { - if (left_element_type->get_primitive_type() == TYPE_IPV4) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } else if (left_element_type->get_primitive_type() == TYPE_IPV6) { - res = _execute_number_expanded(offsets, *nested_column, *right_column, - nested_null_map, right_nested_null_map, - array_null_map); - } + dispatch_switch_all(left_element_primitive_type, call); } return res; } diff --git a/be/src/exprs/function/array/function_arrays_overlap.h b/be/src/exprs/function/array/function_arrays_overlap.h index d71643945ca796..c515c08a1ff50f 100644 --- a/be/src/exprs/function/array/function_arrays_overlap.h +++ b/be/src/exprs/function/array/function_arrays_overlap.h @@ -31,6 +31,7 @@ #include "core/block/column_numbers.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" +#include "core/column/column_array_view.h" #include "core/column/column_nullable.h" #include "core/column/column_vector.h" #include "core/data_type/data_type.h" @@ -57,28 +58,28 @@ template class ColumnStr; using ColumnString = ColumnStr; -template +template struct OverlapSetImpl { - using ElementNativeType = typename NativeType::Type; + using ArrayView = ArrayDataView; + using ElementNativeType = + typename NativeType::ColumnType::value_type>::Type; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(vec[i]); + set.insert(array.value_at(i)); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -86,7 +87,7 @@ struct OverlapSetImpl { } } - if (set.contains(vec[i])) { + if (set.contains(array.value_at(i))) { return true; } } @@ -95,27 +96,26 @@ struct OverlapSetImpl { }; template <> -struct OverlapSetImpl { +struct OverlapSetImpl { + using ArrayView = ArrayDataView; using ElementNativeType = Int128; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(vec[i].value()); + set.insert(array.value_at(i).value()); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - const auto& vec = assert_cast(*column).get_data(); - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -123,7 +123,7 @@ struct OverlapSetImpl { } } - if (set.contains(vec[i].value())) { + if (set.contains(array.value_at(i).value())) { return true; } } @@ -132,24 +132,25 @@ struct OverlapSetImpl { }; template <> -struct OverlapSetImpl { +struct OverlapSetImpl { + using ArrayView = ArrayDataView; using Set = phmap::flat_hash_set>; Set set; bool has_null = false; - void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + void insert_array(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { has_null = true; continue; } - set.insert(column->get_data_at(i)); + set.insert(array.value_at(i)); } } - bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { - for (size_t i = start; i < start + size; ++i) { - if (nullmap[i]) { + bool find_any(const ArrayView& array) { + for (size_t i = 0; i < array.size(); ++i) { + if (array.is_null_at(i)) { if (has_null) { return true; } else { @@ -157,7 +158,7 @@ struct OverlapSetImpl { } } - if (set.contains(column->get_data_at(i))) { + if (set.contains(array.value_at(i))) { return true; } } @@ -279,31 +280,15 @@ class FunctionArraysOverlap : public IFunction { "with rows: {}", get_name(), req_id, input_rows_count); }); - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto right_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - ColumnArrayExecutionData left_exec_data; - ColumnArrayExecutionData right_exec_data; - Status ret = Status::InvalidArgument( "execute failed, unsupported types for function {}({}, {})", get_name(), block.get_by_position(arguments[0]).type->get_name(), block.get_by_position(arguments[1]).type->get_name()); - - // extract array column - if (!extract_column_array_info(*left_column, left_exec_data) || - !extract_column_array_info(*right_column, right_exec_data)) { - return ret; - } // prepare return column auto dst_nested_col = ColumnUInt8::create(input_rows_count, 0); auto dst_null_map = ColumnUInt8::create(input_rows_count, 0); UInt8* dst_null_map_data = dst_null_map->get_data().data(); - RETURN_IF_ERROR(_execute_nullable(left_exec_data, dst_null_map_data)); - RETURN_IF_ERROR(_execute_nullable(right_exec_data, dst_null_map_data)); - // execute overlap check auto array_type = remove_nullable(block.get_by_position(arguments[0]).type); auto left_element_type = remove_nullable( @@ -311,12 +296,15 @@ class FunctionArraysOverlap : public IFunction { auto call = [&](const auto& type) -> bool { using DispatchType = std::decay_t; - ret = _execute_internal( - left_exec_data, right_exec_data, dst_null_map_data, - dst_nested_col->get_data().data()); + constexpr PrimitiveType PType = DispatchType::PType; + auto left_view = + ColumnArrayView::create(block.get_by_position(arguments[0]).column); + auto right_view = + ColumnArrayView::create(block.get_by_position(arguments[1]).column); + ret = _execute_internal(left_view, right_view, dst_null_map_data, + dst_nested_col->get_data().data()); return true; }; - if (!dispatch_switch_all(left_element_type->get_primitive_type(), call)) { ret = Status::InvalidArgument("execute failed, not support type {} in function {}", left_element_type->get_name(), get_name()); @@ -331,13 +319,14 @@ class FunctionArraysOverlap : public IFunction { } private: - static Status _execute_nullable(const ColumnArrayExecutionData& data, UInt8* dst_nullmap_data) { - for (ssize_t row = 0; row < data.offsets_ptr->size(); ++row) { + template + static Status _execute_nullable(const ColumnArrayView& data, UInt8* dst_nullmap_data) { + for (ssize_t row = 0; row < data.size(); ++row) { if (dst_nullmap_data[row]) { continue; } - if (data.array_nullmap_data && data.array_nullmap_data[row]) { + if (data.is_null_at(row)) { dst_nullmap_data[row] = 1; continue; } @@ -345,44 +334,34 @@ class FunctionArraysOverlap : public IFunction { return Status::OK(); } - template - Status _execute_internal(const ColumnArrayExecutionData& left_data, - const ColumnArrayExecutionData& right_data, UInt8* dst_nullmap_data, + template + Status _execute_internal(const ColumnArrayView& left_data, + const ColumnArrayView& right_data, UInt8* dst_nullmap_data, UInt8* dst_data) const { - using ExecutorImpl = OverlapSetImpl; - for (ssize_t row = 0; row < left_data.offsets_ptr->size(); ++row) { + using ExecutorImpl = OverlapSetImpl; + RETURN_IF_ERROR(_execute_nullable(left_data, dst_nullmap_data)); + RETURN_IF_ERROR(_execute_nullable(right_data, dst_nullmap_data)); + for (ssize_t row = 0; row < left_data.size(); ++row) { // arrays_overlap(null, null) -> null if (dst_nullmap_data[row]) { continue; } dst_nullmap_data[row] = 0; - ssize_t left_start = (*left_data.offsets_ptr)[row - 1]; - ssize_t left_size = (*left_data.offsets_ptr)[row] - left_start; - ssize_t right_start = (*right_data.offsets_ptr)[row - 1]; - ssize_t right_size = (*right_data.offsets_ptr)[row] - right_start; - if (left_size == 0 || right_size == 0) { + const auto left_array = left_data[row]; + const auto right_array = right_data[row]; + if (left_array.size() == 0 || right_array.size() == 0) { dst_data[row] = 0; continue; } - const auto* small_data = &left_data; - const auto* large_data = &right_data; - - ssize_t small_start = left_start; - ssize_t large_start = right_start; - ssize_t small_size = left_size; - ssize_t large_size = right_size; - if (right_size < left_size) { - std::swap(small_data, large_data); - std::swap(small_start, large_start); - std::swap(small_size, large_size); - } + const auto& small_data = + right_array.size() < left_array.size() ? right_array : left_array; + const auto& large_data = + right_array.size() < left_array.size() ? left_array : right_array; ExecutorImpl impl; - impl.insert_array(small_data->nested_col.get(), small_data->nested_nullmap_data, - small_start, small_size); - dst_data[row] = impl.find_any(large_data->nested_col.get(), - large_data->nested_nullmap_data, large_start, large_size); + impl.insert_array(small_data); + dst_data[row] = impl.find_any(large_data); } return Status::OK(); } diff --git a/be/test/exprs/function/function_array_index_test.cpp b/be/test/exprs/function/function_array_index_test.cpp index 806c1b3a727e13..a39570c58ae7b0 100644 --- a/be/test/exprs/function/function_array_index_test.cpp +++ b/be/test/exprs/function/function_array_index_test.cpp @@ -221,4 +221,29 @@ TEST(function_array_index_test, array_position) { } } +TEST(function_array_index_test, const_arguments) { + TestArray vec = {Int32(1), Null(), Int32(3), Int32(1)}; + + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + PrimitiveType::TYPE_INT}; + DataSet data_set = {{{vec, Int32(1)}, UInt8(1)}}; + static_cast( + check_function("array_contains", input_types, data_set)); + } + { + InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT, + Consted {PrimitiveType::TYPE_INT}}; + DataSet data_set = {{{vec, Null()}, Int64(2)}}; + static_cast( + check_function("array_position", input_types, data_set)); + } + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + Consted {PrimitiveType::TYPE_INT}}; + DataSet data_set = {{{vec, Int32(1)}, Int64(2)}}; + static_cast(check_function("countequal", input_types, data_set)); + } +} + } // namespace doris diff --git a/be/test/exprs/function/function_array_remove_test.cpp b/be/test/exprs/function/function_array_remove_test.cpp new file mode 100644 index 00000000000000..772a93943df529 --- /dev/null +++ b/be/test/exprs/function/function_array_remove_test.cpp @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "core/block/block.h" +#include "core/column/column_const.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_nullable.h" +#include "exprs/function/function_test_util.h" +#include "exprs/function/simple_function_factory.h" + +namespace doris { + +static void check_array_remove_case(DataTypePtr array_type, DataTypePtr element_type, + const TestArray& array, const AnyType& target, + const TestArray& expected, bool const_array, + bool const_target) { + MutableColumnPtr array_column = array_type->create_column(); + ASSERT_TRUE(insert_cell(array_column, array_type, array)); + MutableColumnPtr target_column = element_type->create_column(); + ASSERT_TRUE(insert_cell(target_column, element_type, target)); + + constexpr size_t row_size = 1; + if (const_array) { + array_column = ColumnConst::create(std::move(array_column), row_size); + } + if (const_target) { + target_column = ColumnConst::create(std::move(target_column), row_size); + } + + Block block; + block.insert({std::move(array_column), array_type, "array"}); + block.insert({std::move(target_column), element_type, "target"}); + + DataTypePtr return_type = array_type; + FunctionBasePtr func = SimpleFunctionFactory::instance().get_function( + "array_remove", block.get_columns_with_type_and_name(), return_type); + ASSERT_NE(func, nullptr); + + ColumnNumbers arguments = {0, 1}; + std::vector arg_types = {array_type, element_type}; + std::vector> constant_cols = {nullptr, nullptr}; + if (const_array) { + constant_cols[0] = std::make_shared(block.get_by_position(0).column); + } + if (const_target) { + constant_cols[1] = std::make_shared(block.get_by_position(1).column); + } + + FunctionUtils fn_utils(return_type, arg_types, false); + auto* fn_ctx = fn_utils.get_fn_ctx(); + fn_ctx->set_constant_cols(constant_cols); + ASSERT_TRUE(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL).ok()); + ASSERT_TRUE(func->open(fn_ctx, FunctionContext::THREAD_LOCAL).ok()); + + block.insert({nullptr, return_type, "result"}); + auto result_idx = block.columns() - 1; + ASSERT_TRUE(func->execute(fn_ctx, block, arguments, result_idx, row_size).ok()); + static_cast(func->close(fn_ctx, FunctionContext::THREAD_LOCAL)); + static_cast(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + + MutableColumnPtr expected_column = return_type->create_column(); + ASSERT_TRUE(insert_cell(expected_column, return_type, expected)); + Field result_value; + block.get_by_position(result_idx).column->get(0, result_value); + Field expected_value; + expected_column->get(0, expected_value); + EXPECT_EQ(result_value, expected_value) + << "result: " << return_type->to_string(*block.get_by_position(result_idx).column, 0) + << ", expected: " << return_type->to_string(*expected_column, 0); +} + +TEST(function_array_remove_test, const_arguments) { + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {Int32(1), Null(), Int32(2), Int32(1)}; + TestArray expected = {Null(), Int32(2)}; + check_array_remove_case(array_type, element_type, array, Int32(1), expected, true, false); + } + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {std::string("abc"), Null(), std::string("def"), std::string("abc")}; + TestArray expected = {Null(), std::string("def")}; + check_array_remove_case(array_type, element_type, array, std::string("abc"), expected, + false, true); + } + { + auto element_type = make_nullable(std::make_shared()); + auto array_type = std::make_shared(element_type); + TestArray array = {Int32(1), Null(), Int32(2), Null()}; + TestArray expected = {Int32(1), Int32(2)}; + check_array_remove_case(array_type, element_type, array, Null(), expected, true, true); + } +} + +} // namespace doris diff --git a/be/test/exprs/function/function_arrays_overlap_test.cpp b/be/test/exprs/function/function_arrays_overlap_test.cpp index e3e3599b2eb840..9aa0dd1dd730fe 100644 --- a/be/test/exprs/function/function_arrays_overlap_test.cpp +++ b/be/test/exprs/function/function_arrays_overlap_test.cpp @@ -152,4 +152,26 @@ TEST(function_arrays_overlap_test, arrays_overlap) { } } +TEST(function_arrays_overlap_test, const_arguments) { + { + InputTypeSet input_types = {Consted {PrimitiveType::TYPE_ARRAY}, PrimitiveType::TYPE_INT, + PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; + TestArray left = {Int32(1), Null(), Int32(3)}; + TestArray right = {Int32(4), Null()}; + DataSet data_set = {{{left, right}, UInt8(1)}}; + static_cast( + check_function("arrays_overlap", input_types, data_set)); + } + { + InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR, + Consted {PrimitiveType::TYPE_ARRAY}, + PrimitiveType::TYPE_VARCHAR}; + TestArray left = {std::string("abc"), std::string("def")}; + TestArray right = {std::string("xyz"), std::string("abc")}; + DataSet data_set = {{{left, right}, UInt8(1)}}; + static_cast( + check_function("arrays_overlap", input_types, data_set)); + } +} + } // namespace doris