Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,19 @@ jobs:
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get update
sudo apt-get install -y libarrow-dev
sudo apt-get install -y libarrow-dev libboost-coroutine-dev libboost-context-dev

- name: Install dependencies (macOS)
if: matrix.os == 'macos-latest'
run: |
brew install apache-arrow protobuf
brew install apache-arrow protobuf boost

- uses: msys2/setup-msys2@v2
if: matrix.os == 'windows-latest'
with:
msystem: ucrt64
path-type: inherit
install: mingw-w64-ucrt-x86_64-arrow mingw-w64-ucrt-x86_64-protobuf
install: mingw-w64-ucrt-x86_64-arrow mingw-w64-ucrt-x86_64-protobuf mingw-w64-ucrt-x86_64-boost

- name: Configure CMake
if: matrix.os != 'windows-latest'
Expand Down Expand Up @@ -163,7 +163,7 @@ jobs:
cmake .. -G Ninja \
-DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchain.cmake \
-DCMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/arrow-install \
-DBUILD_EXAMPLES=ON -DBUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
-DBUILD_EXAMPLES=ON -DCMAKE_BUILD_TYPE=Release

- name: Build
run: ninja -C build
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get update
sudo apt-get install -y libarrow-dev
sudo apt-get install -y libarrow-dev libboost-coroutine-dev libboost-context-dev

- name: Configure CMake
run: |
Expand Down
79 changes: 79 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,57 @@ if(BUILD_TESTS)
)
list(APPEND TEST_TARGETS lancedb_index_tests)

# Async table tests.
# Built from tests/test_table_async.cpp together with tests/test_main.cpp and
# tests/test_common.cpp, which the coroutine test executable compiles as well.
# CTest registers this binary under valgrind memcheck; the fallback branch
# that reports valgrind as not found runs it directly.
add_executable(lancedb_table_async_tests
    tests/test_main.cpp
    tests/test_common.cpp
    tests/test_table_async.cpp
)

# Header search paths: Arrow, the public lancedb headers, and the test helpers.
target_include_directories(lancedb_table_async_tests
    PRIVATE
        ${ARROW_INCLUDE_DIRS}
        ${CMAKE_CURRENT_SOURCE_DIR}/include
        ${CMAKE_CURRENT_SOURCE_DIR}/tests
)
target_compile_options(lancedb_table_async_tests PRIVATE ${ARROW_CFLAGS_OTHER})

target_link_libraries(lancedb_table_async_tests
    PRIVATE
        lancedb
        Catch2::Catch2
        Threads::Threads
        ${ARROW_LIBRARIES}
)

# NOTE(review): RUST_TARGET_DIR is presumably where the Rust-built lancedb
# library is emitted, so the test can locate it at run time from the build
# tree — confirm against the definition of RUST_TARGET_DIR.
set_target_properties(lancedb_table_async_tests PROPERTIES
    BUILD_RPATH ${RUST_TARGET_DIR}
)

list(APPEND TEST_TARGETS lancedb_table_async_tests)

# Coroutine table tests (registered with CTest under valgrind memcheck).
# Boost.Coroutine and Boost.Context are hard requirements here: configuration
# stops if either component cannot be found.
find_package(Boost REQUIRED COMPONENTS coroutine context)

# Built from tests/test_table_coro.cpp together with tests/test_main.cpp and
# tests/test_common.cpp, which the async test executable compiles as well.
add_executable(lancedb_table_coro_tests
    tests/test_main.cpp
    tests/test_common.cpp
    tests/test_table_coro.cpp
)

# Header search paths: Arrow, the public lancedb headers, and the test helpers.
target_include_directories(lancedb_table_coro_tests
    PRIVATE
        ${ARROW_INCLUDE_DIRS}
        ${CMAKE_CURRENT_SOURCE_DIR}/include
        ${CMAKE_CURRENT_SOURCE_DIR}/tests
)
target_compile_options(lancedb_table_coro_tests PRIVATE ${ARROW_CFLAGS_OTHER})

# Same link line as the async tests, plus the two Boost imported targets.
target_link_libraries(lancedb_table_coro_tests
    PRIVATE
        lancedb
        Catch2::Catch2
        Threads::Threads
        ${ARROW_LIBRARIES}
        Boost::coroutine
        Boost::context
)

# NOTE(review): RUST_TARGET_DIR is presumably where the Rust-built lancedb
# library is emitted, so the test can locate it at run time from the build
# tree — confirm against the definition of RUST_TARGET_DIR.
set_target_properties(lancedb_table_coro_tests PROPERTIES
    BUILD_RPATH ${RUST_TARGET_DIR}
)

list(APPEND TEST_TARGETS lancedb_table_coro_tests)

# Add test executable for vector tests (runs without valgrind)
add_executable(lancedb_vector_index_tests
tests/test_main.cpp
Expand Down Expand Up @@ -503,13 +554,41 @@ if(BUILD_TESTS)
--suppressions=${CMAKE_CURRENT_SOURCE_DIR}/valgrind.supp
$<TARGET_FILE:lancedb_query_tests>
)
# Run async table tests with valgrind.
# memcheck performs a full leak check, but only "definite" leaks are shown and
# counted as errors. Any reported error makes valgrind exit with code 1, which
# CTest records as a failure. The report is written to
# valgrind_table_async.txt in the top-level build directory, and known noise
# is filtered through valgrind.supp from this source directory.
add_test(NAME lancedb_table_async_tests
COMMAND ${TEST_ENV_PREFIX} ${VALGRIND_EXECUTABLE}
--tool=memcheck
--leak-check=full
--show-leak-kinds=definite
--errors-for-leak-kinds=definite
--track-origins=yes
--error-exitcode=1
--log-file=${CMAKE_BINARY_DIR}/valgrind_table_async.txt
--suppressions=${CMAKE_CURRENT_SOURCE_DIR}/valgrind.supp
$<TARGET_FILE:lancedb_table_async_tests>
)
# Run coroutine table tests with valgrind.
# memcheck performs a full leak check, but only "definite" leaks are shown and
# counted as errors. Any reported error makes valgrind exit with code 1, which
# CTest records as a failure. The report is written to
# valgrind_table_coro.txt in the top-level build directory, and known noise
# is filtered through valgrind.supp from this source directory.
# NOTE(review): this binary links Boost.Coroutine/Boost.Context; memcheck may
# need Boost built with valgrind support to follow coroutine stack switches
# without spurious reports — confirm on the CI toolchain.
add_test(NAME lancedb_table_coro_tests
COMMAND ${TEST_ENV_PREFIX} ${VALGRIND_EXECUTABLE}
--tool=memcheck
--leak-check=full
--show-leak-kinds=definite
--errors-for-leak-kinds=definite
--track-origins=yes
--error-exitcode=1
--log-file=${CMAKE_BINARY_DIR}/valgrind_table_coro.txt
--suppressions=${CMAKE_CURRENT_SOURCE_DIR}/valgrind.supp
$<TARGET_FILE:lancedb_table_coro_tests>
)
else()
message(WARNING "Valgrind not found, running tests without memory checking")
add_test(NAME lancedb_connection_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_connection_tests>)
add_test(NAME lancedb_table_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_table_tests>)
add_test(NAME lancedb_table_meta_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_table_meta_tests>)
add_test(NAME lancedb_index_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_index_tests>)
add_test(NAME lancedb_query_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_query_tests>)
# Async and coroutine table tests in the no-valgrind fallback: each binary is
# launched directly, prefixed only by TEST_ENV_PREFIX.
add_test(NAME lancedb_table_async_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_table_async_tests>)
add_test(NAME lancedb_table_coro_tests COMMAND ${TEST_ENV_PREFIX} $<TARGET_FILE:lancedb_table_coro_tests>)
endif()

# Run vector index tests WITHOUT valgrind (too slow under valgrind)
Expand Down
52 changes: 27 additions & 25 deletions examples/full.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ int main() {
std::cerr << "failed to create connection builder" << std::endl;
return 1;
}
LanceDBConnection* db = lancedb_connect_builder_execute(builder);
LanceDBConnection* db = lancedb_connect_builder_execute(builder, nullptr);
if (!db) {
std::cerr << "failed to connect to database" << std::endl;
return 1;
Expand Down Expand Up @@ -135,7 +135,7 @@ int main() {
LanceDBTable* table = nullptr;
if (const LanceDBError result = lancedb_table_create(db, table_name.c_str(),
reinterpret_cast<FFI_ArrowSchema*>(&c_schema),
nullptr, &table, nullptr); result != LANCEDB_SUCCESS) {
nullptr, &table, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error creating table: " << table_name << ", error: " << lancedb_error_to_message(result) << std::endl;
lancedb_connection_free(db);
if (c_schema.release) {
Expand All @@ -149,7 +149,7 @@ int main() {
// try to create a table that already exists
if (const LanceDBError result = lancedb_table_create(db, "my_table",
reinterpret_cast<FFI_ArrowSchema*>(&c_schema),
nullptr, &table, &error_message); result != LANCEDB_SUCCESS) {
nullptr, &table, nullptr, &error_message); result != LANCEDB_SUCCESS) {
std::cout << "failed to create table that already exists (expected), error: '" <<
lancedb_error_to_message(result) <<
"' message: " << std::endl << error_message << std::endl;
Expand All @@ -162,7 +162,7 @@ int main() {
// try to create a table with invalid name
if (const LanceDBError result = lancedb_table_create(db, "invalid table name",
reinterpret_cast<FFI_ArrowSchema*>(&c_schema),
nullptr, &table, &error_message); result != LANCEDB_SUCCESS) {
nullptr, &table, nullptr, &error_message); result != LANCEDB_SUCCESS) {
std::cout << "failed to create table with invalid name (expected), error: '" <<
lancedb_error_to_message(result) <<
"' message: " << std::endl << error_message << std::endl;
Expand All @@ -179,7 +179,7 @@ int main() {
// try to create a table with invalid input (null schema)
if (const LanceDBError result = lancedb_table_create(db, "invalid_table",
nullptr,
nullptr, &table, &error_message); result != LANCEDB_SUCCESS) {
nullptr, &table, nullptr, &error_message); result != LANCEDB_SUCCESS) {
std::cout << "failed to create table with null schema (expected), error: '" <<
lancedb_error_to_message(result) <<
"' message: " << std::endl << error_message << std::endl;
Expand All @@ -190,7 +190,7 @@ int main() {
}

// open the table to work with it
LanceDBTable* tbl = lancedb_connection_open_table(db, table_name.c_str());
LanceDBTable* tbl = lancedb_connection_open_table(db, table_name.c_str(), nullptr);
if (!tbl) {
std::cerr << "failed to open table: " << table_name << std::endl;
lancedb_connection_free(db);
Expand All @@ -204,7 +204,7 @@ int main() {
.force_update_statistics = 0 // don't force update statistics
};
if (const LanceDBError result = lancedb_table_create_scalar_index(
tbl, key_columns, 1, LANCEDB_INDEX_BTREE, &scalar_config, nullptr); result != LANCEDB_SUCCESS) {
tbl, key_columns, 1, LANCEDB_INDEX_BTREE, &scalar_config, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "failed to create scalar index on 'key' column, error: '" <<
lancedb_error_to_message(result) << "'" << std::endl;
} else {
Expand All @@ -214,7 +214,7 @@ int main() {
// try to create the same index again without replace flag
scalar_config.replace = 0;
if (const LanceDBError result = lancedb_table_create_scalar_index(
tbl, key_columns, 1, LANCEDB_INDEX_BTREE, &scalar_config, &error_message); result != LANCEDB_SUCCESS) {
tbl, key_columns, 1, LANCEDB_INDEX_BTREE, &scalar_config, nullptr, &error_message); result != LANCEDB_SUCCESS) {
std::cout << "failed to create scalar index on 'key' column (expected), error: '" <<
lancedb_error_to_message(result) <<
"' message: " << std::endl << error_message << std::endl;
Expand Down Expand Up @@ -266,7 +266,7 @@ int main() {
reinterpret_cast<FFI_ArrowSchema*>(&c_schema),
&batch_reader, nullptr); error == LANCEDB_SUCCESS) {
// add data to table
if (const LanceDBError result = lancedb_table_add(tbl, batch_reader, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_table_add(tbl, batch_reader, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "failed to write record batch to table, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "wrote " << num_rows << " rows to table" << std::endl;
Expand Down Expand Up @@ -295,7 +295,7 @@ int main() {
.replace = 1 // replace existing index
};
if (const LanceDBError result = lancedb_table_create_vector_index(
tbl, data_columns, 1, LANCEDB_INDEX_IVF_FLAT, &vector_config, nullptr); result != LANCEDB_SUCCESS) {
tbl, data_columns, 1, LANCEDB_INDEX_IVF_FLAT, &vector_config, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "failed to create vector index on 'data' column, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "created vector index on 'data' column" << std::endl;
Expand All @@ -309,7 +309,7 @@ int main() {
};
for (size_t i = 0; i < 3; i++) {
if (const LanceDBError result = lancedb_table_create_scalar_index(
tbl, &tag_columns[i], 1, LANCEDB_INDEX_BITMAP, &bitmap_config, nullptr); result != LANCEDB_SUCCESS) {
tbl, &tag_columns[i], 1, LANCEDB_INDEX_BITMAP, &bitmap_config, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "failed to create bitmap index on '" << tag_columns[i] << "' column, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "created bitmap index on '" << tag_columns[i] << "' column" << std::endl;
Expand All @@ -336,7 +336,7 @@ int main() {
"data",
reinterpret_cast<FFI_ArrowArray***>(&c_arrays_ptr),
reinterpret_cast<FFI_ArrowSchema**>(&c_schema_ptr),
&count_out, nullptr); result != LANCEDB_SUCCESS) {
&count_out, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error querying nearest to vector, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "query returned " << count_out << " results" << std::endl;
Expand Down Expand Up @@ -375,7 +375,7 @@ int main() {
} else {
std::cout << "set query distance type to: L2" << std::endl;
// execute the query
if (LanceDBQueryResult* query_result = lancedb_vector_query_execute(query); query_result) {
if (LanceDBQueryResult* query_result = lancedb_vector_query_execute(query, nullptr); query_result) {
std::cout << "executed query" << std::endl;
// get the result as arrow arrays
struct ArrowArray** c_arrays_ptr;
Expand All @@ -386,6 +386,7 @@ int main() {
reinterpret_cast<FFI_ArrowArray***>(&c_arrays_ptr),
reinterpret_cast<FFI_ArrowSchema**>(&c_schema_ptr),
&count_out,
nullptr,
nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error converting query result to arrow, error: " << lancedb_error_to_message(result) << std::endl;
lancedb_query_result_free(query_result);
Expand All @@ -410,19 +411,19 @@ int main() {
// list all tables in the database and loop through them
char** table_names;
size_t name_count;
if (const LanceDBError result = lancedb_connection_table_names(db, &table_names, &name_count, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_connection_table_names(db, &table_names, &name_count, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error listing table names, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << name_count << " tables found" << std::endl;
for (size_t i = 0; i < name_count; i++) {
if (LanceDBTable* tbl = lancedb_connection_open_table(db, table_names[i]); tbl) {
if (LanceDBTable* tbl = lancedb_connection_open_table(db, table_names[i], nullptr); tbl) {

// get the schema of the table
struct ArrowSchema* c_schema_ptr;
if (const LanceDBError result = lancedb_table_arrow_schema(
tbl,
reinterpret_cast<FFI_ArrowSchema**>(&c_schema_ptr),
nullptr); result == LANCEDB_SUCCESS) {
nullptr, nullptr); result == LANCEDB_SUCCESS) {
if (auto schema = arrow::ImportSchema(c_schema_ptr); schema.ok()) {
std::cout << "table: " << table_names[i] << ", schema:" << std::endl;
std::cout << (*schema)->ToString() << std::endl;
Expand All @@ -437,14 +438,14 @@ int main() {
// list all indices of the table
char** indices;
size_t indices_count;
if (const LanceDBError result = lancedb_table_list_indices(tbl, &indices, &indices_count, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_table_list_indices(tbl, &indices, &indices_count, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "failed to list indices, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "found " << indices_count << " indices:" << std::endl;
for (size_t i = 0; i < indices_count; i++) {
std::cout << " - " << indices[i] << std::endl;
// delete the index
if (const LanceDBError result = lancedb_table_drop_index(tbl, indices[i], nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_table_drop_index(tbl, indices[i], nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << " error dropping index: " << indices[i] << ", error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << " dropped index: " << indices[i] << std::endl;
Expand All @@ -454,27 +455,27 @@ int main() {
}

// optimize the table after index deletion
if (const LanceDBError result = lancedb_table_optimize(tbl, LANCEDB_OPTIMIZE_ALL, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_table_optimize(tbl, LANCEDB_OPTIMIZE_ALL, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error optimizing table: " << table_names[i] << ", error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "optimized table: " << table_names[i] << std::endl;
}

// number of rows in the table
auto row_count = lancedb_table_count_rows(tbl);
auto row_count = lancedb_table_count_rows(tbl, nullptr);
std::cout << "table: " << table_names[i] << " has: " << row_count << " rows" << std::endl;

// delete some rows
const auto delete_predicates = {"key = \"key_10\"", "key = \"key_20\"", "key = \"key_30\"", "key = \"kaboom\""};
for (const auto& predicate : delete_predicates) {
if (const LanceDBError result = lancedb_table_delete(tbl, predicate, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_table_delete(tbl, predicate, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error deleting row with predicate: " << predicate << ", error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "deleted row with predicate: " << predicate << std::endl;
}
}
// check number of rows in the table after deletion
row_count = lancedb_table_count_rows(tbl);
row_count = lancedb_table_count_rows(tbl, nullptr);
std::cout << "after deletion table: " << table_names[i] << " has: " << row_count << " rows" << std::endl;

// perform table upsert with 3 new rows and 3 updated rows
Expand Down Expand Up @@ -535,6 +536,7 @@ int main() {
on_columns.data(),
1,
&config,
nullptr,
&error_message); result != LANCEDB_SUCCESS) {
std::cerr << "failed to upsert record batch to table, error: " << lancedb_error_to_message(result) << ", message: " << error_message << std::endl;
lancedb_free_string(error_message);
Expand All @@ -554,11 +556,11 @@ int main() {
}

// check number of rows in the table after upsert
row_count = lancedb_table_count_rows(tbl);
row_count = lancedb_table_count_rows(tbl, nullptr);
std::cout << "after upsert table: " << table_names[i] << " has: " << row_count << " rows" << std::endl;

// drop the table
if (LanceDBError result = lancedb_connection_drop_table(db, table_names[i], nullptr, nullptr); result != LANCEDB_SUCCESS) {
if (LanceDBError result = lancedb_connection_drop_table(db, table_names[i], nullptr, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error dropping table: " << table_names[i] << ", error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "dropped table: " << table_names[i] << std::endl;
Expand All @@ -571,7 +573,7 @@ int main() {
}

lancedb_free_table_names(table_names, name_count);
if (const LanceDBError result = lancedb_connection_drop_all_tables(db, nullptr, nullptr); result != LANCEDB_SUCCESS) {
if (const LanceDBError result = lancedb_connection_drop_all_tables(db, nullptr, nullptr, nullptr); result != LANCEDB_SUCCESS) {
std::cerr << "error dropping all tables, error: " << lancedb_error_to_message(result) << std::endl;
} else {
std::cout << "dropped all tables" << std::endl;
Expand Down
Loading
Loading