From 1e98a87b2f8daf09b36f98056992d4ca7aaf54cb Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 21 Jan 2026 12:04:40 +0000 Subject: [PATCH 01/20] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/cmd_cuckoo_filter.cc | 109 ++++++++++++++++ src/storage/redis_metadata.cc | 44 +++++++ src/storage/redis_metadata.h | 37 +++++- src/types/cuckoo_filter.h | 61 +++++++++ src/types/redis_cuckoo_chain.cc | 113 +++++++++++++++++ src/types/redis_cuckoo_chain.h | 53 ++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 147 ++++++++++++++++++++++ 7 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 src/commands/cmd_cuckoo_filter.cc create mode 100644 src/types/cuckoo_filter.h create mode 100644 src/types/redis_cuckoo_chain.cc create mode 100644 src/types/redis_cuckoo_chain.h create mode 100644 tests/cppunit/types/cuckoo_filter_test.cc diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc new file mode 100644 index 00000000000..eb5e6ce3c94 --- /dev/null +++ b/src/commands/cmd_cuckoo_filter.cc @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "command_parser.h" +#include "commander.h" +#include "error_constants.h" +#include "server/server.h" +#include "types/redis_cuckoo_chain.h" + +namespace redis { + +class CommandCFReserve : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] + if (args.size() < 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + + // Parse capacity (required) + auto parse_capacity = ParseInt(args[2], 10); + if (!parse_capacity) { + return {Status::RedisParseErr, "invalid capacity"}; + } + capacity_ = *parse_capacity; + if (capacity_ <= 0) { + return {Status::RedisParseErr, "capacity must be larger than 0"}; + } + + // Parse optional parameters + CommandParser parser(args, 3); + while (parser.Good()) { + if (parser.EatEqICase("BUCKETSIZE")) { + auto parse_bucket_size = parser.TakeInt(); + if (!parse_bucket_size.IsOK()) { + return {Status::RedisParseErr, "invalid bucket size"}; + } + bucket_size_ = parse_bucket_size.GetValue(); + if (bucket_size_ == 0 || bucket_size_ > 255) { + return {Status::RedisParseErr, "bucket size must be between 1 and 255"}; + } + } else if (parser.EatEqICase("MAXITERATIONS")) { + auto parse_max_iterations = parser.TakeInt(); + if (!parse_max_iterations.IsOK()) { + return {Status::RedisParseErr, "invalid max iterations"}; + } + max_iterations_ = parse_max_iterations.GetValue(); + if (max_iterations_ == 0) { + return {Status::RedisParseErr, "max iterations must be larger than 0"}; + } + } else if (parser.EatEqICase("EXPANSION")) { + auto parse_expansion = parser.TakeInt(); + if (!parse_expansion.IsOK()) { + return {Status::RedisParseErr, "invalid expansion factor"}; + } + expansion_ = parse_expansion.GetValue(); + } else { + return {Status::RedisParseErr, errInvalidSyntax}; + } + } + + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + + if (!s.ok()) { + if (s.IsInvalidArgument()) { + // Return error message to client + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to create cuckoo filter"}; + } + + *output = redis::SimpleString("OK"); + return Status::OK(); + } + + private: + uint64_t capacity_ = kCFDefaultCapacity; + uint8_t bucket_size_ = kCFDefaultBucketSize; + uint16_t max_iterations_ = kCFDefaultMaxIterations; + uint8_t expansion_ = kCFDefaultExpansion; +}; + +// Register the CF.RESERVE command +REDIS_REGISTER_COMMANDS(CuckooFilter, + MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + +} // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 16fb190fe26..a004917f329 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -567,3 +567,47 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } + + +void CuckooChainMetadata::Encode(std::string *dst) const { + Metadata::Encode(dst); + + PutFixed16(dst, n_filters); + PutFixed16(dst, expansion); + PutFixed64(dst, base_capacity); + PutFixed8(dst, bucket_size); + PutFixed16(dst, max_iterations); + PutFixed64(dst, num_deleted_items); +} + +rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { + if (auto s = Metadata::Decode(input); !s.ok()) { + return s; + } + + if (input->size() < 21) { + return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); + } + + GetFixed16(input, &n_filters); + GetFixed16(input, &expansion); + GetFixed64(input, &base_capacity); + GetFixed8(input, &bucket_size); + GetFixed16(input, &max_iterations); + GetFixed64(input, &num_deleted_items); + + return rocksdb::Status::OK(); +} + +uint64_t CuckooChainMetadata::GetTotalCapacity() const { + if (expansion == 0 || n_filters == 1) { + return base_capacity; + } + + // Calculate total capacity across all filters + uint64_t total = 0; + for (uint16_t i = 0; i < n_filters; i++) { + total += base_capacity * std::pow(expansion, i); + } + return total; +} diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 2db3e9d0cec..4bdd66b500b 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -54,12 +54,13 @@ enum RedisType : uint8_t { kRedisHyperLogLog = 11, kRedisTDigest = 12, kRedisTimeSeries = 13, + kRedisCuckooFilter = 14, kRedisTypeMax }; inline constexpr const std::array RedisTypeNames = { "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries"}; + "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -306,6 +307,40 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +class CuckooChainMetadata : public Metadata { + public: + /// The number of sub-filters in the chain + uint16_t n_filters; + + /// Expansion factor for new filters + /// When a filter is full, a new one is created with capacity = base_capacity * expansion^n + uint16_t expansion; + + /// The capacity of the first filter + uint64_t base_capacity; + + /// Number of fingerprints per bucket + uint8_t bucket_size; + + /// Maximum number of cuckoo kicks before considering filter full + uint16_t max_iterations; + + /// Track number of deleted items for maintenance + uint64_t num_deleted_items; + + explicit CuckooChainMetadata(bool generate_version = true) + : Metadata(kRedisCuckooFilter, generate_version), + n_filters(0), expansion(0), base_capacity(0), + bucket_size(0), max_iterations(0), num_deleted_items(0) {} + + void Encode(std::string *dst) const override; + using Metadata::Decode; + rocksdb::Status Decode(Slice *input) override; + + uint64_t GetTotalCapacity() const; + bool IsScaling() const { return expansion > 0; } +}; + enum class JsonStorageFormat : uint8_t { JSON = 0, CBOR = 1, diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h new file mode 100644 index 00000000000..f305f8c4480 --- /dev/null +++ b/src/types/cuckoo_filter.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include +#include + +namespace redis { + +// Cuckoo filter implementation from the paper: +// "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. +// This is a bucket-based storage implementation where each bucket is stored +// as an independent key-value pair in RocksDB +class CuckooFilter { + public: + // Calculate the optimal number of buckets for the filter + static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { + // A load factor of 95.5% is chosen for the cuckoo filter + uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + // Round up to next power of 2 for better hash distribution + if (num_buckets == 0) num_buckets = 1; + uint32_t power = 1; + while (power < num_buckets) power <<= 1; + return power; + } + + // Generate fingerprint from hash (8-bit fingerprint, non-zero) + static uint8_t GenerateFingerprint(uint64_t hash) { + uint8_t fp = hash & 0xFF; + return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + } + + // Calculate alternate bucket index using XOR + static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { + // Use a simple hash of the fingerprint for the XOR operation + uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant + return (bucket_idx ^ fp_hash) % num_buckets; + } +}; + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc new file mode 100644 index 00000000000..5f75e57fdc8 --- /dev/null +++ b/src/types/redis_cuckoo_chain.cc @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "redis_cuckoo_chain.h" + +#include + +#include + +#include "cuckoo_filter.h" + +namespace redis { + +rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); +} + +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index) { + // Create a sub-key that includes both filter index and bucket index + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, bucket_index); + + // Create the internal key using the storage encoding + std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + return bucket_key; +} + +rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, + uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + // Validate parameters + if (capacity == 0) { + return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); + } + + if (bucket_size == 0 || bucket_size > 255) { + return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); + } + + if (max_iterations == 0) { + return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); + } + + std::string ns_key = AppendNamespacePrefix(user_key); + + // Check if the key already exists + CuckooChainMetadata metadata; + rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (s.ok()) { + return rocksdb::Status::InvalidArgument("the key already exists"); + } + if (!s.IsNotFound()) { + return s; // Return other errors + } + + // Initialize metadata for the new cuckoo filter + metadata.size = 0; + metadata.base_capacity = capacity; + metadata.bucket_size = bucket_size; + metadata.max_iterations = max_iterations; + metadata.expansion = expansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + + // Calculate the number of buckets needed for this filter + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity + << ", bucket_size=" << bucket_size + << ", num_buckets=" << num_buckets + << ", max_iterations=" << max_iterations + << ", expansion=" << static_cast(expansion); + + // Create a write batch for atomic operation + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + // Store the metadata + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + // Note: With bucket-based storage, we don't pre-allocate all buckets + // Buckets will be created lazily on first write + // This saves memory for sparse filters + + // Optionally, we could create the first few buckets to ensure the filter is ready + // But for now, we'll keep it fully lazy for maximum memory efficiency + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h new file mode 100644 index 00000000000..f606f5f8c39 --- /dev/null +++ b/src/types/redis_cuckoo_chain.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" + +namespace redis { + +// Default values for Cuckoo Filter +const uint32_t kCFDefaultCapacity = 1024; +const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket +const uint16_t kCFDefaultMaxIterations = 500; +const uint8_t kCFDefaultExpansion = 2; + +class CuckooChain : public Database { + public: + CuckooChain(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} + + // CF.RESERVE command - creates a new cuckoo filter with specified capacity + rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint8_t expansion); + + private: + // Get metadata for the cuckoo filter + rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + + // Generate key for a specific bucket in bucket-based storage + // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index); +}; + +} // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc new file mode 100644 index 00000000000..8a4a43c0f8f --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include +#include + +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooFilterTest : public TestBase { + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), namespace_); + } + ~RedisCuckooFilterTest() override = default; + + void SetUp() override { + key_ = "test_cuckoo_filter_key"; + } + + void TearDown() override { + [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + } + + std::unique_ptr cuckoo_; + std::string key_; +}; + +TEST_F(RedisCuckooFilterTest, ReserveBasic) { + // Test basic reserve operation + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { + // First reserve should succeed + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Second reserve with same key should fail + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + // Test with zero capacity + auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero bucket size + s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero max iterations + s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { + // Test with different capacities + std::vector capacities = {100, 1000, 10000, 100000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { + // Test with different valid bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); + } +} + +TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { + // Test the static helper function + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + // Should be a power of 2 + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + // Should be able to hold the capacity with 95.5% load factor + uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; +} + +TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { + // Test fingerprint generation ensures non-zero values + for (uint64_t hash = 0; hash < 256; ++hash) { + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); + ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + } +} + +TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { + uint32_t num_buckets = 1024; + + // Test that alternate bucket is different from original + for (uint32_t bucket = 0; bucket < 100; ++bucket) { + for (uint8_t fp = 1; fp < 10; ++fp) { + uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); + ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + + // Applying alternate twice should give back original + uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); + ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + } + } +} \ No newline at end of file From 577ee71e710f18297b0affdcb8f22872bbba1d9b Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Thu, 22 Jan 2026 10:42:41 +0000 Subject: [PATCH 02/20] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/commander.h | 1 + src/types/redis_cuckoo_chain.cc | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/commands/commander.h b/src/commands/commander.h index 8b012ff27f1..d34129f58c4 100644 --- a/src/commands/commander.h +++ b/src/commands/commander.h @@ -92,6 +92,7 @@ enum class CommandCategory : uint8_t { Bit, BloomFilter, Cluster, + CuckooFilter, Function, Geo, Hash, diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 5f75e57fdc8..803fd5ffb7d 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,10 +20,10 @@ #include "redis_cuckoo_chain.h" -#include - #include +#include "logging.h" + #include "cuckoo_filter.h" namespace redis { @@ -84,11 +84,8 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity - << ", bucket_size=" << bucket_size - << ", num_buckets=" << num_buckets - << ", max_iterations=" << max_iterations - << ", expansion=" << static_cast(expansion); + info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); From 77b4c4ad612d907da268dfc8947a1edb7507bde6 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:16:45 +0000 Subject: [PATCH 03/20] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/cuckoo_filter.h | 41 +++++++++++--- src/types/redis_cuckoo_chain.cc | 9 ++- tests/cppunit/types/cuckoo_filter_test.cc | 68 +++++++++++++++++++---- 3 files changed, 98 insertions(+), 20 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index f305f8c4480..eb10eba4758 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -25,12 +25,20 @@ #include #include +#include "vendor/murmurhash2.h" + namespace redis { // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. // This is a bucket-based storage implementation where each bucket is stored // as an independent key-value pair in RocksDB +// +// Hash calculation follows RedisBloom's design: +// - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) +// - h1 = hash (primary hash) +// - h2 = h1 ^ (fp * 0x5bd1e995) (alternate hash via XOR) +// - bucket_index = hash % num_buckets (only apply modulo when indexing) class CuckooFilter { public: // Calculate the optimal number of buckets for the filter @@ -44,18 +52,37 @@ class CuckooFilter { return power; } - // Generate fingerprint from hash (8-bit fingerprint, non-zero) + // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) + // Following RedisBloom: fp = hash % 255 + 1 static uint8_t GenerateFingerprint(uint64_t hash) { - uint8_t fp = hash & 0xFF; - return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + return static_cast(hash % 255 + 1); + } + + // Calculate alternate hash using XOR (following RedisBloom) + // h2 = h1 ^ (fp * 0x5bd1e995) + // This preserves symmetry: GetAltHash(fp, GetAltHash(fp, h)) == h + static uint64_t GetAltHash(uint8_t fingerprint, uint64_t hash) { + return hash ^ (static_cast(fingerprint) * 0x5bd1e995); } - // Calculate alternate bucket index using XOR + // Legacy function for backward compatibility with tests + // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { - // Use a simple hash of the fingerprint for the XOR operation - uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant - return (bucket_idx ^ fp_hash) % num_buckets; + // Treat bucket_idx as a hash value for the calculation + uint64_t hash = bucket_idx; + uint64_t alt_hash = GetAltHash(fingerprint, hash); + // Convert back to bucket index + return static_cast(alt_hash % num_buckets); } + + // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) + // This is the entry point for hashing items before they are inserted/checked in the filter + static uint64_t Hash(const char* data, size_t length) { + return HllMurMurHash64A(data, static_cast(length), 0); + } + + // Convenience overload for std::string + static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } }; } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 803fd5ffb7d..952af9f2a9a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -63,8 +63,10 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key std::string ns_key = AppendNamespacePrefix(user_key); // Check if the key already exists - CuckooChainMetadata metadata; - rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + // Read without snapshot to ensure we see any committed data + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); if (s.ok()) { return rocksdb::Status::InvalidArgument("the key already exists"); } @@ -72,6 +74,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return s; // Return other errors } + // Initialize metadata for the new cuckoo filter + CuckooChainMetadata metadata; + // Initialize metadata for the new cuckoo filter metadata.size = 0; metadata.base_capacity = capacity; diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 8a4a43c0f8f..69183713cd8 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -29,7 +29,7 @@ class RedisCuckooFilterTest : public TestBase { protected: explicit RedisCuckooFilterTest() : TestBase() { - cuckoo_ = std::make_unique(storage_.get(), namespace_); + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } ~RedisCuckooFilterTest() override = default; @@ -123,25 +123,71 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { } TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { - // Test fingerprint generation ensures non-zero values - for (uint64_t hash = 0; hash < 256; ++hash) { + // Test fingerprint generation ensures non-zero values in range [1, 255] + // Following RedisBloom: fp = hash % 255 + 1 + for (uint64_t hash = 0; hash < 1000; ++hash) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); - ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } + + // Verify the formula: fp = hash % 255 + 1 + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(0), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(254), 255); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(255), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(256), 2); } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { uint32_t num_buckets = 1024; - // Test that alternate bucket is different from original - for (uint32_t bucket = 0; bucket < 100; ++bucket) { + // Test GetAltHash symmetry at hash level (following RedisBloom design) + // h2 = GetAltHash(fp, h1) + // h1 = GetAltHash(fp, h2) <- this is the symmetry property + for (uint64_t hash = 0; hash < 100; ++hash) { for (uint8_t fp = 1; fp < 10; ++fp) { - uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); - ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; - // Applying alternate twice should give back original - uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); - ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; } } +} + +TEST_F(RedisCuckooFilterTest, HashFunction) { + // Test that Hash function produces consistent 64-bit values + std::string test_item = "hello"; + uint64_t hash1 = redis::CuckooFilter::Hash(test_item); + uint64_t hash2 = redis::CuckooFilter::Hash(test_item.data(), test_item.size()); + + // Both methods should produce the same result + ASSERT_EQ(hash1, hash2) << "Hash methods should be consistent"; + + // Hash should be deterministic + uint64_t hash3 = redis::CuckooFilter::Hash(test_item); + ASSERT_EQ(hash1, hash3) << "Hash should be deterministic"; + + // Different items should produce different hashes (with high probability) + uint64_t hash_world = redis::CuckooFilter::Hash("world"); + ASSERT_NE(hash1, hash_world) << "Different items should have different hashes"; + + // Empty string produces hash value 0 (this is expected with MurmurHash) + uint64_t hash_empty = redis::CuckooFilter::Hash(""); + ASSERT_EQ(hash_empty, 0) << "Empty string should produce hash value 0 with MurmurHash"; + + // Even with hash=0, fingerprint should be non-zero + uint8_t fp_empty = redis::CuckooFilter::GenerateFingerprint(hash_empty); + ASSERT_EQ(fp_empty, 1) << "Fingerprint of hash=0 should be 1 (0 % 255 + 1)"; + + // Test that hash can be used with fingerprint generation + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } \ No newline at end of file From 025ee864df24a0e3f7504e1f204b763a3d0e9683 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:58:29 +0000 Subject: [PATCH 04/20] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/redis_cuckoo_chain.cc | 6 + tests/cppunit/types/cuckoo_filter_test.cc | 172 +++++++++++++++++++++- 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 952af9f2a9a..8ab69e38555 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -52,6 +52,12 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } + // RedisBloom requires minimum capacity to ensure at least one bucket can be created + // With load factor 0.955, capacity=1 and bucket_size=4 results in 0 buckets + if (capacity < 2) { + return rocksdb::Status::InvalidArgument("capacity must be at least 2"); + } + if (bucket_size == 0 || bucket_size > 255) { return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); } diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 69183713cd8..56000f930d6 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -31,14 +31,17 @@ class RedisCuckooFilterTest : public TestBase { explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } - ~RedisCuckooFilterTest() override = default; - - void SetUp() override { - key_ = "test_cuckoo_filter_key"; + ~RedisCuckooFilterTest() override { + // Ensure cuckoo_ is destroyed before storage_ + cuckoo_.reset(); } - void TearDown() override { - [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + void SetUp() override { + // Use a unique key for each test to avoid conflicts + // Include test name to make debugging easier + const ::testing::TestInfo* const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + key_ = std::string("cf_test_") + test_info->name(); } std::unique_ptr cuckoo_; @@ -190,4 +193,161 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; +} + + +TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { + // Test capacity = 1 (too small, following RedisBloom behavior) + // With load factor 0.955 and bucket_size=4, this would result in 0 buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); + ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { + // Test valid upper boundary (bucket_size is uint8_t, max = 255) + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; + + // Test bucket_size = 1 (minimum valid value) + s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; + + // Test common power-of-2 bucket sizes + std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; + for (size_t i = 0; i < valid_sizes.size(); ++i) { + std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + // Create the filter + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + + // Verify metadata was stored by trying to reserve again with same key + // This should fail with "already exists" error + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; + ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) + << "Error message should mention 'already exists'"; + + // Verify we can still create filters with different keys + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + + // Verify the original key still exists (can't create it again) + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Original key should still exist"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + + +TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { + // expansion=0 means no auto-expansion when filter is full + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); + ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; + + // Verify different expansion values + std::vector expansions = {0, 1, 2, 4, 8}; + for (size_t i = 0; i < expansions.size(); ++i) { + std::string test_key = "key_exp_" + std::to_string(expansions[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); + ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { + // Test with very large capacity + uint64_t large_capacity = 10000000; // 10 million + auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle large capacity"; + + // Verify num_buckets calculation doesn't overflow + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; + + // Test even larger capacity (100 million) + uint64_t huge_capacity = 100000000; + s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; + + num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { + // Test different max_iterations values + std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; + + for (size_t i = 0; i < iterations.size(); ++i) { + std::string test_key = "key_iter_" + std::to_string(iterations[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); + ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; + } + + // Test max_iterations = 0 (should fail) + auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; +} + +TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { + // Test minimum valid capacity + auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; + + // Test small but valid capacities + std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; + for (size_t i = 0; i < small_capacities.size(); ++i) { + std::string test_key = "small_" + std::to_string(small_capacities[i]); + s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; + + // Verify at least one bucket is created + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { + // Test various parameter combinations to ensure robustness + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint8_t expansion; + bool should_succeed; + std::string description; + }; + + std::vector test_cases = { + {1000, 4, 500, 2, true, "Standard parameters"}, + {2, 1, 1, 0, true, "Minimum all parameters"}, + {100000, 255, 65535, 255, true, "Maximum all parameters"}, + {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, + {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, + {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, + }; + + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& tc = test_cases[i]; + std::string test_key = "combo_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); + + if (tc.should_succeed) { + ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; + } else { + ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; + } + } } \ No newline at end of file From 1fe2c6fb448106b53046b728dcf0276c35912429 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 2 Feb 2026 11:44:33 +0000 Subject: [PATCH 05/20] feat: Implement CF.ADD command with kick-out insertion --- src/commands/cmd_cuckoo_filter.cc | 37 ++- src/types/redis_cuckoo_chain.cc | 307 +++++++++++++++++++++- src/types/redis_cuckoo_chain.h | 12 + tests/cppunit/types/cuckoo_filter_test.cc | 212 +++++++++++++++ 4 files changed, 565 insertions(+), 3 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index eb5e6ce3c94..82364a60a37 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -102,8 +102,41 @@ class CommandCFReserve : public Commander { uint8_t expansion_ = kCFDefaultExpansion; }; -// Register the CF.RESERVE command +class CommandCFAdd : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.ADD key item + if (args.size() != 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + bool added = false; + auto s = cuckoo_db.Add(ctx, args_[1], args_[2], &added); + + if (!s.ok()) { + if (s.IsNotFound()) { + return {Status::RedisExecErr, "key not found"}; + } + if (s.IsAborted()) { + // Filter is full + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; + } + + // Return 1 if added, 0 if already exists (though we don't check for duplicates in this version) + *output = redis::Integer(added ? 1 : 0); + return Status::OK(); + } +}; + +// Register the CF.RESERVE and CF.ADD commands REDIS_REGISTER_COMMANDS(CuckooFilter, - MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), + MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 8ab69e38555..e5a311d78da 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -21,6 +21,7 @@ #include "redis_cuckoo_chain.h" #include +#include #include "logging.h" @@ -95,7 +96,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation @@ -118,4 +119,308 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } +// Helper function: calculate integer power (avoid std::pow for integers) +static uint64_t intPow(uint64_t base, uint16_t exp) { + uint64_t result = 1; + for (uint16_t i = 0; i < exp; ++i) { + result *= base; + } + return result; +} + +// Helper function: try to find empty slot in a bucket and insert fingerprint +static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { + for (size_t i = 0; i < bucket_size; ++i) { + if (static_cast(bucket_data[i]) == 0) { + bucket_data[i] = fingerprint; + *slot_idx = i; + return true; + } + } + return false; +} + +// Helper function: read bucket from storage and ensure correct size +static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, + const std::string &bucket_key, uint8_t bucket_size, + std::string *bucket_data) { + rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); + auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); + if (!s.ok() && !s.IsNotFound()) { + return s; + } + if (s.IsNotFound()) { + bucket_data->clear(); + } + if (bucket_data->size() < bucket_size) { + bucket_data->resize(bucket_size, 0); + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { + std::string ns_key = AppendNamespacePrefix(user_key); + + // Get metadata - use read options without snapshot to see latest data + CuckooChainMetadata metadata(false); + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); + if (!s.ok()) { + if (s.IsNotFound()) { + return rocksdb::Status::NotFound("key not found"); + } + return s; + } + + // Decode metadata + Slice slice(raw_value); + s = metadata.Decode(&slice); + if (!s.ok()) { + return s; + } + + // Validate metadata + if (metadata.n_filters == 0) { + return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); + } + + // Calculate hash and fingerprint for the item + uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); + uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); + + // Try to insert in each sub-filter (starting from the first/smallest one) + // This follows RedisBloom's behavior and is more efficient + for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + // Calculate capacity for this filter using integer power + uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, filter_idx); + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + + // Calculate bucket indices + uint32_t bucket1_idx = hash % num_buckets; + uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); + uint32_t bucket2_idx = alt_hash % num_buckets; + + // Read both buckets using helper function + std::string bucket1_key = getBucketKey(ns_key, metadata, filter_idx, bucket1_idx); + std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); + + std::string bucket1_data, bucket2_data; + s = readBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + if (!s.ok()) return s; + + s = readBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); + if (!s.ok()) return s; + + // Try simple insertion in bucket1 or bucket2 + size_t slot_idx; + std::string *target_bucket_data = nullptr; + std::string target_bucket_key; + + if (tryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { + target_bucket_data = &bucket1_data; + target_bucket_key = bucket1_key; + } else if (tryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { + target_bucket_data = &bucket2_data; + target_bucket_key = bucket2_key; + } + + if (target_bucket_data != nullptr) { + // Successfully inserted, write to storage atomically + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + batch->Put(target_bucket_key, *target_bucket_data); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + } + + // No space found in any filter, try kick-out on the last filter + uint16_t last_filter_idx = metadata.n_filters - 1; + uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, last_filter_idx); + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + + bool inserted = false; + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + if (s.ok() && inserted) { + // Update metadata after successful kick-out + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + + // Kick-out failed, try to expand if allowed + if (metadata.expansion > 0) { + s = expandFilter(ctx, ns_key, &metadata); + if (!s.ok()) { + return s; + } + + // Retry insertion in the new expanded filter + uint16_t new_filter_idx = metadata.n_filters - 1; + uint64_t new_filter_capacity = metadata.base_capacity * intPow(metadata.expansion, new_filter_idx); + uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); + + uint32_t bucket1_idx = hash % new_num_buckets; + std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); + + // Insert into first slot of the new filter's first bucket + std::string bucket1_data(metadata.bucket_size, 0); + bucket1_data[0] = fingerprint; + + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + batch->Put(bucket1_key, bucket1_data); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + + // No expansion allowed and filter is full + *added = false; + return rocksdb::Status::Aborted("filter is full"); +} + +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted) { + *inserted = false; + + // Track modified buckets to write atomically at the end + std::unordered_map modified_buckets; + + // Start from bucket1 + uint32_t current_bucket_idx = hash % num_buckets; + uint8_t current_fp = fingerprint; + uint32_t victim_slot = 0; + + // Try to kick out existing fingerprints (all operations in memory) + for (uint16_t iteration = 0; iteration < metadata.max_iterations; ++iteration) { + // Read current bucket (check modified_buckets cache first) + std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); + std::string bucket_data; + + auto cached = modified_buckets.find(bucket_key); + if (cached != modified_buckets.end()) { + bucket_data = cached->second; + } else { + auto s = readBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); + if (!s.ok()) return s; + } + + // Swap fingerprint with victim slot + uint8_t old_fp = static_cast(bucket_data[victim_slot]); + bucket_data[victim_slot] = current_fp; + modified_buckets[bucket_key] = bucket_data; // Cache modification + current_fp = old_fp; + + // If kicked-out fingerprint is 0 (empty), we successfully inserted + if (current_fp == 0) { + *inserted = true; + break; + } + + // Calculate alternate bucket for the kicked-out fingerprint + // CRITICAL FIX: Use XOR hash calculation correctly + // We need to reconstruct the hash for the alternate bucket + // Since h2 = h1 ^ (fp * 0x5bd1e995), and we know bucket_idx = h1 % num_buckets + // We calculate alt_hash using the fingerprint and current bucket index + uint64_t current_hash = current_bucket_idx; // Approximate hash from bucket index + uint64_t alt_hash = CuckooFilter::GetAltHash(current_fp, current_hash); + uint32_t alt_bucket_idx = alt_hash % num_buckets; + + // Check if alternate bucket has empty slot + std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); + std::string alt_bucket_data; + + cached = modified_buckets.find(alt_bucket_key); + if (cached != modified_buckets.end()) { + alt_bucket_data = cached->second; + } else { + auto s = readBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); + if (!s.ok()) return s; + } + + size_t empty_slot; + if (tryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { + // Found empty slot! Cache this modification + modified_buckets[alt_bucket_key] = alt_bucket_data; + *inserted = true; + break; + } + + // Move to alternate bucket and try next victim slot + current_bucket_idx = alt_bucket_idx; + victim_slot = (victim_slot + 1) % metadata.bucket_size; + } + + // Write all modified buckets atomically + if (*inserted && !modified_buckets.empty()) { + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + for (const auto &entry : modified_buckets) { + batch->Put(entry.first, entry.second); + } + + auto s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + } + + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + if (metadata->n_filters >= UINT16_MAX) { + return rocksdb::Status::Aborted("maximum number of filters reached"); + } + + metadata->n_filters++; + INFO("CF.ADD: Expanded to {} filters", metadata->n_filters); + + // Write updated metadata + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.EXPAND", ""}); + batch->PutLogData(log_data.Encode()); + + std::string metadata_bytes; + metadata->Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index f606f5f8c39..4e7ccdfd869 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -40,6 +40,10 @@ class CuckooChain : public Database { rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion); + // CF.ADD command - adds an item to the cuckoo filter + // Returns true if item was added, false if item already exists (probably) + rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); + private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); @@ -48,6 +52,14 @@ class CuckooChain : public Database { // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index); + + // Kick-out insertion: try to insert fingerprint by evicting existing ones + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, + uint64_t hash, bool *inserted); + + // Create a new sub-filter for expansion + rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); }; } // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 56000f930d6..a78f54b2dab 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -350,4 +350,216 @@ TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; } } +} + +TEST_F(RedisCuckooFilterTest, AddBasic) { + // First reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed to reserve: " << s.ToString(); + + // Add an item + bool added = false; + s = cuckoo_->Add(*ctx_, key_, "item1", &added); + ASSERT_TRUE(s.ok()) << "Failed to add item: " << s.ToString(); + ASSERT_TRUE(added) << "Item should have been added"; +} + +TEST_F(RedisCuckooFilterTest, AddMultipleItems) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add multiple items + std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; + for (const auto& item : items) { + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; + ASSERT_TRUE(added) << "Item should have been added: " << item; + } +} + +TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { + // Try to add to a filter that doesn't exist + bool added = false; + auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsNotFound()) << "Should return NotFound error"; +} + +TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { + // Test with different bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(bucket_sizes[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 100, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed to reserve with bucket_size=" << static_cast(bucket_sizes[i]); + + // Add some items + for (int j = 0; j < 10; ++j) { + std::string item = "item_" + std::to_string(j); + bool added = false; + s = cuckoo_->Add(*ctx_, test_key, item, &added); + ASSERT_TRUE(s.ok()) << "Failed to add item with bucket_size=" << static_cast(bucket_sizes[i]); + ASSERT_TRUE(added); + } + } +} + +TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add the same item multiple times (Cuckoo Filter allows duplicates) + std::string item = "duplicate_item"; + for (int i = 0; i < 5; ++i) { + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Iteration " << i; + ASSERT_TRUE(added) << "Should allow duplicate items"; + } +} + +TEST_F(RedisCuckooFilterTest, AddManyItems) { + // Reserve a filter with moderate capacity + uint64_t capacity = 1000; + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add many items (should succeed without hitting limits) + int num_items = 100; + for (int i = 0; i < num_items; ++i) { + std::string item = "item_" + std::to_string(i); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Failed at item " << i; + ASSERT_TRUE(added); + } +} + +TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { + // Test with very small capacity to trigger potential full filter scenario + uint64_t small_capacity = 10; + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); // expansion=0 means no auto-growth + ASSERT_TRUE(s.ok()); + + // Add items up to capacity + // With bucket_size=2 and capacity=10, we should have very limited space + bool full = false; + for (int i = 0; i < 100; ++i) { + std::string item = "item_" + std::to_string(i); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + + if (!s.ok()) { + // Filter is full + ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; + full = true; + break; + } + } + + // We expect the filter to become full at some point + ASSERT_TRUE(full) << "Small filter should eventually become full"; +} + +TEST_F(RedisCuckooFilterTest, AddEmptyItem) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add empty string + bool added = false; + s = cuckoo_->Add(*ctx_, key_, "", &added); + ASSERT_TRUE(s.ok()) << "Should be able to add empty string"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddLongItem) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add a very long string + std::string long_item(10000, 'x'); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, long_item, &added); + ASSERT_TRUE(s.ok()) << "Should be able to add long string"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddBinaryData) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add binary data (including null bytes) + std::string binary_item = std::string("\x00\x01\x02\xFF\xFE", 5); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, binary_item, &added); + ASSERT_TRUE(s.ok()) << "Should be able to add binary data"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddWithVariousCapacities) { + // Test that Add works correctly with different filter capacities + std::vector capacities = {10, 100, 1000, 10000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_cap_" + std::to_string(capacities[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add a reasonable number of items relative to capacity + int num_items = std::min(static_cast(capacities[i] / 10), 50); + for (int j = 0; j < num_items; ++j) { + std::string item = "item_" + std::to_string(j); + bool added = false; + s = cuckoo_->Add(*ctx_, test_key, item, &added); + ASSERT_TRUE(s.ok()) << "Failed for capacity=" << capacities[i] << ", item=" << j; + ASSERT_TRUE(added); + } + } +} + +TEST_F(RedisCuckooFilterTest, AddConsistentHashing) { + // Verify that the same item always hashes to the same buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + std::string item = "test_item"; + + // Calculate hash for the item + uint64_t hash1 = redis::CuckooFilter::Hash(item); + uint64_t hash2 = redis::CuckooFilter::Hash(item); + + // Hashing should be deterministic + ASSERT_EQ(hash1, hash2) << "Hash should be consistent for the same item"; + + // Fingerprint should be consistent + uint8_t fp1 = redis::CuckooFilter::GenerateFingerprint(hash1); + uint8_t fp2 = redis::CuckooFilter::GenerateFingerprint(hash2); + ASSERT_EQ(fp1, fp2) << "Fingerprint should be consistent"; + + // Add the item + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { + // Verify that different items produce different hashes + std::vector items = {"item1", "item2", "item3", "different", "another"}; + std::set hashes; + + for (const auto& item : items) { + uint64_t hash = redis::CuckooFilter::Hash(item); + hashes.insert(hash); + } + + // All items should have unique hashes (with very high probability) + ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; } \ No newline at end of file From f50125c3085783df26ccd3b07cc5efd5b602e8cb Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 4 Feb 2026 20:25:56 +0800 Subject: [PATCH 06/20] trigger CI checks From 65664feeadfcfe082c97730dd157f7225ba09565 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 4 Feb 2026 12:36:45 +0000 Subject: [PATCH 07/20] style: Apply clang-format to cuckoo filter files --- src/commands/cmd_cuckoo_filter.cc | 3 +-- src/storage/redis_metadata.cc | 1 - src/storage/redis_metadata.h | 12 ++++++++---- src/types/cuckoo_filter.h | 8 ++------ src/types/redis_cuckoo_chain.cc | 20 ++++++++------------ src/types/redis_cuckoo_chain.h | 8 ++++---- tests/cppunit/types/cuckoo_filter_test.cc | 9 +++------ 7 files changed, 26 insertions(+), 35 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 82364a60a37..7bc128d878a 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -135,8 +135,7 @@ class CommandCFAdd : public Commander { }; // Register the CF.RESERVE and CF.ADD commands -REDIS_REGISTER_COMMANDS(CuckooFilter, - MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), +REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) } // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 5f44b151ab6..f385aa6fe24 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -570,7 +570,6 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } - void CuckooChainMetadata::Encode(std::string *dst) const { Metadata::Encode(dst); diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 8d8b5b21666..7d6e1c61575 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -59,8 +59,8 @@ enum RedisType : uint8_t { }; inline constexpr const std::array RedisTypeNames = { - "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; + "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", + "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -330,8 +330,12 @@ class CuckooChainMetadata : public Metadata { explicit CuckooChainMetadata(bool generate_version = true) : Metadata(kRedisCuckooFilter, generate_version), - n_filters(0), expansion(0), base_capacity(0), - bucket_size(0), max_iterations(0), num_deleted_items(0) {} + n_filters(0), + expansion(0), + base_capacity(0), + bucket_size(0), + max_iterations(0), + num_deleted_items(0) {} void Encode(std::string *dst) const override; using Metadata::Decode; diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index eb10eba4758..5a3aa326aa1 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -54,9 +54,7 @@ class CuckooFilter { // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) // Following RedisBloom: fp = hash % 255 + 1 - static uint8_t GenerateFingerprint(uint64_t hash) { - return static_cast(hash % 255 + 1); - } + static uint8_t GenerateFingerprint(uint64_t hash) { return static_cast(hash % 255 + 1); } // Calculate alternate hash using XOR (following RedisBloom) // h2 = h1 ^ (fp * 0x5bd1e995) @@ -77,9 +75,7 @@ class CuckooFilter { // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) // This is the entry point for hashing items before they are inserted/checked in the filter - static uint64_t Hash(const char* data, size_t length) { - return HllMurMurHash64A(data, static_cast(length), 0); - } + static uint64_t Hash(const char* data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } // Convenience overload for std::string static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index e5a311d78da..ce0667e72cb 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -23,9 +23,8 @@ #include #include -#include "logging.h" - #include "cuckoo_filter.h" +#include "logging.h" namespace redis { @@ -34,8 +33,8 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } -std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index) { +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index) { // Create a sub-key that includes both filter index and bucket index std::string sub_key; PutFixed16(&sub_key, filter_index); @@ -141,9 +140,8 @@ static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin } // Helper function: read bucket from storage and ensure correct size -static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, - const std::string &bucket_key, uint8_t bucket_size, - std::string *bucket_data) { +static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, + uint8_t bucket_size, std::string *bucket_data) { rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); if (!s.ok() && !s.IsNotFound()) { @@ -312,9 +310,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co } rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted) { + const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { *inserted = false; // Track modified buckets to write atomically at the end @@ -402,8 +399,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns return rocksdb::Status::OK(); } -rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, - CuckooChainMetadata *metadata) { +rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata) { if (metadata->n_filters >= UINT16_MAX) { return rocksdb::Status::Aborted("maximum number of filters reached"); } diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 4e7ccdfd869..9c5fa4744cd 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -50,13 +50,13 @@ class CuckooChain : public Database { // Generate key for a specific bucket in bucket-based storage // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} - std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index); + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, - uint64_t hash, bool *inserted); + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted); // Create a new sub-filter for expansion rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index a78f54b2dab..8864deb3f63 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -19,6 +19,7 @@ */ #include + #include #include "storage/redis_db.h" @@ -39,8 +40,7 @@ class RedisCuckooFilterTest : public TestBase { void SetUp() override { // Use a unique key for each test to avoid conflicts // Include test name to make debugging easier - const ::testing::TestInfo* const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } @@ -195,7 +195,6 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } - TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { // Test capacity = 1 (too small, following RedisBloom behavior) // With load factor 0.955 and bucket_size=4, this would result in 0 buckets @@ -238,8 +237,7 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; - ASSERT_NE(s.ToString().find("already exists"), std::string::npos) - << "Error message should mention 'already exists'"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); @@ -251,7 +249,6 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } - TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { // expansion=0 means no auto-expansion when filter is full auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); From 73ba1d406b3d13bcb656fd661f468d2b17c39fa3 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 6 Feb 2026 11:25:29 +0000 Subject: [PATCH 08/20] fix: Correct WriteBatchLogData constructor calls in CuckooChain --- src/types/redis_cuckoo_chain.cc | 16 ++++++++-------- src/types/redis_cuckoo_chain.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index ce0667e72cb..d1968ee7829 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -100,7 +100,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.RESERVE", user_key.ToString()}); batch->PutLogData(log_data.Encode()); // Store the metadata @@ -226,7 +226,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (target_bucket_data != nullptr) { // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); batch->Put(target_bucket_key, *target_bucket_data); @@ -249,11 +249,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; - s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + s = kickOutInsert(ctx, user_key, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); if (s.ok() && inserted) { // Update metadata after successful kick-out auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); metadata.size++; @@ -288,7 +288,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co bucket1_data[0] = fingerprint; auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); batch->Put(bucket1_key, bucket1_data); @@ -309,7 +309,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co return rocksdb::Status::Aborted("filter is full"); } -rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { *inserted = false; @@ -385,7 +385,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns // Write all modified buckets atomically if (*inserted && !modified_buckets.empty()) { auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); for (const auto &entry : modified_buckets) { @@ -409,7 +409,7 @@ rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_ // Write updated metadata auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.EXPAND", ""}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.EXPAND", ""}); batch->PutLogData(log_data.Encode()); std::string metadata_bytes; diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 9c5fa4744cd..c8ad5550bba 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -54,9 +54,9 @@ class CuckooChain : public Database { uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones - rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted); + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, + uint8_t fingerprint, uint64_t hash, bool *inserted); // Create a new sub-filter for expansion rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); From 9ddfc8335b67c72488817a7ddda6e98424e46a11 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 4 May 2026 01:07:57 +0800 Subject: [PATCH 09/20] fix: ci --- src/storage/redis_metadata.cc | 4 ++- src/types/cuckoo_filter.h | 4 +-- src/types/redis_cuckoo_chain.cc | 41 +++++++++++------------ tests/cppunit/types/cuckoo_filter_test.cc | 16 +++++---- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index f385aa6fe24..e85d33e25fd 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -607,8 +607,10 @@ uint64_t CuckooChainMetadata::GetTotalCapacity() const { // Calculate total capacity across all filters uint64_t total = 0; + uint64_t filter_capacity = base_capacity; for (uint16_t i = 0; i < n_filters; i++) { - total += base_capacity * std::pow(expansion, i); + total += filter_capacity; + filter_capacity *= expansion; } return total; } diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 5a3aa326aa1..aa39f0936d7 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -44,7 +44,7 @@ class CuckooFilter { // Calculate the optimal number of buckets for the filter static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { // A load factor of 95.5% is chosen for the cuckoo filter - uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + auto num_buckets = static_cast(static_cast(capacity) / bucket_size / 0.955L); // Round up to next power of 2 for better hash distribution if (num_buckets == 0) num_buckets = 1; uint32_t power = 1; @@ -81,4 +81,4 @@ class CuckooFilter { static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } }; -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index d1968ee7829..97652a58c9a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,7 +20,6 @@ #include "redis_cuckoo_chain.h" -#include #include #include "cuckoo_filter.h" @@ -119,7 +118,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key } // Helper function: calculate integer power (avoid std::pow for integers) -static uint64_t intPow(uint64_t base, uint16_t exp) { +static uint64_t IntPow(uint64_t base, uint16_t exp) { uint64_t result = 1; for (uint16_t i = 0; i < exp; ++i) { result *= base; @@ -128,10 +127,10 @@ static uint64_t intPow(uint64_t base, uint16_t exp) { } // Helper function: try to find empty slot in a bucket and insert fingerprint -static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { +static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { for (size_t i = 0; i < bucket_size; ++i) { if (static_cast(bucket_data[i]) == 0) { - bucket_data[i] = fingerprint; + bucket_data[i] = static_cast(fingerprint); *slot_idx = i; return true; } @@ -140,7 +139,7 @@ static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin } // Helper function: read bucket from storage and ensure correct size -static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, +static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, uint8_t bucket_size, std::string *bucket_data) { rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); @@ -191,7 +190,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // This follows RedisBloom's behavior and is more efficient for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { // Calculate capacity for this filter using integer power - uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, filter_idx); + uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, filter_idx); uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); // Calculate bucket indices @@ -204,21 +203,21 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); std::string bucket1_data, bucket2_data; - s = readBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + s = ReadBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); if (!s.ok()) return s; - s = readBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); + s = ReadBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); if (!s.ok()) return s; // Try simple insertion in bucket1 or bucket2 - size_t slot_idx; + size_t slot_idx = 0; std::string *target_bucket_data = nullptr; std::string target_bucket_key; - if (tryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { + if (TryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { target_bucket_data = &bucket1_data; target_bucket_key = bucket1_key; - } else if (tryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { + } else if (TryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { target_bucket_data = &bucket2_data; target_bucket_key = bucket2_key; } @@ -245,7 +244,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; - uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, last_filter_idx); + uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, last_filter_idx); uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; @@ -277,7 +276,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; - uint64_t new_filter_capacity = metadata.base_capacity * intPow(metadata.expansion, new_filter_idx); + uint64_t new_filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, new_filter_idx); uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); uint32_t bucket1_idx = hash % new_num_buckets; @@ -285,7 +284,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Insert into first slot of the new filter's first bucket std::string bucket1_data(metadata.bucket_size, 0); - bucket1_data[0] = fingerprint; + bucket1_data[0] = static_cast(fingerprint); auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); @@ -332,13 +331,13 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us if (cached != modified_buckets.end()) { bucket_data = cached->second; } else { - auto s = readBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); + auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); if (!s.ok()) return s; } // Swap fingerprint with victim slot - uint8_t old_fp = static_cast(bucket_data[victim_slot]); - bucket_data[victim_slot] = current_fp; + auto old_fp = static_cast(bucket_data[victim_slot]); + bucket_data[victim_slot] = static_cast(current_fp); modified_buckets[bucket_key] = bucket_data; // Cache modification current_fp = old_fp; @@ -365,12 +364,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us if (cached != modified_buckets.end()) { alt_bucket_data = cached->second; } else { - auto s = readBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); + auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); if (!s.ok()) return s; } - size_t empty_slot; - if (tryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { + size_t empty_slot = 0; + if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { // Found empty slot! Cache this modification modified_buckets[alt_bucket_key] = alt_bucket_data; *inserted = true; @@ -419,4 +418,4 @@ rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_ return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 8864deb3f63..7a8244675a5 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -32,6 +32,10 @@ class RedisCuckooFilterTest : public TestBase { explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } + RedisCuckooFilterTest(const RedisCuckooFilterTest &) = delete; + RedisCuckooFilterTest &operator=(const RedisCuckooFilterTest &) = delete; + RedisCuckooFilterTest(RedisCuckooFilterTest &&) = delete; + RedisCuckooFilterTest &operator=(RedisCuckooFilterTest &&) = delete; ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); @@ -40,7 +44,7 @@ class RedisCuckooFilterTest : public TestBase { void SetUp() override { // Use a unique key for each test to avoid conflicts // Include test name to make debugging easier - const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } @@ -121,7 +125,7 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; // Should be able to hold the capacity with 95.5% load factor - uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + auto expected_min = static_cast(static_cast(capacity) / bucket_size / 0.955L); ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; } @@ -337,7 +341,7 @@ TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { }; for (size_t i = 0; i < test_cases.size(); ++i) { - const auto& tc = test_cases[i]; + const auto &tc = test_cases[i]; std::string test_key = "combo_" + std::to_string(i); auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); @@ -368,7 +372,7 @@ TEST_F(RedisCuckooFilterTest, AddMultipleItems) { // Add multiple items std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; - for (const auto& item : items) { + for (const auto &item : items) { bool added = false; s = cuckoo_->Add(*ctx_, key_, item, &added); ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; @@ -552,11 +556,11 @@ TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { std::vector items = {"item1", "item2", "item3", "different", "another"}; std::set hashes; - for (const auto& item : items) { + for (const auto &item : items) { uint64_t hash = redis::CuckooFilter::Hash(item); hashes.insert(hash); } // All items should have unique hashes (with very high probability) ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; -} \ No newline at end of file +} From c89f2de829f141f24d6d00bbf5d92f0f4209b86a Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 4 May 2026 23:00:12 +0800 Subject: [PATCH 10/20] fix: ci --- tests/cppunit/types/cuckoo_filter_test.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 7a8244675a5..b95caaed7e5 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -28,14 +28,16 @@ #include "types/redis_cuckoo_chain.h" class RedisCuckooFilterTest : public TestBase { - protected: - explicit RedisCuckooFilterTest() : TestBase() { - cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); - } + public: RedisCuckooFilterTest(const RedisCuckooFilterTest &) = delete; RedisCuckooFilterTest &operator=(const RedisCuckooFilterTest &) = delete; RedisCuckooFilterTest(RedisCuckooFilterTest &&) = delete; RedisCuckooFilterTest &operator=(RedisCuckooFilterTest &&) = delete; + + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); + } ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); From 00abd42061f84a2eac3968e2618c5bb0add8b5d7 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 02:23:42 +0800 Subject: [PATCH 11/20] fix: loses atomicity --- src/commands/cmd_cuckoo_filter.cc | 8 +- src/storage/redis_metadata.cc | 4 +- src/types/cuckoo_filter.h | 7 + src/types/redis_cuckoo_chain.cc | 206 ++++++++++------------ src/types/redis_cuckoo_chain.h | 19 +- tests/cppunit/types/cuckoo_filter_test.cc | 8 + 6 files changed, 129 insertions(+), 123 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 7bc128d878a..830c43e7771 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -31,7 +31,7 @@ class CommandCFReserve : public Commander { Status Parse(const std::vector &args) override { // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] if (args.size() < 3) { - return {Status::RedisParseErr, "wrong number of arguments"}; + return {Status::RedisParseErr, errWrongNumOfArguments}; } // Parse capacity (required) @@ -107,7 +107,7 @@ class CommandCFAdd : public Commander { Status Parse(const std::vector &args) override { // CF.ADD key item if (args.size() != 3) { - return {Status::RedisParseErr, "wrong number of arguments"}; + return {Status::RedisParseErr, errWrongNumOfArguments}; } return Commander::Parse(args); } @@ -128,7 +128,7 @@ class CommandCFAdd : public Commander { return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; } - // Return 1 if added, 0 if already exists (though we don't check for duplicates in this version) + // Duplicate items are allowed, so successful insertions return 1. *output = redis::Integer(added ? 1 : 0); return Status::OK(); } @@ -138,4 +138,4 @@ class CommandCFAdd : public Commander { REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 7582887574f..f9c91034e86 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -335,7 +335,7 @@ bool Metadata::IsSingleKVType() const { return Type() == kRedisString || Type() bool Metadata::IsEmptyableType() const { return IsSingleKVType() || Type() == kRedisStream || Type() == kRedisBloomFilter || Type() == kRedisHyperLogLog || - Type() == kRedisTDigest || Type() == kRedisTimeSeries; + Type() == kRedisTDigest || Type() == kRedisTimeSeries || Type() == kRedisCuckooFilter; } bool Metadata::Expired() const { return ExpireAt(util::GetTimeStampMS()); } @@ -661,7 +661,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { return s; } - if (input->size() < 21) { + if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint8_t)) { return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); } diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index aa39f0936d7..211d02cb9fb 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -21,6 +21,7 @@ #pragma once #include +#include #include #include #include @@ -41,6 +42,12 @@ namespace redis { // - bucket_index = hash % num_buckets (only apply modulo when indexing) class CuckooFilter { public: + static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { + if (bucket_size == 0) return false; + auto num_buckets = static_cast(capacity) / bucket_size / 0.955L; + return num_buckets <= static_cast(std::numeric_limits::max() / 2 + 1ULL); + } + // Calculate the optimal number of buckets for the filter static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { // A load factor of 95.5% is chosen for the cuckoo filter diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 97652a58c9a..2d01ca4f410 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,6 +20,7 @@ #include "redis_cuckoo_chain.h" +#include #include #include "cuckoo_filter.h" @@ -32,6 +33,25 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } +rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadata) { + if (metadata.n_filters == 0) { + return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); + } + if (metadata.base_capacity == 0) { + return rocksdb::Status::Corruption("invalid metadata: base_capacity is 0"); + } + if (metadata.bucket_size == 0) { + return rocksdb::Status::Corruption("invalid metadata: bucket_size is 0"); + } + if (metadata.max_iterations == 0) { + return rocksdb::Status::Corruption("invalid metadata: max_iterations is 0"); + } + if (!CuckooFilter::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: base_capacity is too large"); + } + return rocksdb::Status::OK(); +} + std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index) { // Create a sub-key that includes both filter index and bucket index @@ -46,7 +66,6 @@ std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMeta rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { - // Validate parameters if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -64,25 +83,21 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (!CuckooFilter::IsCapacitySupported(capacity, bucket_size)) { + return rocksdb::Status::InvalidArgument("capacity is too large"); + } std::string ns_key = AppendNamespacePrefix(user_key); - // Check if the key already exists - // Read without snapshot to ensure we see any committed data - std::string raw_value; - rocksdb::ReadOptions read_options; // No snapshot - auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); - if (s.ok()) { - return rocksdb::Status::InvalidArgument("the key already exists"); - } + CuckooChainMetadata existing_metadata; + auto s = getCuckooChainMetadata(ctx, ns_key, &existing_metadata); + if (!s.ok() && !s.IsNotFound()) return s; if (!s.IsNotFound()) { - return s; // Return other errors + return rocksdb::Status::InvalidArgument("the key already exists"); } - // Initialize metadata for the new cuckoo filter CuckooChainMetadata metadata; - // Initialize metadata for the new cuckoo filter metadata.size = 0; metadata.base_capacity = capacity; metadata.bucket_size = bucket_size; @@ -100,12 +115,13 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.RESERVE", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; - // Store the metadata std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; // Note: With bucket-based storage, we don't pre-allocate all buckets // Buckets will be created lazily on first write @@ -117,13 +133,15 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -// Helper function: calculate integer power (avoid std::pow for integers) -static uint64_t IntPow(uint64_t base, uint16_t exp) { - uint64_t result = 1; - for (uint16_t i = 0; i < exp; ++i) { - result *= base; +static bool CalculateFilterCapacity(uint64_t base_capacity, uint8_t expansion, uint16_t filter_index, + uint64_t *filter_capacity) { + uint64_t capacity = base_capacity; + for (uint16_t i = 0; i < filter_index; ++i) { + if (expansion != 0 && capacity > std::numeric_limits::max() / expansion) return false; + capacity *= expansion; } - return result; + *filter_capacity = capacity; + return true; } // Helper function: try to find empty slot in a bucket and insert fingerprint @@ -141,8 +159,7 @@ static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin // Helper function: read bucket from storage and ensure correct size static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, uint8_t bucket_size, std::string *bucket_data) { - rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); - auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); + auto s = storage->Get(ctx, ctx.GetReadOptions(), bucket_key, bucket_data); if (!s.ok() && !s.IsNotFound()) { return s; } @@ -158,29 +175,13 @@ static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); - // Get metadata - use read options without snapshot to see latest data CuckooChainMetadata metadata(false); - std::string raw_value; - rocksdb::ReadOptions read_options; // No snapshot - auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); - if (!s.ok()) { - if (s.IsNotFound()) { - return rocksdb::Status::NotFound("key not found"); - } - return s; - } - - // Decode metadata - Slice slice(raw_value); - s = metadata.Decode(&slice); - if (!s.ok()) { - return s; - } + auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (s.IsNotFound()) return rocksdb::Status::NotFound("key not found"); + if (!s.ok()) return s; - // Validate metadata - if (metadata.n_filters == 0) { - return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); - } + s = ValidateMetadata(metadata); + if (!s.ok()) return s; // Calculate hash and fingerprint for the item uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); @@ -189,8 +190,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Try to insert in each sub-filter (starting from the first/smallest one) // This follows RedisBloom's behavior and is more efficient for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { - // Calculate capacity for this filter using integer power - uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, filter_idx); + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); // Calculate bucket indices @@ -226,13 +230,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - batch->Put(target_bucket_key, *target_bucket_data); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + s = batch->Put(target_bucket_key, *target_bucket_data); + if (!s.ok()) return s; metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -244,21 +251,33 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; - uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, last_filter_idx); + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, last_filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; - s = kickOutInsert(ctx, user_key, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + std::unordered_map modified_buckets; + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, + &modified_buckets); if (s.ok() && inserted) { - // Update metadata after successful kick-out auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + + for (const auto &entry : modified_buckets) { + s = batch->Put(entry.first, entry.second); + if (!s.ok()) return s; + } metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -269,14 +288,18 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Kick-out failed, try to expand if allowed if (metadata.expansion > 0) { - s = expandFilter(ctx, ns_key, &metadata); - if (!s.ok()) { - return s; - } + if (metadata.n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); + + metadata.n_filters++; + INFO("CF.ADD: Expanded to {} filters", metadata.n_filters); // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; - uint64_t new_filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, new_filter_idx); + uint64_t new_filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, new_filter_idx, &new_filter_capacity) || + !CuckooFilter::IsCapacitySupported(new_filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Aborted("maximum filter capacity reached"); + } uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); uint32_t bucket1_idx = hash % new_num_buckets; @@ -288,13 +311,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - batch->Put(bucket1_key, bucket1_data); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + s = batch->Put(bucket1_key, bucket1_data); + if (!s.ok()) return s; metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -308,13 +334,12 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co return rocksdb::Status::Aborted("filter is full"); } -rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted, + std::unordered_map *modified_buckets) { *inserted = false; - - // Track modified buckets to write atomically at the end - std::unordered_map modified_buckets; + modified_buckets->clear(); // Start from bucket1 uint32_t current_bucket_idx = hash % num_buckets; @@ -327,8 +352,8 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); std::string bucket_data; - auto cached = modified_buckets.find(bucket_key); - if (cached != modified_buckets.end()) { + auto cached = modified_buckets->find(bucket_key); + if (cached != modified_buckets->end()) { bucket_data = cached->second; } else { auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); @@ -338,7 +363,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us // Swap fingerprint with victim slot auto old_fp = static_cast(bucket_data[victim_slot]); bucket_data[victim_slot] = static_cast(current_fp); - modified_buckets[bucket_key] = bucket_data; // Cache modification + (*modified_buckets)[bucket_key] = bucket_data; current_fp = old_fp; // If kicked-out fingerprint is 0 (empty), we successfully inserted @@ -360,8 +385,8 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); std::string alt_bucket_data; - cached = modified_buckets.find(alt_bucket_key); - if (cached != modified_buckets.end()) { + cached = modified_buckets->find(alt_bucket_key); + if (cached != modified_buckets->end()) { alt_bucket_data = cached->second; } else { auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); @@ -370,8 +395,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us size_t empty_slot = 0; if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { - // Found empty slot! Cache this modification - modified_buckets[alt_bucket_key] = alt_bucket_data; + (*modified_buckets)[alt_bucket_key] = alt_bucket_data; *inserted = true; break; } @@ -381,41 +405,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us victim_slot = (victim_slot + 1) % metadata.bucket_size; } - // Write all modified buckets atomically - if (*inserted && !modified_buckets.empty()) { - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - - for (const auto &entry : modified_buckets) { - batch->Put(entry.first, entry.second); - } - - auto s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); - if (!s.ok()) return s; - } - return rocksdb::Status::OK(); } -rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata) { - if (metadata->n_filters >= UINT16_MAX) { - return rocksdb::Status::Aborted("maximum number of filters reached"); - } - - metadata->n_filters++; - INFO("CF.ADD: Expanded to {} filters", metadata->n_filters); - - // Write updated metadata - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.EXPAND", ""}); - batch->PutLogData(log_data.Encode()); - - std::string metadata_bytes; - metadata->Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); - - return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); -} - } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index c8ad5550bba..8f3885af026 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -20,6 +20,8 @@ #pragma once +#include + #include "cuckoo_filter.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -40,26 +42,25 @@ class CuckooChain : public Database { rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion); - // CF.ADD command - adds an item to the cuckoo filter - // Returns true if item was added, false if item already exists (probably) + // CF.ADD command - adds an item to the cuckoo filter. + // Duplicate items are allowed, so added is true whenever insertion succeeds. rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); + // Generate key for a specific bucket in bucket-based storage // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones - rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, - uint8_t fingerprint, uint64_t hash, bool *inserted); - - // Create a new sub-filter for expansion - rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted, std::unordered_map *modified_buckets); }; -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index b95caaed7e5..93d8c693bee 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -20,6 +20,7 @@ #include +#include #include #include "storage/redis_db.h" @@ -289,6 +290,13 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; } +TEST_F(RedisCuckooFilterTest, ReserveRejectsCapacityOverflow) { + auto s = cuckoo_->Reserve(*ctx_, key_, std::numeric_limits::max(), 1, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("capacity is too large"), std::string::npos); +} + TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { // Test different max_iterations values std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; From 67f4c0d4c1693f7e66ee5fe104520b59cc105d1a Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 11:08:53 +0800 Subject: [PATCH 12/20] fix: create new filter in CuckooChain::Add when key is not found --- src/commands/cmd_cuckoo_filter.cc | 9 +----- src/types/cuckoo_filter.h | 37 ++++++++++++++++------- src/types/redis_cuckoo_chain.cc | 31 +++++++++++++++---- tests/cppunit/types/cuckoo_filter_test.cc | 23 ++++++++++---- 4 files changed, 69 insertions(+), 31 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 830c43e7771..6213e2a33af 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -118,14 +118,7 @@ class CommandCFAdd : public Commander { auto s = cuckoo_db.Add(ctx, args_[1], args_[2], &added); if (!s.ok()) { - if (s.IsNotFound()) { - return {Status::RedisExecErr, "key not found"}; - } - if (s.IsAborted()) { - // Filter is full - return {Status::RedisExecErr, s.ToString()}; - } - return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; + return {Status::RedisExecErr, s.ToString()}; } // Duplicate items are allowed, so successful insertions return 1. diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 211d02cb9fb..3d0688491ef 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -20,6 +20,8 @@ #pragma once +#include + #include #include #include @@ -43,20 +45,33 @@ namespace redis { class CuckooFilter { public: static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { - if (bucket_size == 0) return false; - auto num_buckets = static_cast(capacity) / bucket_size / 0.955L; - return num_buckets <= static_cast(std::numeric_limits::max() / 2 + 1ULL); + uint32_t num_buckets = 0; + return OptimalNumBuckets(capacity, bucket_size, &num_buckets).ok(); } - // Calculate the optimal number of buckets for the filter - static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { - // A load factor of 95.5% is chosen for the cuckoo filter - auto num_buckets = static_cast(static_cast(capacity) / bucket_size / 0.955L); - // Round up to next power of 2 for better hash distribution - if (num_buckets == 0) num_buckets = 1; + // Calculate the optimal number of buckets for the filter. + static rocksdb::Status OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size, uint32_t* num_buckets) { + if (bucket_size == 0) { + return rocksdb::Status::InvalidArgument("bucket_size must be larger than 0"); + } + + constexpr long double kLoadFactor = 0.955L; + constexpr uint64_t kMaxSupportedBuckets = std::numeric_limits::max() / 2 + 1ULL; + auto max_supported_capacity = static_cast(kMaxSupportedBuckets * bucket_size * kLoadFactor); + if (capacity > max_supported_capacity) { + return rocksdb::Status::InvalidArgument("capacity is too large"); + } + + auto exact_buckets = static_cast(capacity) / bucket_size / kLoadFactor; + auto required_buckets = static_cast(exact_buckets); + if (static_cast(required_buckets) < exact_buckets) required_buckets++; + if (required_buckets == 0) required_buckets = 1; + + // Round up to next power of 2 for better hash distribution. uint32_t power = 1; - while (power < num_buckets) power <<= 1; - return power; + while (power < required_buckets) power <<= 1; + *num_buckets = power; + return rocksdb::Status::OK(); } // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 2d01ca4f410..4535a4394ea 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -107,7 +107,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key metadata.num_deleted_items = 0; // Calculate the number of buckets needed for this filter - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + if (!s.ok()) return s; INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); @@ -177,8 +179,19 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co CuckooChainMetadata metadata(false); auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); - if (s.IsNotFound()) return rocksdb::Status::NotFound("key not found"); - if (!s.ok()) return s; + if (s.IsNotFound()) { + // RedisBloom CF.ADD auto-creates the filter when the key does not exist: + // https://redis.io/docs/latest/commands/cf.add/ + metadata = CuckooChainMetadata(); + metadata.size = 0; + metadata.base_capacity = kCFDefaultCapacity; + metadata.bucket_size = kCFDefaultBucketSize; + metadata.max_iterations = kCFDefaultMaxIterations; + metadata.expansion = kCFDefaultExpansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + } + if (!s.ok() && !s.IsNotFound()) return s; s = ValidateMetadata(metadata); if (!s.ok()) return s; @@ -195,7 +208,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; // Calculate bucket indices uint32_t bucket1_idx = hash % num_buckets; @@ -256,7 +271,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; bool inserted = false; std::unordered_map modified_buckets; @@ -300,7 +317,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(new_filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Aborted("maximum filter capacity reached"); } - uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); + uint32_t new_num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size, &new_num_buckets); + if (!s.ok()) return s; uint32_t bucket1_idx = hash % new_num_buckets; std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 93d8c693bee..e40603c019d 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -122,7 +122,9 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { uint64_t capacity = 1000; uint8_t bucket_size = 4; - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); // Should be a power of 2 ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; @@ -277,7 +279,9 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { ASSERT_TRUE(s.ok()) << "Should handle large capacity"; // Verify num_buckets calculation doesn't overflow - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; @@ -286,7 +290,8 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; - num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + s = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; } @@ -325,7 +330,9 @@ TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; // Verify at least one bucket is created - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; } } @@ -391,11 +398,15 @@ TEST_F(RedisCuckooFilterTest, AddMultipleItems) { } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { - // Try to add to a filter that doesn't exist bool added = false; auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; + + s = cuckoo_->Reserve(*ctx_, "nonexistent_key", 1000, 4, 500, 2); ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsNotFound()) << "Should return NotFound error"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { From 23beb86c5e3a4658e0b759421877e45c9b67a0d7 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 15:10:57 +0800 Subject: [PATCH 13/20] optimize test --- src/commands/cmd_cuckoo_filter.cc | 7 +- src/types/redis_cuckoo_chain.cc | 7 +- src/types/redis_cuckoo_chain.h | 5 +- tests/cppunit/types/cuckoo_filter_test.cc | 554 ++++++------------ .../unit/type/bloom/cuckoo_filter_test.go | 55 ++ 5 files changed, 244 insertions(+), 384 deletions(-) create mode 100644 tests/gocase/unit/type/bloom/cuckoo_filter_test.go diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 6213e2a33af..6bbe5cb2524 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -66,11 +66,14 @@ class CommandCFReserve : public Commander { return {Status::RedisParseErr, "max iterations must be larger than 0"}; } } else if (parser.EatEqICase("EXPANSION")) { - auto parse_expansion = parser.TakeInt(); + auto parse_expansion = parser.TakeInt(); if (!parse_expansion.IsOK()) { return {Status::RedisParseErr, "invalid expansion factor"}; } expansion_ = parse_expansion.GetValue(); + if (expansion_ > kCFMaxExpansion) { + return {Status::RedisParseErr, "expansion must be between 0 and 32768"}; + } } else { return {Status::RedisParseErr, errInvalidSyntax}; } @@ -99,7 +102,7 @@ class CommandCFReserve : public Commander { uint64_t capacity_ = kCFDefaultCapacity; uint8_t bucket_size_ = kCFDefaultBucketSize; uint16_t max_iterations_ = kCFDefaultMaxIterations; - uint8_t expansion_ = kCFDefaultExpansion; + uint16_t expansion_ = kCFDefaultExpansion; }; class CommandCFAdd : public Commander { diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 4535a4394ea..0e817bea5cd 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -65,7 +65,7 @@ std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMeta } rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, - uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -83,6 +83,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (expansion > kCFMaxExpansion) { + return rocksdb::Status::InvalidArgument("expansion must be between 0 and 32768"); + } if (!CuckooFilter::IsCapacitySupported(capacity, bucket_size)) { return rocksdb::Status::InvalidArgument("capacity is too large"); } @@ -135,7 +138,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -static bool CalculateFilterCapacity(uint64_t base_capacity, uint8_t expansion, uint16_t filter_index, +static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, uint16_t filter_index, uint64_t *filter_capacity) { uint64_t capacity = base_capacity; for (uint16_t i = 0; i < filter_index; ++i) { diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 8f3885af026..2e02c13fbb7 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -32,7 +32,8 @@ namespace redis { const uint32_t kCFDefaultCapacity = 1024; const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket const uint16_t kCFDefaultMaxIterations = 500; -const uint8_t kCFDefaultExpansion = 2; +const uint16_t kCFDefaultExpansion = 2; +const uint16_t kCFMaxExpansion = 32768; class CuckooChain : public Database { public: @@ -40,7 +41,7 @@ class CuckooChain : public Database { // CF.RESERVE command - creates a new cuckoo filter with specified capacity rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, - uint16_t max_iterations, uint8_t expansion); + uint16_t max_iterations, uint16_t expansion); // CF.ADD command - adds an item to the cuckoo filter. // Duplicate items are allowed, so added is true whenever insertion succeeds. diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index e40603c019d..b9f355bb7f1 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -22,6 +22,7 @@ #include #include +#include #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -38,32 +39,126 @@ class RedisCuckooFilterTest : public TestBase { protected: explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); + db_ = std::make_unique(storage_.get(), "cuckoo_ns"); } ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); + db_.reset(); } void SetUp() override { - // Use a unique key for each test to avoid conflicts - // Include test name to make debugging easier const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } + void VerifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { + std::string ns_key = db_->AppendNamespacePrefix(key); + CuckooChainMetadata metadata(false); + auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); + ASSERT_TRUE(s.ok()) << key << ": metadata not found"; + EXPECT_EQ(metadata.Type(), kRedisCuckooFilter) << key; + EXPECT_EQ(metadata.base_capacity, capacity) << key; + EXPECT_EQ(metadata.bucket_size, bucket_size) << key; + EXPECT_EQ(metadata.max_iterations, max_iterations) << key; + EXPECT_EQ(metadata.expansion, expansion) << key; + EXPECT_EQ(metadata.size, size) << key; + EXPECT_EQ(metadata.n_filters, n_filters) << key; + EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; + } + + void ReserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + uint16_t expansion) { + auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); + VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + } + + void AddAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint16_t expansion, uint64_t expected_size, uint16_t n_filters = 1) { + bool added = false; + auto s = cuckoo_->Add(*ctx_, key, item, &added); + ASSERT_TRUE(s.ok()) << key << ": add '" << item << "' failed: " << s.ToString(); + ASSERT_TRUE(added) << key << ": item '" << item << "' should have been added"; + VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); + } + std::unique_ptr cuckoo_; + std::unique_ptr db_; std::string key_; }; -TEST_F(RedisCuckooFilterTest, ReserveBasic) { - // Test basic reserve operation - uint64_t capacity = 1000; - uint8_t bucket_size = 4; - uint16_t max_iterations = 500; - uint8_t expansion = 2; +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + struct InvalidTestCase { + std::string key; + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint16_t expansion; + std::string err; + }; - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); - ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); + std::vector invalid_test_cases = { + {"zero_capacity", 0, 4, 500, 2, "capacity must be larger than 0"}, + {"capacity_too_small", 1, 4, 500, 2, "capacity must be at least 2"}, + {"zero_bucket_size", 1000, 0, 500, 2, "bucket_size must be between 1 and 255"}, + {"zero_max_iterations", 1000, 4, 0, 2, "max_iterations must be larger than 0"}, + {"capacity_too_large", std::numeric_limits::max(), 4, 500, 2, "capacity is too large"}, + {"expansion_too_large", 1000, 4, 500, redis::kCFMaxExpansion + 1, "expansion must be between 0 and 32768"}, + }; + + for (const auto &test_case : invalid_test_cases) { + auto s = cuckoo_->Reserve(*ctx_, test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + test_case.expansion); + ASSERT_FALSE(s.ok()) << test_case.key; + ASSERT_TRUE(s.IsInvalidArgument()) << test_case.key << ": " << s.ToString(); + ASSERT_NE(s.ToString().find(test_case.err), std::string::npos) << test_case.key << ": " << s.ToString(); + } +} + +TEST_F(RedisCuckooFilterTest, ReserveValidParams) { + struct TestCase { + std::string key; + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint16_t expansion; + }; + + std::vector test_cases = { + {"min_capacity", 2, 4, 500, 2}, + {"min_bucket_size", 1000, 1, 500, 2}, + {"max_bucket_size", 1000, 255, 500, 2}, + {"min_max_iterations", 1000, 4, 1, 2}, + {"max_max_iterations", 1000, 4, 65535, 2}, + // RedisBloom allows expansion=0; it disables creating additional sub-filters. + {"no_auto_expansion", 2, 1, 1, 0}, + {"capacity_3", 3, 4, 500, 2}, + {"capacity_10", 10, 4, 500, 2}, + {"capacity_100", 100, 4, 500, 2}, + {"capacity_10000", 10000, 4, 500, 2}, + {"capacity_100000", 100000, 4, 500, 2}, + {"bucket_size_2", 1000, 2, 500, 2}, + {"bucket_size_8", 1000, 8, 500, 2}, + {"bucket_size_16", 1000, 16, 500, 2}, + {"bucket_size_128", 1000, 128, 500, 2}, + {"no_auto_expansion_regular", 1000, 4, 500, 0}, + // expansion=1 is valid and creates additional sub-filters with the same capacity. + {"expansion_1", 1000, 4, 500, 1}, + {"expansion_8", 1000, 4, 500, 8}, + {"expansion_256", 1000, 4, 500, 256}, + {"max_expansion", 1000, 4, 500, redis::kCFMaxExpansion}, + {"capacity_10M", 10000000, 4, 500, 2}, + {"capacity_100M", 100000000, 4, 500, 2}, + {"max_params", 100000, 255, 65535, redis::kCFMaxExpansion}, + {"mixed_params", 50000, 8, 1000, 4}, + }; + + for (const auto &test_case : test_cases) { + ReserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + test_case.expansion); + } } TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { @@ -78,60 +173,31 @@ TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } -TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { - // Test with zero capacity - auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test with zero bucket size - s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test with zero max iterations - s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); -} - -TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { - // Test with different capacities - std::vector capacities = {100, 1000, 10000, 100000}; - - for (size_t i = 0; i < capacities.size(); ++i) { - std::string test_key = key_ + "_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { - // Test with different valid bucket sizes - std::vector bucket_sizes = {1, 2, 4, 8, 16}; - - for (size_t i = 0; i < bucket_sizes.size(); ++i) { - std::string test_key = key_ + "_bucket_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); - } -} - TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { - // Test the static helper function - uint64_t capacity = 1000; - uint8_t bucket_size = 4; - - uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint32_t expected_num_buckets; + }; - // Should be a power of 2 - ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + std::vector test_cases = { + {0, 4, 1}, {1, 4, 1}, {2, 1, 4}, {4, 4, 2}, {488, 4, 128}, {489, 4, 256}, + {977, 4, 256}, {978, 4, 512}, {1000, 4, 512}, {1024, 4, 512}, {1000, 1, 2048}, {1000, 16, 128}, + }; - // Should be able to hold the capacity with 95.5% load factor - auto expected_min = static_cast(static_cast(capacity) / bucket_size / 0.955L); - ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; + for (const auto &test_case : test_cases) { + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << "capacity=" << test_case.capacity + << ", bucket_size=" << static_cast(test_case.bucket_size) << ": " << s.ToString(); + ASSERT_EQ(num_buckets, test_case.expected_num_buckets) + << "capacity=" << test_case.capacity << ", bucket_size=" << static_cast(test_case.bucket_size); + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + auto expected_min = + static_cast(static_cast(test_case.capacity) / test_case.bucket_size / 0.955L); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; + } } TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { @@ -151,24 +217,27 @@ TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { - uint32_t num_buckets = 1024; + std::vector num_buckets_cases = {1, 2, 128, 256, 512, 1024, 2048}; // Test GetAltHash symmetry at hash level (following RedisBloom design) // h2 = GetAltHash(fp, h1) // h1 = GetAltHash(fp, h2) <- this is the symmetry property - for (uint64_t hash = 0; hash < 100; ++hash) { - for (uint8_t fp = 1; fp < 10; ++fp) { - uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); - - // Applying GetAltHash twice should return original hash - uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); - ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; - - // Both hashes should map to valid bucket indices - uint32_t bucket1 = hash % num_buckets; - uint32_t bucket2 = alt_hash % num_buckets; - ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; - ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; + for (auto num_buckets : num_buckets_cases) { + for (uint64_t hash = 0; hash < 100; ++hash) { + for (uint16_t fp = 1; fp <= 255; ++fp) { + auto fingerprint = static_cast(fp); + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; + + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; + } } } } @@ -204,38 +273,11 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } -TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { - // Test capacity = 1 (too small, following RedisBloom behavior) - // With load factor 0.955 and bucket_size=4, this would result in 0 buckets - auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); - ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; -} - -TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { - // Test valid upper boundary (bucket_size is uint8_t, max = 255) - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; - - // Test bucket_size = 1 (minimum valid value) - s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; - - // Test common power-of-2 bucket sizes - std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; - for (size_t i = 0; i < valid_sizes.size(); ++i) { - std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); - s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; - } -} - TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint64_t capacity = 1000; uint8_t bucket_size = 4; uint16_t max_iterations = 500; - uint8_t expansion = 2; + uint16_t expansion = 2; // Create the filter auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); @@ -258,217 +300,58 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } -TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { - // expansion=0 means no auto-expansion when filter is full - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); - ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; - - // Verify different expansion values - std::vector expansions = {0, 1, 2, 4, 8}; - for (size_t i = 0; i < expansions.size(); ++i) { - std::string test_key = "key_exp_" + std::to_string(expansions[i]); - s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); - ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { - // Test with very large capacity - uint64_t large_capacity = 10000000; // 10 million - auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Should handle large capacity"; - - // Verify num_buckets calculation doesn't overflow - uint32_t num_buckets = 0; - s = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; - ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; - - // Test even larger capacity (100 million) - uint64_t huge_capacity = 100000000; - s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; - - s = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; -} - -TEST_F(RedisCuckooFilterTest, ReserveRejectsCapacityOverflow) { - auto s = cuckoo_->Reserve(*ctx_, key_, std::numeric_limits::max(), 1, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_NE(s.ToString().find("capacity is too large"), std::string::npos); -} - -TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { - // Test different max_iterations values - std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; - - for (size_t i = 0; i < iterations.size(); ++i) { - std::string test_key = "key_iter_" + std::to_string(iterations[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); - ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; - } - - // Test max_iterations = 0 (should fail) - auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); - ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; -} - -TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { - // Test minimum valid capacity - auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; - - // Test small but valid capacities - std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; - for (size_t i = 0; i < small_capacities.size(); ++i) { - std::string test_key = "small_" + std::to_string(small_capacities[i]); - s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; - - // Verify at least one bucket is created - uint32_t num_buckets = 0; - s = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { - // Test various parameter combinations to ensure robustness - struct TestCase { - uint64_t capacity; - uint8_t bucket_size; - uint16_t max_iterations; - uint8_t expansion; - bool should_succeed; - std::string description; - }; - - std::vector test_cases = { - {1000, 4, 500, 2, true, "Standard parameters"}, - {2, 1, 1, 0, true, "Minimum all parameters"}, - {100000, 255, 65535, 255, true, "Maximum all parameters"}, - {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, - {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, - {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, - }; - - for (size_t i = 0; i < test_cases.size(); ++i) { - const auto &tc = test_cases[i]; - std::string test_key = "combo_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); - - if (tc.should_succeed) { - ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; - } else { - ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; - } - } -} - TEST_F(RedisCuckooFilterTest, AddBasic) { - // First reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Failed to reserve: " << s.ToString(); - - // Add an item - bool added = false; - s = cuckoo_->Add(*ctx_, key_, "item1", &added); - ASSERT_TRUE(s.ok()) << "Failed to add item: " << s.ToString(); - ASSERT_TRUE(added) << "Item should have been added"; -} - -TEST_F(RedisCuckooFilterTest, AddMultipleItems) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add multiple items - std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; - for (const auto &item : items) { - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; - ASSERT_TRUE(added) << "Item should have been added: " << item; - } + ReserveAndVerify(key_, 1000, 4, 500, 2); + AddAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { + std::string key = "nonexistent_key"; bool added = false; - auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + auto s = cuckoo_->Add(*ctx_, key, "item1", &added); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; + VerifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + redis::kCFDefaultExpansion, 1, 1, 0); - s = cuckoo_->Reserve(*ctx_, "nonexistent_key", 1000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { - // Test with different bucket sizes std::vector bucket_sizes = {1, 2, 4, 8, 16}; - for (size_t i = 0; i < bucket_sizes.size(); ++i) { - std::string test_key = key_ + "_bucket_" + std::to_string(bucket_sizes[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, 100, bucket_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "Failed to reserve with bucket_size=" << static_cast(bucket_sizes[i]); - - // Add some items + for (auto bs : bucket_sizes) { + std::string test_key = key_ + "_bucket_" + std::to_string(bs); + ReserveAndVerify(test_key, 100, bs, 500, 2); for (int j = 0; j < 10; ++j) { - std::string item = "item_" + std::to_string(j); - bool added = false; - s = cuckoo_->Add(*ctx_, test_key, item, &added); - ASSERT_TRUE(s.ok()) << "Failed to add item with bucket_size=" << static_cast(bucket_sizes[i]); - ASSERT_TRUE(added); + AddAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); } } } TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add the same item multiple times (Cuckoo Filter allows duplicates) - std::string item = "duplicate_item"; + ReserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 5; ++i) { - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Iteration " << i; - ASSERT_TRUE(added) << "Should allow duplicate items"; + AddAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddManyItems) { - // Reserve a filter with moderate capacity - uint64_t capacity = 1000; - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add many items (should succeed without hitting limits) - int num_items = 100; - for (int i = 0; i < num_items; ++i) { - std::string item = "item_" + std::to_string(i); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Failed at item " << i; - ASSERT_TRUE(added); + ReserveAndVerify(key_, 1000, 4, 500, 2); + for (int i = 0; i < 100; ++i) { + AddAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { - // Test with very small capacity to trigger potential full filter scenario uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); // expansion=0 means no auto-growth + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); ASSERT_TRUE(s.ok()); - // Add items up to capacity - // With bucket_size=2 and capacity=10, we should have very limited space + uint64_t added_count = 0; bool full = false; for (int i = 0; i < 100; ++i) { std::string item = "item_" + std::to_string(i); @@ -476,112 +359,27 @@ TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { s = cuckoo_->Add(*ctx_, key_, item, &added); if (!s.ok()) { - // Filter is full ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; full = true; break; } - } - - // We expect the filter to become full at some point - ASSERT_TRUE(full) << "Small filter should eventually become full"; -} - -TEST_F(RedisCuckooFilterTest, AddEmptyItem) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add empty string - bool added = false; - s = cuckoo_->Add(*ctx_, key_, "", &added); - ASSERT_TRUE(s.ok()) << "Should be able to add empty string"; - ASSERT_TRUE(added); -} - -TEST_F(RedisCuckooFilterTest, AddLongItem) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add a very long string - std::string long_item(10000, 'x'); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, long_item, &added); - ASSERT_TRUE(s.ok()) << "Should be able to add long string"; - ASSERT_TRUE(added); -} -TEST_F(RedisCuckooFilterTest, AddBinaryData) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add binary data (including null bytes) - std::string binary_item = std::string("\x00\x01\x02\xFF\xFE", 5); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, binary_item, &added); - ASSERT_TRUE(s.ok()) << "Should be able to add binary data"; - ASSERT_TRUE(added); -} - -TEST_F(RedisCuckooFilterTest, AddWithVariousCapacities) { - // Test that Add works correctly with different filter capacities - std::vector capacities = {10, 100, 1000, 10000}; - - for (size_t i = 0; i < capacities.size(); ++i) { - std::string test_key = key_ + "_cap_" + std::to_string(capacities[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add a reasonable number of items relative to capacity - int num_items = std::min(static_cast(capacities[i] / 10), 50); - for (int j = 0; j < num_items; ++j) { - std::string item = "item_" + std::to_string(j); - bool added = false; - s = cuckoo_->Add(*ctx_, test_key, item, &added); - ASSERT_TRUE(s.ok()) << "Failed for capacity=" << capacities[i] << ", item=" << j; - ASSERT_TRUE(added); - } + ASSERT_TRUE(added) << "Item should have been added before the filter is full"; + ++added_count; } -} - -TEST_F(RedisCuckooFilterTest, AddConsistentHashing) { - // Verify that the same item always hashes to the same buckets - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - std::string item = "test_item"; - - // Calculate hash for the item - uint64_t hash1 = redis::CuckooFilter::Hash(item); - uint64_t hash2 = redis::CuckooFilter::Hash(item); - - // Hashing should be deterministic - ASSERT_EQ(hash1, hash2) << "Hash should be consistent for the same item"; - - // Fingerprint should be consistent - uint8_t fp1 = redis::CuckooFilter::GenerateFingerprint(hash1); - uint8_t fp2 = redis::CuckooFilter::GenerateFingerprint(hash2); - ASSERT_EQ(fp1, fp2) << "Fingerprint should be consistent"; - - // Add the item - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(added); + ASSERT_TRUE(full) << "Small filter should eventually become full"; + VerifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); } -TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { - // Verify that different items produce different hashes - std::vector items = {"item1", "item2", "item3", "different", "another"}; - std::set hashes; - - for (const auto &item : items) { - uint64_t hash = redis::CuckooFilter::Hash(item); - hashes.insert(hash); +TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { + ReserveAndVerify(key_, 1000, 4, 500, 2); + std::vector items = { + "", + std::string(10000, 'x'), + std::string("\x00\x01\x02\xFF\xFE", 5), + }; + for (size_t i = 0; i < items.size(); ++i) { + AddAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } - - // All items should have unique hashes (with very high probability) - ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; } diff --git a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go new file mode 100644 index 00000000000..1a87e3cc8ef --- /dev/null +++ b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package bloom + +import ( + "context" + "testing" + + "github.com/apache/kvrocks/tests/gocase/util" + "github.com/stretchr/testify/require" +) + +func TestCuckooFilter(t *testing.T) { + srv := util.StartServer(t, map[string]string{}) + defer srv.Close() + ctx := context.Background() + rdb := srv.NewClient() + defer func() { require.NoError(t, rdb.Close()) }() + + t.Run("Add creates filter", func(t *testing.T) { + key := "test_cuckoo_filter_add_create" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.Equal(t, int64(1), rdb.Do(ctx, "cf.add", key, "item").Val()) + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err(), "already exists") + }) + + t.Run("Wrong type", func(t *testing.T) { + key := "test_cuckoo_filter_wrong_type" + require.NoError(t, rdb.Set(ctx, key, "value", 0).Err()) + require.ErrorContains(t, rdb.Do(ctx, "cf.add", key, "item").Err(), "WRONGTYPE") + }) + + t.Run("Reserve expansion", func(t *testing.T) { + require.NoError(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_256", "1000", "EXPANSION", "256").Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_max", "1000", "EXPANSION", "32768").Err()) + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_too_large", "1000", "EXPANSION", "32769").Err(), "expansion must be between 0 and 32768") + }) +} From 1ade399ba84c690d16019510fd593d38399f4b19 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 8 May 2026 13:17:27 +0800 Subject: [PATCH 14/20] fix: lint --- tests/cppunit/types/cuckoo_filter_test.cc | 36 +++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index b9f355bb7f1..17dbe28e520 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -52,7 +52,7 @@ class RedisCuckooFilterTest : public TestBase { key_ = std::string("cf_test_") + test_info->name(); } - void VerifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + void verifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { std::string ns_key = db_->AppendNamespacePrefix(key); CuckooChainMetadata metadata(false); @@ -68,20 +68,20 @@ class RedisCuckooFilterTest : public TestBase { EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; } - void ReserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + void reserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); - VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); } - void AddAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, + void addAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, uint64_t expected_size, uint16_t n_filters = 1) { bool added = false; auto s = cuckoo_->Add(*ctx_, key, item, &added); ASSERT_TRUE(s.ok()) << key << ": add '" << item << "' failed: " << s.ToString(); ASSERT_TRUE(added) << key << ": item '" << item << "' should have been added"; - VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); } std::unique_ptr cuckoo_; @@ -156,7 +156,7 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { }; for (const auto &test_case : test_cases) { - ReserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + reserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, test_case.expansion); } } @@ -301,8 +301,8 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { } TEST_F(RedisCuckooFilterTest, AddBasic) { - ReserveAndVerify(key_, 1000, 4, 500, 2); - AddAndVerify(key_, "item1", 1000, 4, 500, 2, 1); + reserveAndVerify(key_, 1000, 4, 500, 2); + addAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { @@ -311,7 +311,7 @@ TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { auto s = cuckoo_->Add(*ctx_, key, "item1", &added); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; - VerifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, redis::kCFDefaultExpansion, 1, 1, 0); s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); @@ -325,24 +325,24 @@ TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { for (auto bs : bucket_sizes) { std::string test_key = key_ + "_bucket_" + std::to_string(bs); - ReserveAndVerify(test_key, 100, bs, 500, 2); + reserveAndVerify(test_key, 100, bs, 500, 2); for (int j = 0; j < 10; ++j) { - AddAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); + addAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); } } } TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 5; ++i) { - AddAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); + addAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddManyItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 100; ++i) { - AddAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); + addAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); } } @@ -369,17 +369,17 @@ TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { } ASSERT_TRUE(full) << "Small filter should eventually become full"; - VerifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); + verifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); } TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); std::vector items = { "", std::string(10000, 'x'), std::string("\x00\x01\x02\xFF\xFE", 5), }; for (size_t i = 0; i < items.size(); ++i) { - AddAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); + addAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } } From da03915b2c4ecc07b27df2858eb43262607b721b Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 02:10:33 +0800 Subject: [PATCH 15/20] feat: CuckooPageSet --- src/commands/cmd_cuckoo_filter.cc | 3 +- src/storage/redis_metadata.cc | 4 +- src/storage/redis_metadata.h | 8 +- src/types/cuckoo_filter.h | 9 +- src/types/cuckoo_filter_page.cc | 259 ++++++++++++++ src/types/cuckoo_filter_page.h | 91 +++++ src/types/redis_cuckoo_chain.cc | 174 +++------ src/types/redis_cuckoo_chain.h | 11 +- .../cppunit/types/cuckoo_filter_page_test.cc | 337 ++++++++++++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 220 +++++++++++- 10 files changed, 956 insertions(+), 160 deletions(-) create mode 100644 src/types/cuckoo_filter_page.cc create mode 100644 src/types/cuckoo_filter_page.h create mode 100644 tests/cppunit/types/cuckoo_filter_page_test.cc diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 6bbe5cb2524..2002912f94d 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -84,7 +84,8 @@ class CommandCFReserve : public Commander { Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); - auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_, + kCuckooFilterDefaultPageSize); if (!s.ok()) { if (s.IsInvalidArgument()) { diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index f9c91034e86..027ae06b456 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -654,6 +654,7 @@ void CuckooChainMetadata::Encode(std::string *dst) const { PutFixed8(dst, bucket_size); PutFixed16(dst, max_iterations); PutFixed64(dst, num_deleted_items); + PutFixed32(dst, page_size); } rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { @@ -661,7 +662,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { return s; } - if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint8_t)) { + if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint32_t) + sizeof(uint8_t)) { return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); } @@ -671,6 +672,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { GetFixed8(input, &bucket_size); GetFixed16(input, &max_iterations); GetFixed64(input, &num_deleted_items); + GetFixed32(input, &page_size); return rocksdb::Status::OK(); } diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 4503fbf444c..e4658336ebf 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -335,6 +335,8 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +constexpr uint32_t kCuckooFilterDefaultPageSize = 2048; + class CuckooChainMetadata : public Metadata { public: /// The number of sub-filters in the chain @@ -356,6 +358,9 @@ class CuckooChainMetadata : public Metadata { /// Track number of deleted items for maintenance uint64_t num_deleted_items; + /// Target maximum payload size for each persisted Cuckoo Filter page + uint32_t page_size; + explicit CuckooChainMetadata(bool generate_version = true) : Metadata(kRedisCuckooFilter, generate_version), n_filters(0), @@ -363,7 +368,8 @@ class CuckooChainMetadata : public Metadata { base_capacity(0), bucket_size(0), max_iterations(0), - num_deleted_items(0) {} + num_deleted_items(0), + page_size(kCuckooFilterDefaultPageSize) {} void Encode(std::string *dst) const override; using Metadata::Decode; diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 3d0688491ef..7e5fbef40cd 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -34,8 +34,8 @@ namespace redis { // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. -// This is a bucket-based storage implementation where each bucket is stored -// as an independent key-value pair in RocksDB +// Buckets are grouped into page values in RocksDB. The Cuckoo algorithm still +// works with logical bucket indexes, while the storage layer maps buckets to pages. // // Hash calculation follows RedisBloom's design: // - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) @@ -85,13 +85,10 @@ class CuckooFilter { return hash ^ (static_cast(fingerprint) * 0x5bd1e995); } - // Legacy function for backward compatibility with tests - // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index + // Calculate an alternate bucket from a bucket index and fingerprint. static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { - // Treat bucket_idx as a hash value for the calculation uint64_t hash = bucket_idx; uint64_t alt_hash = GetAltHash(fingerprint, hash); - // Convert back to bucket index return static_cast(alt_hash % num_buckets); } diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc new file mode 100644 index 00000000000..543506fa97e --- /dev/null +++ b/src/types/cuckoo_filter_page.cc @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "cuckoo_filter_page.h" + +#include + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" + +namespace redis { + +namespace { + +uint32_t GetBucketsPerPage(uint32_t page_size, uint8_t bucket_size) { + return std::max(1, page_size / bucket_size); +} + +uint32_t GetPageIndex(uint32_t bucket_index, uint32_t buckets_per_page) { return bucket_index / buckets_per_page; } + +uint32_t GetBucketOffset(uint32_t bucket_index, uint32_t buckets_per_page, uint8_t bucket_size) { + return (bucket_index % buckets_per_page) * bucket_size; +} + +uint32_t GetPageValueSize(uint32_t page_index, uint32_t num_buckets, uint32_t buckets_per_page, uint8_t bucket_size) { + uint32_t first_bucket = page_index * buckets_per_page; + uint32_t page_bucket_count = std::min(buckets_per_page, num_buckets - first_bucket); + return page_bucket_count * bucket_size; +} + +std::string GetCuckooPageKey(const Slice &ns_key, const CuckooChainMetadata &metadata, bool slot_id_encoded, + uint16_t filter_index, uint32_t page_index) { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(ns_key, sub_key, metadata.version, slot_id_encoded).Encode(); +} + +} // namespace + +CuckooPageSet::CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded) + : storage_(storage), + ctx_(ctx), + ns_key_(ns_key.ToString()), + metadata_(metadata), + slot_id_encoded_(slot_id_encoded) {} + +rocksdb::Status CuckooPageSet::TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, + uint32_t bucket1_index, uint32_t bucket2_index, + uint8_t fingerprint, bool *inserted) { + *inserted = false; + BucketRef bucket1, bucket2; + auto s = EnsureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); + if (!s.ok()) return s; + + size_t slot_idx = 0; + if (TryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { + *inserted = true; + return rocksdb::Status::OK(); + } + if (bucket1_index != bucket2_index && TryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { + *inserted = true; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint8_t fingerprint, bool *inserted) { + *inserted = false; + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + + size_t slot_idx = 0; + *inserted = TryInsertInBucketRef(bucket, fingerprint, &slot_idx); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t *fingerprint) { + if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + *fingerprint = GetBucketRefSlot(bucket, slot_idx); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t fingerprint) { + if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + SetBucketRefSlot(bucket, slot_idx, fingerprint); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batch) { + for (const auto &entry : pages_) { + if (!entry.second.is_dirty) continue; + auto s = batch->Put(entry.first, entry.second.data); + if (!s.ok()) return s; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketLocation *location) const { + if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { + return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); + } + + uint32_t buckets_per_page = GetBucketsPerPage(metadata_.page_size, metadata_.bucket_size); + uint32_t page_index = GetPageIndex(bucket_index, buckets_per_page); + location->page_key = GetCuckooPageKey(ns_key_, metadata_, slot_id_encoded_, filter_index, page_index); + location->offset = GetBucketOffset(bucket_index, buckets_per_page, metadata_.bucket_size); + location->expected_page_size = GetPageValueSize(page_index, num_buckets, buckets_per_page, metadata_.bucket_size); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketRef *bucket) { + BucketLocation location; + auto s = ResolveBucketLocation(filter_index, num_buckets, bucket_index, &location); + if (!s.ok()) return s; + + PageEntry *page = nullptr; + s = LoadPage(location, &page); + if (!s.ok()) return s; + + bucket->page = page; + bucket->offset = location.offset; + bucket->size = metadata_.bucket_size; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, + uint32_t bucket1_index, uint32_t bucket2_index, + BucketRef *bucket1, BucketRef *bucket2) { + BucketLocation location1, location2; + auto s = ResolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); + if (!s.ok()) return s; + s = ResolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); + if (!s.ok()) return s; + + std::vector missing_locations; + if (pages_.find(location1.page_key) == pages_.end()) missing_locations.push_back(location1); + if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { + missing_locations.push_back(location2); + } + s = LoadPages(missing_locations); + if (!s.ok()) return s; + + bucket1->page = &pages_.at(location1.page_key); + bucket1->offset = location1.offset; + bucket1->size = metadata_.bucket_size; + bucket2->page = &pages_.at(location2.page_key); + bucket2->offset = location2.offset; + bucket2->size = metadata_.bucket_size; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntry **page) { + auto iter = pages_.find(location.page_key); + if (iter != pages_.end()) { + *page = &iter->second; + return rocksdb::Status::OK(); + } + + PageEntry page_entry; + auto s = storage_->Get(ctx_, ctx_.GetReadOptions(), location.page_key, &page_entry.data); + if (!s.ok() && !s.IsNotFound()) return s; + s = NormalizePage(s, location.expected_page_size, &page_entry); + if (!s.ok()) return s; + + auto result = pages_.emplace(location.page_key, std::move(page_entry)); + *page = &result.first->second; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::LoadPages(const std::vector &locations) { + if (locations.empty()) return rocksdb::Status::OK(); + if (locations.size() == 1) { + PageEntry *page = nullptr; + return LoadPage(locations[0], &page); + } + + std::vector keys; + keys.reserve(locations.size()); + for (const auto &location : locations) keys.emplace_back(location.page_key); + + std::vector values(locations.size()); + std::vector statuses(locations.size()); + storage_->MultiGet(ctx_, ctx_.DefaultMultiGetOptions(), storage_->GetDB()->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data()); + + for (size_t i = 0; i < locations.size(); ++i) { + PageEntry page_entry; + if (statuses[i].ok()) page_entry.data.assign(values[i].data(), values[i].size()); + auto s = NormalizePage(statuses[i], locations[i].expected_page_size, &page_entry); + if (!s.ok()) return s; + pages_.emplace(locations[i].page_key, std::move(page_entry)); + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint32_t expected_size, + PageEntry *page) const { + if (!status.ok() && !status.IsNotFound()) return status; + if (status.IsNotFound()) page->data.clear(); + if (page->data.size() > expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); + if (page->data.size() < expected_size) page->data.resize(expected_size, 0); + return rocksdb::Status::OK(); +} + +bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { + for (size_t i = 0; i < bucket.size; ++i) { + size_t offset = bucket.offset + i; + if (static_cast(bucket.page->data[offset]) == 0) { + bucket.page->data[offset] = static_cast(fingerprint); + bucket.page->is_dirty = true; + *slot_idx = i; + return true; + } + } + return false; +} + +uint8_t CuckooPageSet::GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { + return static_cast(bucket.page->data[bucket.offset + slot_idx]); +} + +void CuckooPageSet::SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { + bucket.page->data[bucket.offset + slot_idx] = static_cast(fingerprint); + bucket.page->is_dirty = true; +} + +} // namespace redis diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h new file mode 100644 index 00000000000..17627eef0ed --- /dev/null +++ b/src/types/cuckoo_filter_page.h @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "storage/redis_metadata.h" +#include "storage/storage.h" + +namespace redis { + +class CuckooPageSet { + public: + CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded); + + rocksdb::Status TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index, uint8_t fingerprint, bool *inserted); + rocksdb::Status TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint8_t fingerprint, bool *inserted); + rocksdb::Status GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, + uint8_t *fingerprint); + rocksdb::Status SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, + uint8_t fingerprint); + rocksdb::Status WriteBackDirtyPages(rocksdb::WriteBatchBase *batch); + + private: + struct PageEntry { + std::string data; + bool is_dirty = false; + }; + + struct BucketRef { + PageEntry *page = nullptr; + uint32_t offset = 0; + uint8_t size = 0; + }; + + struct BucketLocation { + std::string page_key; + uint32_t offset = 0; + uint32_t expected_page_size = 0; + }; + + rocksdb::Status ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketLocation *location) const; + rocksdb::Status EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketRef *bucket); + rocksdb::Status EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); + rocksdb::Status LoadPage(const BucketLocation &location, PageEntry **page); + rocksdb::Status LoadPages(const std::vector &locations); + rocksdb::Status NormalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; + + bool TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); + uint8_t GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; + void SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); + + engine::Storage *storage_ = nullptr; + engine::Context &ctx_; + std::string ns_key_; + const CuckooChainMetadata &metadata_; + bool slot_id_encoded_ = false; + std::unordered_map pages_; +}; + +} // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0e817bea5cd..0cfc76b7000 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -21,9 +21,9 @@ #include "redis_cuckoo_chain.h" #include -#include #include "cuckoo_filter.h" +#include "cuckoo_filter_page.h" #include "logging.h" namespace redis { @@ -46,26 +46,18 @@ rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadat if (metadata.max_iterations == 0) { return rocksdb::Status::Corruption("invalid metadata: max_iterations is 0"); } + if (metadata.page_size < metadata.bucket_size) { + return rocksdb::Status::Corruption("invalid metadata: page_size is smaller than bucket_size"); + } if (!CuckooFilter::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: base_capacity is too large"); } return rocksdb::Status::OK(); } -std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t bucket_index) { - // Create a sub-key that includes both filter index and bucket index - std::string sub_key; - PutFixed16(&sub_key, filter_index); - PutFixed32(&sub_key, bucket_index); - - // Create the internal key using the storage encoding - std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); - return bucket_key; -} - rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, - uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { + uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, + uint32_t page_size) { if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -83,6 +75,12 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (page_size == 0) { + return rocksdb::Status::InvalidArgument("page_size must be larger than 0"); + } + if (page_size < bucket_size) { + return rocksdb::Status::InvalidArgument("page_size must be at least bucket_size"); + } if (expansion > kCFMaxExpansion) { return rocksdb::Status::InvalidArgument("expansion must be between 0 and 32768"); } @@ -108,14 +106,17 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key metadata.expansion = expansion; metadata.n_filters = 1; metadata.num_deleted_items = 0; + metadata.page_size = page_size; // Calculate the number of buckets needed for this filter uint32_t num_buckets = 0; s = CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); if (!s.ok()) return s; - INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", - capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); + INFO( + "Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}, " + "page_size={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion), page_size); // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); @@ -128,12 +129,8 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); if (!s.ok()) return s; - // Note: With bucket-based storage, we don't pre-allocate all buckets - // Buckets will be created lazily on first write - // This saves memory for sparse filters - - // Optionally, we could create the first few buckets to ensure the filter is ready - // But for now, we'll keep it fully lazy for maximum memory efficiency + // Pages are created lazily on first write. Reserve only persists metadata so sparse filters don't preallocate page + // values that may never be used. return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } @@ -149,34 +146,6 @@ static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, return true; } -// Helper function: try to find empty slot in a bucket and insert fingerprint -static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { - for (size_t i = 0; i < bucket_size; ++i) { - if (static_cast(bucket_data[i]) == 0) { - bucket_data[i] = static_cast(fingerprint); - *slot_idx = i; - return true; - } - } - return false; -} - -// Helper function: read bucket from storage and ensure correct size -static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, - uint8_t bucket_size, std::string *bucket_data) { - auto s = storage->Get(ctx, ctx.GetReadOptions(), bucket_key, bucket_data); - if (!s.ok() && !s.IsNotFound()) { - return s; - } - if (s.IsNotFound()) { - bucket_data->clear(); - } - if (bucket_data->size() < bucket_size) { - bucket_data->resize(bucket_size, 0); - } - return rocksdb::Status::OK(); -} - rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); @@ -193,6 +162,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co metadata.expansion = kCFDefaultExpansion; metadata.n_filters = 1; metadata.num_deleted_items = 0; + metadata.page_size = kCuckooFilterDefaultPageSize; } if (!s.ok() && !s.IsNotFound()) return s; @@ -220,37 +190,18 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); uint32_t bucket2_idx = alt_hash % num_buckets; - // Read both buckets using helper function - std::string bucket1_key = getBucketKey(ns_key, metadata, filter_idx, bucket1_idx); - std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); - - std::string bucket1_data, bucket2_data; - s = ReadBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + s = pages.TryInsertInCandidateBuckets(filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, &inserted); if (!s.ok()) return s; - s = ReadBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); - if (!s.ok()) return s; - - // Try simple insertion in bucket1 or bucket2 - size_t slot_idx = 0; - std::string *target_bucket_data = nullptr; - std::string target_bucket_key; - - if (TryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { - target_bucket_data = &bucket1_data; - target_bucket_key = bucket1_key; - } else if (TryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { - target_bucket_data = &bucket2_data; - target_bucket_key = bucket2_key; - } - - if (target_bucket_data != nullptr) { + if (inserted) { // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = batch->Put(target_bucket_key, *target_bucket_data); + s = pages.WriteBackDirtyPages(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -279,20 +230,13 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!s.ok()) return s; bool inserted = false; - std::unordered_map modified_buckets; - s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, - &modified_buckets); + auto batch = storage_->GetWriteBatchBase(); + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, batch.Get()); if (s.ok() && inserted) { - auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - for (const auto &entry : modified_buckets) { - s = batch->Put(entry.first, entry.second); - if (!s.ok()) return s; - } - metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); @@ -325,17 +269,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!s.ok()) return s; uint32_t bucket1_idx = hash % new_num_buckets; - std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); - - // Insert into first slot of the new filter's first bucket - std::string bucket1_data(metadata.bucket_size, 0); - bucket1_data[0] = static_cast(fingerprint); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + s = pages.TryInsertInBucket(new_filter_idx, new_num_buckets, bucket1_idx, fingerprint, &inserted); + if (!s.ok()) return s; + if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = batch->Put(bucket1_key, bucket1_data); + s = pages.WriteBackDirtyPages(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -359,9 +302,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted, - std::unordered_map *modified_buckets) { + rocksdb::WriteBatchBase *batch) { *inserted = false; - modified_buckets->clear(); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); // Start from bucket1 uint32_t current_bucket_idx = hash % num_buckets; @@ -370,22 +313,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns // Try to kick out existing fingerprints (all operations in memory) for (uint16_t iteration = 0; iteration < metadata.max_iterations; ++iteration) { - // Read current bucket (check modified_buckets cache first) - std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); - std::string bucket_data; - - auto cached = modified_buckets->find(bucket_key); - if (cached != modified_buckets->end()) { - bucket_data = cached->second; - } else { - auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); - if (!s.ok()) return s; - } - // Swap fingerprint with victim slot - auto old_fp = static_cast(bucket_data[victim_slot]); - bucket_data[victim_slot] = static_cast(current_fp); - (*modified_buckets)[bucket_key] = bucket_data; + uint8_t old_fp = 0; + auto s = pages.GetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, &old_fp); + if (!s.ok()) return s; + s = pages.SetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, current_fp); + if (!s.ok()) return s; current_fp = old_fp; // If kicked-out fingerprint is 0 (empty), we successfully inserted @@ -394,30 +327,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns break; } - // Calculate alternate bucket for the kicked-out fingerprint - // CRITICAL FIX: Use XOR hash calculation correctly - // We need to reconstruct the hash for the alternate bucket - // Since h2 = h1 ^ (fp * 0x5bd1e995), and we know bucket_idx = h1 % num_buckets - // We calculate alt_hash using the fingerprint and current bucket index - uint64_t current_hash = current_bucket_idx; // Approximate hash from bucket index - uint64_t alt_hash = CuckooFilter::GetAltHash(current_fp, current_hash); - uint32_t alt_bucket_idx = alt_hash % num_buckets; - - // Check if alternate bucket has empty slot - std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); - std::string alt_bucket_data; - - cached = modified_buckets->find(alt_bucket_key); - if (cached != modified_buckets->end()) { - alt_bucket_data = cached->second; - } else { - auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); - if (!s.ok()) return s; - } + uint32_t alt_bucket_idx = CuckooFilter::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets); - size_t empty_slot = 0; - if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { - (*modified_buckets)[alt_bucket_key] = alt_bucket_data; + bool inserted_in_alt_bucket = false; + s = pages.TryInsertInBucket(filter_index, num_buckets, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); + if (!s.ok()) return s; + if (inserted_in_alt_bucket) { *inserted = true; break; } @@ -427,6 +342,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns victim_slot = (victim_slot + 1) % metadata.bucket_size; } + if (*inserted) return pages.WriteBackDirtyPages(batch); return rocksdb::Status::OK(); } diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 2e02c13fbb7..2f469d2be9d 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -20,8 +20,6 @@ #pragma once -#include - #include "cuckoo_filter.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -41,7 +39,7 @@ class CuckooChain : public Database { // CF.RESERVE command - creates a new cuckoo filter with specified capacity rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, - uint16_t max_iterations, uint16_t expansion); + uint16_t max_iterations, uint16_t expansion, uint32_t page_size); // CF.ADD command - adds an item to the cuckoo filter. // Duplicate items are allowed, so added is true whenever insertion succeeds. @@ -53,15 +51,10 @@ class CuckooChain : public Database { static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); - // Generate key for a specific bucket in bucket-based storage - // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} - std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t bucket_index); - // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted, std::unordered_map *modified_buckets); + bool *inserted, rocksdb::WriteBatchBase *batch); }; } // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc new file mode 100644 index 00000000000..a09b0f6e35f --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "types/cuckoo_filter_page.h" + +#include + +#include +#include + +#include "common/encoding.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooPageSetTest : public TestBase { + public: + RedisCuckooPageSetTest(const RedisCuckooPageSetTest &) = delete; + RedisCuckooPageSetTest &operator=(const RedisCuckooPageSetTest &) = delete; + RedisCuckooPageSetTest(RedisCuckooPageSetTest &&) = delete; + RedisCuckooPageSetTest &operator=(RedisCuckooPageSetTest &&) = delete; + + protected: + explicit RedisCuckooPageSetTest() : TestBase(), db_(std::make_unique(storage_.get(), "cuckoo_ns")) {} + + ~RedisCuckooPageSetTest() override { db_.reset(); } + + void SetUp() override { + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + ns_key_ = db_->AppendNamespacePrefix(std::string("cf_page_test_") + test_info->name()); + } + + static CuckooChainMetadata MakeMetadata(uint8_t bucket_size, uint64_t version = 1, + uint32_t page_size = kCuckooFilterDefaultPageSize) { + CuckooChainMetadata metadata(false); + metadata.version = version; + metadata.size = 0; + metadata.base_capacity = 2; + metadata.bucket_size = bucket_size; + metadata.max_iterations = 500; + metadata.expansion = 0; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + metadata.page_size = page_size; + return metadata; + } + + std::string MakePageKey(const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t page_index) const { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(ns_key_, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + } + + rocksdb::Status ReadPage(const std::string &page_key, std::string *value) { + return storage_->Get(*ctx_, ctx_->GetReadOptions(), storage_->GetCFHandle(ColumnFamilyID::PrimarySubkey), page_key, + value); + } + + void WritePage(const std::string &page_key, const std::string &value) { + auto batch = storage_->GetWriteBatchBase(); + auto s = batch->Put(page_key, value); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + void CommitBatch(rocksdb::WriteBatchBase *batch) { + auto s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + std::unique_ptr db_; + std::string ns_key_; +}; + +TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 1, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(page.substr(0, 4), std::string(4, 0)); + EXPECT_EQ(static_cast(page[4]), 11); + EXPECT_EQ(page.substr(5, 3), std::string(3, 0)); +} + +TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 513, 512, 33, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 4); + EXPECT_EQ(static_cast(page[0]), 33); + EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); + + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { + auto metadata = MakeMetadata(2); + WritePage(MakePageKey(metadata, 0, 0), std::string{static_cast(11), static_cast(12), 0, 0, 0, 0, 0, 0}); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInCandidateBuckets(0, 4, 0, 1, 22, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 11); + EXPECT_EQ(static_cast(page[1]), 12); + EXPECT_EQ(static_cast(page[2]), 22); + EXPECT_EQ(static_cast(page[3]), 0); + EXPECT_EQ(page.substr(4), std::string(4, 0)); +} + +TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnDifferentPagesWriteBothPages) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + auto s = pages.SetBucketSlot(0, 1025, 0, 0, 44); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = pages.SetBucketSlot(0, 1025, 512, 0, 55); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), metadata.page_size); + EXPECT_EQ(static_cast(page[0]), 44); + + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), metadata.page_size); + EXPECT_EQ(static_cast(page[0]), 55); +} + +TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { + auto metadata = MakeMetadata(4, 1, 8); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 3, 2, 66, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); + + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 4); + EXPECT_EQ(static_cast(page[0]), 66); + EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); +} + +TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { + auto metadata = MakeMetadata(4); + std::string expected(16, static_cast(7)); + WritePage(MakePageKey(metadata, 0, 0), expected); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + auto s = pages.SetBucketSlot(0, 4, 2, 1, 88); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + expected[9] = static_cast(88); + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, expected); +} + +TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 4, &fingerprint); + EXPECT_TRUE(s.IsInvalidArgument()) << s.ToString(); + + s = pages.SetBucketSlot(0, 2, 0, 4, 11); + EXPECT_TRUE(s.IsInvalidArgument()) << s.ToString(); + + bool inserted = true; + s = pages.TryInsertInBucket(0, 2, 2, 11, &inserted); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + s = pages.TryInsertInBucket(0, 0, 0, 11, &inserted); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { + auto metadata = MakeMetadata(4); + WritePage(MakePageKey(metadata, 0, 0), std::string(9, static_cast(1))); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, std::string(9, static_cast(1))); +} + +TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { + auto metadata = MakeMetadata(4); + { + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + } + + std::string page; + auto s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { + auto old_metadata = MakeMetadata(4, 100); + auto new_metadata = MakeMetadata(4, 101); + { + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + } + + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(fingerprint, 0); + s = pages.SetBucketSlot(0, 2, 0, 0, 22); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(old_metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 11); + + s = ReadPage(MakePageKey(new_metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 22); +} diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 17dbe28e520..c00c15f41c9 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -20,13 +20,16 @@ #include +#include #include #include #include +#include "common/encoding.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" #include "test_base.h" +#include "types/cuckoo_filter_page.h" #include "types/redis_cuckoo_chain.h" class RedisCuckooFilterTest : public TestBase { @@ -53,7 +56,8 @@ class RedisCuckooFilterTest : public TestBase { } void verifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, - uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { + uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0, + uint32_t page_size = kCuckooFilterDefaultPageSize) { std::string ns_key = db_->AppendNamespacePrefix(key); CuckooChainMetadata metadata(false); auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); @@ -66,13 +70,14 @@ class RedisCuckooFilterTest : public TestBase { EXPECT_EQ(metadata.size, size) << key; EXPECT_EQ(metadata.n_filters, n_filters) << key; EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; + EXPECT_EQ(metadata.page_size, page_size) << key; } void reserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, - uint16_t expansion) { - auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); + uint16_t expansion, uint32_t page_size = kCuckooFilterDefaultPageSize) { + auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion, page_size); ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); - verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0, page_size); } void addAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, @@ -84,6 +89,28 @@ class RedisCuckooFilterTest : public TestBase { verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); } + CuckooChainMetadata getMetadata(const std::string &key) { + std::string ns_key = db_->AppendNamespacePrefix(key); + CuckooChainMetadata metadata(false); + auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); + EXPECT_TRUE(s.ok()) << key << ": metadata not found"; + return metadata; + } + + std::string makePageKey(const std::string &key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t page_index) { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(db_->AppendNamespacePrefix(key), sub_key, metadata.version, storage_->IsSlotIdEncoded()) + .Encode(); + } + + rocksdb::Status readPage(const std::string &page_key, std::string *value) { + return storage_->Get(*ctx_, ctx_->GetReadOptions(), storage_->GetCFHandle(ColumnFamilyID::PrimarySubkey), page_key, + value); + } + std::unique_ptr cuckoo_; std::unique_ptr db_; std::string key_; @@ -106,11 +133,19 @@ TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { {"zero_max_iterations", 1000, 4, 0, 2, "max_iterations must be larger than 0"}, {"capacity_too_large", std::numeric_limits::max(), 4, 500, 2, "capacity is too large"}, {"expansion_too_large", 1000, 4, 500, redis::kCFMaxExpansion + 1, "expansion must be between 0 and 32768"}, + {"zero_page_size", 1000, 4, 500, 2, "page_size must be larger than 0"}, + {"page_size_smaller_than_bucket", 1000, 4, 500, 2, "page_size must be at least bucket_size"}, }; for (const auto &test_case : invalid_test_cases) { + uint32_t page_size = kCuckooFilterDefaultPageSize; + if (test_case.key == "zero_page_size") { + page_size = 0; + } else if (test_case.key == "page_size_smaller_than_bucket") { + page_size = test_case.bucket_size - 1; + } auto s = cuckoo_->Reserve(*ctx_, test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, - test_case.expansion); + test_case.expansion, page_size); ASSERT_FALSE(s.ok()) << test_case.key; ASSERT_TRUE(s.IsInvalidArgument()) << test_case.key << ": " << s.ToString(); ASSERT_NE(s.ToString().find(test_case.err), std::string::npos) << test_case.key << ": " << s.ToString(); @@ -163,11 +198,11 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { // First reserve should succeed - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()); // Second reserve with same key should fail - s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -280,31 +315,84 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint16_t expansion = 2; // Create the filter - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + auto s = + cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()) << "First reserve should succeed"; // Verify metadata was stored by trying to reserve again with same key // This should fail with "already exists" error - s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys - s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion, + kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; // Verify the original key still exists (can't create it again) - s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Original key should still exist"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } +TEST_F(RedisCuckooFilterTest, ReservePersistsPageSize) { + constexpr uint32_t page_size = 4096; + reserveAndVerify(key_, 1000, 4, 500, 2, page_size); +} + TEST_F(RedisCuckooFilterTest, AddBasic) { reserveAndVerify(key_, 1000, 4, 500, 2); addAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } +TEST_F(RedisCuckooFilterTest, ReserveDoesNotPreallocatePages) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + auto metadata = getMetadata(key_); + std::string page; + auto s = readPage(makePageKey(key_, metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { + constexpr uint64_t capacity = 1000; + constexpr uint8_t bucket_size = 4; + reserveAndVerify(key_, capacity, bucket_size, 500, 2); + + const std::string item = "item1"; + addAndVerify(key_, item, capacity, bucket_size, 500, 2, 1); + + auto metadata = getMetadata(key_); + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto hash = redis::CuckooFilter::Hash(item); + auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); + auto bucket1_idx = static_cast(hash % num_buckets); + auto bucket2_idx = static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets); + auto buckets_per_page = metadata.page_size / bucket_size; + auto page_index = bucket1_idx / buckets_per_page; + auto page_size = std::min(buckets_per_page, num_buckets - page_index * buckets_per_page) * bucket_size; + + std::string page; + s = readPage(makePageKey(key_, metadata, 0, page_index), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), page_size); + + auto bucket1_offset = (bucket1_idx % buckets_per_page) * bucket_size; + auto bucket2_offset = (bucket2_idx % buckets_per_page) * bucket_size; + bool found = false; + for (uint32_t i = 0; i < bucket_size; ++i) { + found = found || static_cast(page[bucket1_offset + i]) == fingerprint; + if (bucket2_idx / buckets_per_page == page_index) { + found = found || static_cast(page[bucket2_offset + i]) == fingerprint; + } + } + EXPECT_TRUE(found); +} + TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { std::string key = "nonexistent_key"; bool added = false; @@ -314,7 +402,7 @@ TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, redis::kCFDefaultExpansion, 1, 1, 0); - s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -348,7 +436,7 @@ TEST_F(RedisCuckooFilterTest, AddManyItems) { TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()); uint64_t added_count = 0; @@ -383,3 +471,109 @@ TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { addAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } } + +TEST_F(RedisCuckooFilterTest, KickOutSuccessWritesDirtyPages) { + constexpr uint64_t capacity = 2; + constexpr uint8_t bucket_size = 1; + constexpr uint32_t num_buckets = 4; + reserveAndVerify(key_, capacity, bucket_size, 1, 0); + + struct Candidate { + std::string item; + uint8_t fingerprint = 0; + uint32_t bucket1 = 0; + uint32_t bucket2 = 0; + }; + + std::vector candidates; + for (int i = 0; i < 10000; ++i) { + std::string item = "kick_item_" + std::to_string(i); + auto hash = redis::CuckooFilter::Hash(item); + auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); + candidates.push_back({item, fingerprint, static_cast(hash % num_buckets), + static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets)}); + } + + Candidate first; + Candidate second; + Candidate kicked; + uint32_t evicted_bucket = 0; + bool found = false; + for (const auto &candidate : candidates) { + if (candidate.bucket1 == candidate.bucket2) continue; + for (const auto &first_candidate : candidates) { + if (first_candidate.item == candidate.item || first_candidate.bucket1 != candidate.bucket1) continue; + auto alt_for_victim = + redis::CuckooFilter::GetAltBucketIndex(candidate.bucket1, first_candidate.fingerprint, num_buckets); + if (alt_for_victim == candidate.bucket1 || alt_for_victim == candidate.bucket2) continue; + + for (const auto &second_candidate : candidates) { + if (second_candidate.item == candidate.item || second_candidate.item == first_candidate.item || + second_candidate.bucket1 != candidate.bucket2) { + continue; + } + first = first_candidate; + second = second_candidate; + kicked = candidate; + evicted_bucket = alt_for_victim; + found = true; + break; + } + if (found) break; + } + if (found) break; + } + ASSERT_TRUE(found); + + addAndVerify(key_, first.item, capacity, bucket_size, 1, 0, 1); + addAndVerify(key_, second.item, capacity, bucket_size, 1, 0, 2); + addAndVerify(key_, kicked.item, capacity, bucket_size, 1, 0, 3); + + auto metadata = getMetadata(key_); + ASSERT_EQ(metadata.n_filters, 1); + ASSERT_EQ(metadata.size, 3); + + std::string page; + auto s = readPage(makePageKey(key_, metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), num_buckets * bucket_size); + EXPECT_EQ(static_cast(page[kicked.bucket1]), kicked.fingerprint); + EXPECT_EQ(static_cast(page[kicked.bucket2]), second.fingerprint); + EXPECT_EQ(static_cast(page[evicted_bucket]), first.fingerprint); +} + +TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { + constexpr uint64_t capacity = 2; + constexpr uint8_t bucket_size = 1; + reserveAndVerify(key_, capacity, bucket_size, 1, 2); + + CuckooChainMetadata metadata(false); + uint64_t added_count = 0; + for (int i = 0; i < 100; ++i) { + bool added = false; + auto s = cuckoo_->Add(*ctx_, key_, "item_" + std::to_string(i), &added); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(added); + ++added_count; + + metadata = getMetadata(key_); + if (metadata.n_filters > 1) break; + } + + ASSERT_GT(metadata.n_filters, 1); + EXPECT_EQ(metadata.size, added_count); + + uint32_t num_buckets = 0; + uint64_t new_filter_capacity = capacity; + for (uint16_t i = 0; i < metadata.n_filters - 1; ++i) { + new_filter_capacity *= metadata.expansion; + } + auto s = redis::CuckooFilter::OptimalNumBuckets(new_filter_capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto expected_page_size = std::min(metadata.page_size / bucket_size, num_buckets) * bucket_size; + + std::string page; + s = readPage(makePageKey(key_, metadata, metadata.n_filters - 1, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page.size(), expected_page_size); +} From adf0ff042448c8224be0d28215fe542b3b8c263e Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 10:58:19 +0800 Subject: [PATCH 16/20] fix: lint --- src/types/cuckoo_filter_page.cc | 52 ++++++++++++++++----------------- src/types/cuckoo_filter_page.h | 18 ++++++------ src/types/redis_cuckoo_chain.cc | 4 +-- src/types/redis_cuckoo_chain.h | 2 +- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 543506fa97e..8efc2a9622c 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -68,15 +68,15 @@ rocksdb::Status CuckooPageSet::TryInsertInCandidateBuckets(uint16_t filter_index uint8_t fingerprint, bool *inserted) { *inserted = false; BucketRef bucket1, bucket2; - auto s = EnsureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); + auto s = ensureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); if (!s.ok()) return s; size_t slot_idx = 0; - if (TryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { + if (tryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { *inserted = true; return rocksdb::Status::OK(); } - if (bucket1_index != bucket2_index && TryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { + if (bucket1_index != bucket2_index && tryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { *inserted = true; } return rocksdb::Status::OK(); @@ -86,11 +86,11 @@ rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t uint8_t fingerprint, bool *inserted) { *inserted = false; BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; size_t slot_idx = 0; - *inserted = TryInsertInBucketRef(bucket, fingerprint, &slot_idx); + *inserted = tryInsertInBucketRef(bucket, fingerprint, &slot_idx); return rocksdb::Status::OK(); } @@ -99,9 +99,9 @@ rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; - *fingerprint = GetBucketRefSlot(bucket, slot_idx); + *fingerprint = getBucketRefSlot(bucket, slot_idx); return rocksdb::Status::OK(); } @@ -110,9 +110,9 @@ rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; - SetBucketRefSlot(bucket, slot_idx, fingerprint); + setBucketRefSlot(bucket, slot_idx, fingerprint); return rocksdb::Status::OK(); } @@ -125,7 +125,7 @@ rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batc return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, +rocksdb::Status CuckooPageSet::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const { if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); @@ -139,14 +139,14 @@ rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, +rocksdb::Status CuckooPageSet::ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketRef *bucket) { BucketLocation location; - auto s = ResolveBucketLocation(filter_index, num_buckets, bucket_index, &location); + auto s = resolveBucketLocation(filter_index, num_buckets, bucket_index, &location); if (!s.ok()) return s; PageEntry *page = nullptr; - s = LoadPage(location, &page); + s = loadPage(location, &page); if (!s.ok()) return s; bucket->page = page; @@ -155,13 +155,13 @@ rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_ return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, +rocksdb::Status CuckooPageSet::ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2) { BucketLocation location1, location2; - auto s = ResolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); + auto s = resolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); if (!s.ok()) return s; - s = ResolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); + s = resolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); if (!s.ok()) return s; std::vector missing_locations; @@ -169,7 +169,7 @@ rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_inde if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { missing_locations.push_back(location2); } - s = LoadPages(missing_locations); + s = loadPages(missing_locations); if (!s.ok()) return s; bucket1->page = &pages_.at(location1.page_key); @@ -181,7 +181,7 @@ rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_inde return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntry **page) { +rocksdb::Status CuckooPageSet::loadPage(const BucketLocation &location, PageEntry **page) { auto iter = pages_.find(location.page_key); if (iter != pages_.end()) { *page = &iter->second; @@ -191,7 +191,7 @@ rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntr PageEntry page_entry; auto s = storage_->Get(ctx_, ctx_.GetReadOptions(), location.page_key, &page_entry.data); if (!s.ok() && !s.IsNotFound()) return s; - s = NormalizePage(s, location.expected_page_size, &page_entry); + s = normalizePage(s, location.expected_page_size, &page_entry); if (!s.ok()) return s; auto result = pages_.emplace(location.page_key, std::move(page_entry)); @@ -199,11 +199,11 @@ rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntr return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::LoadPages(const std::vector &locations) { +rocksdb::Status CuckooPageSet::loadPages(const std::vector &locations) { if (locations.empty()) return rocksdb::Status::OK(); if (locations.size() == 1) { PageEntry *page = nullptr; - return LoadPage(locations[0], &page); + return loadPage(locations[0], &page); } std::vector keys; @@ -218,14 +218,14 @@ rocksdb::Status CuckooPageSet::LoadPages(const std::vector &loca for (size_t i = 0; i < locations.size(); ++i) { PageEntry page_entry; if (statuses[i].ok()) page_entry.data.assign(values[i].data(), values[i].size()); - auto s = NormalizePage(statuses[i], locations[i].expected_page_size, &page_entry); + auto s = normalizePage(statuses[i], locations[i].expected_page_size, &page_entry); if (!s.ok()) return s; pages_.emplace(locations[i].page_key, std::move(page_entry)); } return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint32_t expected_size, +rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const { if (!status.ok() && !status.IsNotFound()) return status; if (status.IsNotFound()) page->data.clear(); @@ -234,7 +234,7 @@ rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint return rocksdb::Status::OK(); } -bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { +bool CuckooPageSet::tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { for (size_t i = 0; i < bucket.size; ++i) { size_t offset = bucket.offset + i; if (static_cast(bucket.page->data[offset]) == 0) { @@ -247,11 +247,11 @@ bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t finger return false; } -uint8_t CuckooPageSet::GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { +uint8_t CuckooPageSet::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { return static_cast(bucket.page->data[bucket.offset + slot_idx]); } -void CuckooPageSet::SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { +void CuckooPageSet::setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { bucket.page->data[bucket.offset + slot_idx] = static_cast(fingerprint); bucket.page->is_dirty = true; } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 17627eef0ed..d8473033203 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -66,19 +66,19 @@ class CuckooPageSet { uint32_t expected_page_size = 0; }; - rocksdb::Status ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + rocksdb::Status resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const; - rocksdb::Status EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + rocksdb::Status ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketRef *bucket); - rocksdb::Status EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + rocksdb::Status ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); - rocksdb::Status LoadPage(const BucketLocation &location, PageEntry **page); - rocksdb::Status LoadPages(const std::vector &locations); - rocksdb::Status NormalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; + rocksdb::Status loadPage(const BucketLocation &location, PageEntry **page); + rocksdb::Status loadPages(const std::vector &locations); + rocksdb::Status normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; - bool TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); - uint8_t GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; - void SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); + bool tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); + uint8_t getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; + void setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); engine::Storage *storage_ = nullptr; engine::Context &ctx_; diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0cfc76b7000..acd38a2b02e 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -33,7 +33,7 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } -rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadata) { +rocksdb::Status CuckooChain::validateMetadata(const CuckooChainMetadata &metadata) { if (metadata.n_filters == 0) { return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); } @@ -166,7 +166,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co } if (!s.ok() && !s.IsNotFound()) return s; - s = ValidateMetadata(metadata); + s = validateMetadata(metadata); if (!s.ok()) return s; // Calculate hash and fingerprint for the item diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 2f469d2be9d..0289c8e160f 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -49,7 +49,7 @@ class CuckooChain : public Database { // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); - static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); + static rocksdb::Status validateMetadata(const CuckooChainMetadata &metadata); // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, From cb91f43538fdf4af3cf115fdf80b7bb837956165 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 11:50:00 +0800 Subject: [PATCH 17/20] optimize code --- src/types/redis_cuckoo_chain.cc | 11 +-- tests/cppunit/types/cuckoo_filter_test.cc | 85 +++++++++++++++++------ 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index acd38a2b02e..0e3c8570f79 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -173,11 +173,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); - // Try to insert in each sub-filter (starting from the first/smallest one) - // This follows RedisBloom's behavior and is more efficient - for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. + for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { + uint16_t current_filter_idx = static_cast(filter_idx); uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, current_filter_idx, &filter_capacity) || !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } @@ -192,7 +192,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; - s = pages.TryInsertInCandidateBuckets(filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, &inserted); + s = pages.TryInsertInCandidateBuckets(current_filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, + &inserted); if (!s.ok()) return s; if (inserted) { diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index c00c15f41c9..83d4d6de612 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -111,6 +111,17 @@ class RedisCuckooFilterTest : public TestBase { value); } + void writeMetadata(const std::string &key, const CuckooChainMetadata &metadata) { + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + auto batch = storage_->GetWriteBatchBase(); + auto s = + batch->Put(storage_->GetCFHandle(ColumnFamilyID::Metadata), db_->AppendNamespacePrefix(key), metadata_bytes); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + std::unique_ptr cuckoo_; std::unique_ptr db_; std::string key_; @@ -197,12 +208,10 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { } TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { - // First reserve should succeed - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()); + reserveAndVerify(key_, 1000, 4, 500, 2); // Second reserve with same key should fail - s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); + auto s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -315,21 +324,18 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint16_t expansion = 2; // Create the filter - auto s = - cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + reserveAndVerify(key_, capacity, bucket_size, max_iterations, expansion); // Verify metadata was stored by trying to reserve again with same key // This should fail with "already exists" error - s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); + auto s = + cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys - s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion, - kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + reserveAndVerify("different_key", capacity, bucket_size, max_iterations, expansion); // Verify the original key still exists (can't create it again) s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); @@ -395,14 +401,10 @@ TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { std::string key = "nonexistent_key"; - bool added = false; - auto s = cuckoo_->Add(*ctx_, key, "item1", &added); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; - verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, - redis::kCFDefaultExpansion, 1, 1, 0); + addAndVerify(key, "item1", redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + redis::kCFDefaultExpansion, 1); - s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); + auto s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -427,6 +429,48 @@ TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { } } +TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { + CuckooChainMetadata metadata; + metadata.size = 0; + metadata.base_capacity = 1000; + metadata.bucket_size = 4; + metadata.max_iterations = 500; + metadata.expansion = 2; + metadata.n_filters = 2; + metadata.num_deleted_items = 0; + metadata.page_size = kCuckooFilterDefaultPageSize; + writeMetadata(key_, metadata); + + const std::string item = "item"; + addAndVerify(key_, item, metadata.base_capacity, metadata.bucket_size, metadata.max_iterations, metadata.expansion, 1, + metadata.n_filters); + + auto stored_metadata = getMetadata(key_); + ASSERT_EQ(stored_metadata.n_filters, 2); + ASSERT_EQ(stored_metadata.size, 1); + + auto hash = redis::CuckooFilter::Hash(item); + uint32_t old_num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity, metadata.bucket_size, &old_num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto buckets_per_page = stored_metadata.page_size / stored_metadata.bucket_size; + auto old_page_idx = static_cast(hash % old_num_buckets) / buckets_per_page; + + std::string page; + s = readPage(makePageKey(key_, stored_metadata, 0, old_page_idx), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); + + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity * metadata.expansion, metadata.bucket_size, + &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto bucket1_idx = static_cast(hash % num_buckets); + auto expected_page_idx = bucket1_idx / buckets_per_page; + + s = readPage(makePageKey(key_, stored_metadata, 1, expected_page_idx), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); +} + TEST_F(RedisCuckooFilterTest, AddManyItems) { reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 100; ++i) { @@ -436,15 +480,14 @@ TEST_F(RedisCuckooFilterTest, AddManyItems) { TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()); + reserveAndVerify(key_, small_capacity, 2, 500, 0); uint64_t added_count = 0; bool full = false; for (int i = 0; i < 100; ++i) { std::string item = "item_" + std::to_string(i); bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); + auto s = cuckoo_->Add(*ctx_, key_, item, &added); if (!s.ok()) { ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; From 3043383fed88b564ed1b867c249ef965743962b8 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 2 Feb 2026 12:30:07 +0000 Subject: [PATCH 18/20] feat: Implement CF.EXISTS command --- src/commands/cmd_cuckoo_filter.cc | 35 ++++- src/types/redis_cuckoo_chain.cc | 58 +++++++- src/types/redis_cuckoo_chain.h | 4 + tests/cppunit/types/cuckoo_filter_test.cc | 173 +++++++++++++++++++++- 4 files changed, 265 insertions(+), 5 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 2002912f94d..c2aca9b9007 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -131,8 +131,39 @@ class CommandCFAdd : public Commander { } }; -// Register the CF.RESERVE and CF.ADD commands +class CommandCFExists : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.EXISTS key item + if (args.size() != 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + bool exists = false; + auto s = cuckoo_db.Exists(ctx, args_[1], args_[2], &exists); + + if (!s.ok()) { + if (s.IsNotFound()) { + // Return 0 if key doesn't exist, not an error + *output = redis::Integer(0); + return Status::OK(); + } + return {Status::RedisExecErr, "failed to check item existence in cuckoo filter"}; + } + + // Return 1 if exists (might exist), 0 if doesn't exist (definitely not) + *output = redis::Integer(exists ? 1 : 0); + return Status::OK(); + } +}; + +// Register the CF.RESERVE, CF.ADD, and CF.EXISTS commands REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), - MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) + MakeCmdAttr("cf.add", 3, "write", 1, 1, 1), + MakeCmdAttr("cf.exists", 3, "read-only", 1, 1, 1)) } // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0e3c8570f79..b5737c53976 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -62,7 +62,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } - // RedisBloom requires minimum capacity to ensure at least one bucket can be created + // Require minimum capacity to ensure at least one bucket can be created // With load factor 0.955, capacity=1 and bucket_size=4 results in 0 buckets if (capacity < 2) { return rocksdb::Status::InvalidArgument("capacity must be at least 2"); @@ -347,4 +347,60 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns return rocksdb::Status::OK(); } +rocksdb::Status CuckooChain::Exists(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *exists) { + std::string ns_key = AppendNamespacePrefix(user_key); + + CuckooChainMetadata metadata(false); + auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (!s.ok()) return s; + + s = validateMetadata(metadata); + if (!s.ok()) return s; + + uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); + uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); + + for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; + + uint32_t bucket1_idx = hash % num_buckets; + uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); + uint32_t bucket2_idx = alt_hash % num_buckets; + + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + + uint8_t slot = 0; + s = pages.GetBucketSlot(filter_idx, num_buckets, bucket1_idx, 0, &slot); + if (!s.ok()) return s; + for (size_t i = 0; i < metadata.bucket_size; ++i) { + s = pages.GetBucketSlot(filter_idx, num_buckets, bucket1_idx, static_cast(i), &slot); + if (!s.ok()) return s; + if (slot == fingerprint) { + *exists = true; + return rocksdb::Status::OK(); + } + } + + if (bucket1_idx == bucket2_idx) continue; + for (size_t i = 0; i < metadata.bucket_size; ++i) { + s = pages.GetBucketSlot(filter_idx, num_buckets, bucket2_idx, static_cast(i), &slot); + if (!s.ok()) return s; + if (slot == fingerprint) { + *exists = true; + return rocksdb::Status::OK(); + } + } + } + + *exists = false; + return rocksdb::Status::OK(); +} + } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 0289c8e160f..a1a27c233f8 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -45,6 +45,10 @@ class CuckooChain : public Database { // Duplicate items are allowed, so added is true whenever insertion succeeds. rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); + // CF.EXISTS command - checks if an item might exist in the cuckoo filter + // Returns true if item might exist (false positive possible), false if definitely doesn't exist + rocksdb::Status Exists(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *exists); + private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 83d4d6de612..de956f8c62c 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -246,7 +246,7 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { // Test fingerprint generation ensures non-zero values in range [1, 255] - // Following RedisBloom: fp = hash % 255 + 1 + // Standard implementation: fp = hash % 255 + 1 for (uint64_t hash = 0; hash < 1000; ++hash) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; @@ -263,7 +263,7 @@ TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { std::vector num_buckets_cases = {1, 2, 128, 256, 512, 1024, 2048}; - // Test GetAltHash symmetry at hash level (following RedisBloom design) + // Test GetAltHash symmetry property // h2 = GetAltHash(fp, h1) // h1 = GetAltHash(fp, h2) <- this is the symmetry property for (auto num_buckets : num_buckets_cases) { @@ -620,3 +620,172 @@ TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { ASSERT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(page.size(), expected_page_size); } + +TEST_F(RedisCuckooFilterTest, ExistsBasic) { + reserveAndVerify(key_, 1000, 4, 500, 2); + addAndVerify(key_, "test_item", 1000, 4, 500, 2, 1); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, "test_item", &exists); + ASSERT_TRUE(s.ok()) << "Failed to check existence: " << s.ToString(); + ASSERT_TRUE(exists) << "Item should exist"; +} + +TEST_F(RedisCuckooFilterTest, ExistsNonExistentItem) { + reserveAndVerify(key_, 1000, 4, 500, 2); + addAndVerify(key_, "item1", 1000, 4, 500, 2, 1); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, "item2", &exists); + ASSERT_TRUE(s.ok()); + ASSERT_FALSE(exists) << "Item should not exist"; +} + +TEST_F(RedisCuckooFilterTest, ExistsNonExistentFilter) { + bool exists = true; + auto s = cuckoo_->Exists(*ctx_, "nonexistent_key", "item", &exists); + ASSERT_TRUE(s.IsNotFound()) << "Internal should return NotFound"; +} + +TEST_F(RedisCuckooFilterTest, ExistsMultipleItems) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; + for (size_t i = 0; i < items.size(); ++i) { + addAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); + } + + for (const auto &item : items) { + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, item, &exists); + ASSERT_TRUE(s.ok()) << "Failed to check: " << item; + ASSERT_TRUE(exists) << "Item should exist: " << item; + } + + std::vector non_items = {"fig", "grape", "honeydew"}; + for (const auto &item : non_items) { + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, item, &exists); + ASSERT_TRUE(s.ok()); + } +} + +TEST_F(RedisCuckooFilterTest, ExistsDuplicateItems) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + std::string item = "duplicate_item"; + for (int i = 0; i < 3; ++i) { + addAndVerify(key_, item, 1000, 4, 500, 2, i + 1); + } + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, item, &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Duplicate item should exist"; +} + +TEST_F(RedisCuckooFilterTest, ExistsEmptyString) { + reserveAndVerify(key_, 1000, 4, 500, 2); + addAndVerify(key_, "", 1000, 4, 500, 2, 1); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, "", &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Empty string should exist"; +} + +TEST_F(RedisCuckooFilterTest, ExistsBinaryData) { + reserveAndVerify(key_, 1000, 4, 500, 2); + std::string binary_item = std::string("\x00\x01\x02\xFF\xFE", 5); + addAndVerify(key_, binary_item, 1000, 4, 500, 2, 1); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, binary_item, &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Binary data should exist"; +} + +TEST_F(RedisCuckooFilterTest, ExistsAfterExpansion) { + reserveAndVerify(key_, 10, 2, 500, 2); + + std::vector items; + for (int i = 0; i < 30; ++i) { + items.push_back("item_" + std::to_string(i)); + } + + for (const auto &item : items) { + bool added = false; + auto s = cuckoo_->Add(*ctx_, key_, item, &added); + if (s.ok() && added) { + bool exists = false; + s = cuckoo_->Exists(*ctx_, key_, item, &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Item should exist: " << item; + } + } +} + +TEST_F(RedisCuckooFilterTest, ExistsWithDifferentBucketSizes) { + std::vector bucket_sizes = {1, 2, 4, 8}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(bucket_sizes[i]); + reserveAndVerify(test_key, 100, bucket_sizes[i], 500, 2); + + std::vector items = {"item1", "item2", "item3"}; + for (size_t j = 0; j < items.size(); ++j) { + addAndVerify(test_key, items[j], 100, bucket_sizes[i], 500, 2, j + 1); + } + + for (const auto &item : items) { + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, test_key, item, &exists); + ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(bucket_sizes[i]); + ASSERT_TRUE(exists) << "Item should exist with bucket_size=" << static_cast(bucket_sizes[i]); + } + } +} + +TEST_F(RedisCuckooFilterTest, ExistsLongString) { + reserveAndVerify(key_, 1000, 4, 500, 2); + std::string long_item(10000, 'x'); + addAndVerify(key_, long_item, 1000, 4, 500, 2, 1); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, long_item, &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Long string should exist"; + + std::string different_long_item(10000, 'y'); + exists = false; + s = cuckoo_->Exists(*ctx_, key_, different_long_item, &exists); + ASSERT_TRUE(s.ok()); +} + +TEST_F(RedisCuckooFilterTest, ExistsBatchTest) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + for (int x = 0; x < 100; ++x) { + addAndVerify(key_, std::to_string(x), 1000, 4, 500, 2, x + 1); + } + + for (int x = 0; x < 100; ++x) { + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, std::to_string(x), &exists); + ASSERT_TRUE(s.ok()) << "Failed to check item " << x; + ASSERT_TRUE(exists) << "Item " << x << " should exist"; + } +} + +TEST_F(RedisCuckooFilterTest, ExistsWithRepeatedInsertions) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + std::string item = "k1"; + addAndVerify(key_, item, 1000, 4, 500, 2, 1); + addAndVerify(key_, item, 1000, 4, 500, 2, 2); + + bool exists = false; + auto s = cuckoo_->Exists(*ctx_, key_, item, &exists); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(exists) << "Item should exist after duplicate insertions"; +} From e0116d2e9adb74a1dc00371590e63c3849ce1e9f Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 14 Apr 2026 23:26:18 +0800 Subject: [PATCH 19/20] feat: MExists --- src/types/redis_cuckoo_chain.cc | 64 +++++++++++++++++++++++++++++++++ src/types/redis_cuckoo_chain.h | 6 ++++ 2 files changed, 70 insertions(+) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index b5737c53976..1666ecb6b1c 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -403,4 +403,68 @@ rocksdb::Status CuckooChain::Exists(engine::Context &ctx, const Slice &user_key, return rocksdb::Status::OK(); } +rocksdb::Status CuckooChain::MExists(engine::Context &ctx, const Slice &user_key, const std::vector &items, + std::vector *exists) { + exists->assign(items.size(), false); + std::string ns_key = AppendNamespacePrefix(user_key); + + CuckooChainMetadata metadata(false); + auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (!s.ok()) return s; + + s = validateMetadata(metadata); + if (!s.ok()) return s; + + std::vector hashes(items.size()); + std::vector fingerprints(items.size()); + for (size_t i = 0; i < items.size(); ++i) { + hashes[i] = CuckooFilter::Hash(items[i].data(), items[i].size()); + fingerprints[i] = CuckooFilter::GenerateFingerprint(hashes[i]); + CHECK(fingerprints[i] != 0); + } + + for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; + + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + + for (size_t i = 0; i < items.size(); ++i) { + if ((*exists)[i] || fingerprints[i] == 0) continue; + + uint32_t bucket1_idx = hashes[i] % num_buckets; + uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprints[i], hashes[i]); + uint32_t bucket2_idx = alt_hash % num_buckets; + + uint8_t slot = 0; + for (size_t slot_idx = 0; slot_idx < metadata.bucket_size; ++slot_idx) { + s = pages.GetBucketSlot(filter_idx, num_buckets, bucket1_idx, static_cast(slot_idx), &slot); + if (!s.ok()) return s; + if (slot == fingerprints[i]) { + (*exists)[i] = true; + break; + } + } + + if ((*exists)[i] || bucket1_idx == bucket2_idx) continue; + for (size_t slot_idx = 0; slot_idx < metadata.bucket_size; ++slot_idx) { + s = pages.GetBucketSlot(filter_idx, num_buckets, bucket2_idx, static_cast(slot_idx), &slot); + if (!s.ok()) return s; + if (slot == fingerprints[i]) { + (*exists)[i] = true; + break; + } + } + } + } + + return rocksdb::Status::OK(); +} + } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index a1a27c233f8..167383c5d60 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -20,6 +20,8 @@ #pragma once +#include + #include "cuckoo_filter.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -49,6 +51,10 @@ class CuckooChain : public Database { // Returns true if item might exist (false positive possible), false if definitely doesn't exist rocksdb::Status Exists(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *exists); + // CF.MEXISTS command - checks whether multiple items might exist in the cuckoo filter + rocksdb::Status MExists(engine::Context &ctx, const Slice &user_key, const std::vector &items, + std::vector *exists); + private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); From 33d36bbf55325f6d39cc6c3189fba332ab8e2e84 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Wed, 15 Apr 2026 00:07:39 +0800 Subject: [PATCH 20/20] fate: CommandCFMExists --- src/commands/cmd_cuckoo_filter.cc | 37 +++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index c2aca9b9007..80464aea4b4 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -161,9 +161,42 @@ class CommandCFExists : public Commander { } }; -// Register the CF.RESERVE, CF.ADD, and CF.EXISTS commands +class CommandCFMExists : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.MEXISTS key item [item ...] + if (args.size() < 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + std::vector items(args_.begin() + 2, args_.end()); + std::vector exists; + auto s = cuckoo_db.MExists(ctx, args_[1], items, &exists); + + if (!s.ok()) { + if (s.IsNotFound()) { + exists.assign(items.size(), false); + } else { + return {Status::RedisExecErr, "failed to check items existence in cuckoo filter"}; + } + } + + *output = redis::MultiLen(exists.size()); + for (bool exist : exists) { + output->append(redis::Integer(exist ? 1 : 0)); + } + return Status::OK(); + } +}; + +// Register the CF.RESERVE, CF.ADD, CF.EXISTS, and CF.MEXISTS commands REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), MakeCmdAttr("cf.add", 3, "write", 1, 1, 1), - MakeCmdAttr("cf.exists", 3, "read-only", 1, 1, 1)) + MakeCmdAttr("cf.exists", 3, "read-only", 1, 1, 1), + MakeCmdAttr("cf.mexists", -3, "read-only", 1, 1, 1)) } // namespace redis