From 81709a8bbb274fab561c15c73f1ef40c49a79f98 Mon Sep 17 00:00:00 2001 From: canepat <16927169+canepat@users.noreply.github.com> Date: Sun, 13 Nov 2022 13:48:48 +0100 Subject: [PATCH] Move non-constexpr array initialization Add RecSplit methods to add key one by one Remove ctor/dtor template parameters made invalid in C++20 Add unit test --- sux/function/RecSplit.hpp | 20 ++++++++++++++++++-- sux/util/Vector.hpp | 8 ++++---- test/function/recsplit.hpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/sux/function/RecSplit.hpp b/sux/function/RecSplit.hpp index 733ba7f..5152a66 100644 --- a/sux/function/RecSplit.hpp +++ b/sux/function/RecSplit.hpp @@ -307,17 +307,32 @@ template class // For each bucket size, the Golomb-Rice parameter (upper 8 bits) and the number of bits to // skip in the fixed part of the tree (lower 24 bits). static constexpr array memo = fill_golomb_rice(); - static constexpr array bij_midstop = fill_bij_midstop(); size_t bucket_size; size_t nbuckets; size_t keys_count; RiceBitVector descriptors; DoubleEF ef; + std::vector key_hashes; public: RecSplit() {} + RecSplit(const size_t _keys_count, const size_t _bucket_size) { + this->bucket_size = _bucket_size; + this->keys_count = _keys_count; + key_hashes.reserve(this->keys_count); + } + + void add_key(const string& key) { + key_hashes.push_back(first_hash(key.c_str(), key.size())); + } + + void build() { + hash_gen(key_hashes.data()); + key_hashes.clear(); + } + /** Builds a RecSplit instance using a given list of keys and bucket size. * * **Warning**: duplicate keys will cause this method to never return. @@ -448,7 +463,8 @@ template class } void recSplit(vector &bucket, vector &temp, size_t start, size_t end, typename RiceBitVector::Builder &builder, vector &unary, const int level) { - const auto m = end - start; + static const array bij_midstop = fill_bij_midstop(); + const auto m = end - start; assert(m > 1); uint64_t x = start_seed[level]; diff --git a/sux/util/Vector.hpp b/sux/util/Vector.hpp index 0459041..d9f29c9 100644 --- a/sux/util/Vector.hpp +++ b/sux/util/Vector.hpp @@ -92,13 +92,13 @@ template class Vector : public Expandable { T *data = nullptr; public: - Vector() = default; + Vector() = default; - explicit Vector(size_t length) { size(length); } + explicit Vector(size_t length) { size(length); } - explicit Vector(const T *data, size_t length) : Vector(length) { memcpy(this->data, data, length); } + explicit Vector(const T *data, size_t length) : Vector(length) { memcpy(this->data, data, length); } - ~Vector() { + ~Vector() { if (data) { if (AT == MALLOC) { free(data); diff --git a/test/function/recsplit.hpp b/test/function/recsplit.hpp index 8605786..5485f90 100644 --- a/test/function/recsplit.hpp +++ b/test/function/recsplit.hpp @@ -135,3 +135,33 @@ TEST(recsplit_test, small_text_dump_and_load) { recsplit_unit_test(rs_load, keys); remove(filename); } + +TEST(recsplit_test, small_text_dump_and_load_one_by_one) { + vector keys; + keys.push_back("a"); + keys.push_back("b"); + keys.push_back("c"); + keys.push_back("d"); + + const char *filename = "test/test_dump"; + + RecSplit<8> rs_dump(keys.size(), 2); + for (size_t i = 0; i < keys.size(); i++) rs_dump.add_key(keys[i]); + rs_dump.build(); + + fstream fs; + fs.exceptions(fstream::failbit | fstream::badbit); + fs.open(filename, fstream::out | fstream::binary | fstream::trunc); + fs << rs_dump; + fs.close(); + + RecSplit<8> rs_load; + fs.open(filename, std::fstream::in | std::fstream::binary); + fs >> rs_load; + fs.close(); + + for (size_t i = 0; i < rs_dump.size(); i++) ASSERT_EQ(rs_dump(keys[i]), rs_load(keys[i])); + + recsplit_unit_test(rs_load, keys); + remove(filename); +}