// Patch summary. This is a unified diff over four files. In this copy the hunks
// are collapsed onto a few long lines, and identifier-only angle-bracket lists
// (for example the parameter list after `template`) appear to be missing --
// TODO confirm against the original patch before applying it.
//
// benchmarks/benchmark.cpp
//   * random_benchmarks: the TreeType alias changes from
//     BppTreeMap::internal_node_bytes<256>::leaf_node_bytes<1024>::Persistent
//     to BppTreeMap::Persistent.
//   * Adds the HasPushBack trait and iterator_benchmark(tree, message), which
//     fills the container for i in [0, n) (push_back(std::make_pair(i, i)) when
//     HasPushBack::value holds, otherwise tree[i] = i), prints the elapsed
//     steady_clock time, then iterates std::as_const(tree) summing p.second
//     and prints that elapsed time followed by the sum.
//   * sequential_benchmarks: additionally runs iterator_benchmark for
//     absl::btree_map, tlx::btree_map, BppTreeMap::Transient and std::map.
//
// include/bpptree/detail/internalnodebase.hpp
//   * Adds InternalNodeBase::prefetch_children(bool condition): when condition
//     is true it calls __builtin_prefetch(&*pointers[i], 1, 3) for every
//     i < this->length.
//   * Adds a destructor that calls prefetch_children(true).
//   * insert, assign, erase, update and update2 call
//     prefetch_children(this->persistent) right after finder(...) and before
//     descending into pointers[index].
//   * make_persistent calls prefetch_children(true) before looping over the
//     children.
//
// include/bpptree/detail/nodeptr.hpp
//   * dec_ref: `--ptr->ref_count == 0` becomes
//     `ptr->ref_count.fetch_sub(1, std::memory_order_release) == 1`, followed
//     by std::atomic_thread_fence(std::memory_order_acquire) before
//     `delete ptr`.
//   * inc_ref: `++ptr->ref_count` becomes
//     `ptr->ref_count.fetch_add(1, std::memory_order_relaxed)`.
//
// include/bpptree/detail/ordered_detail.hpp
//   * Adds the HasDataPtr trait.
//   * Both find_key_index hunks gain loops that call
//     __builtin_prefetch(..., 0, 3) on each key (the key itself under the
//     std::is_pointer_v branch, key.data() under the HasDataPtr branch) before
//     the search runs.
//   * insert_or_assign calls this->prefetch_children(this->persistent) after
//     finder(...).
//
// NOTE(review): __builtin_prefetch is a GCC/Clang builtin; confirm that every
// supported compiler provides it, or guard the calls.
// NOTE(review): the new ~InternalNodeBase is user-declared; check whether the
// class relied on implicitly generated move operations -- not visible here.
// NOTE(review): the explicit memory orders presumably require ref_count to be
// a std::atomic; its declaration is not part of this diff -- verify.
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 2747671..6c8a68c 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -64,7 +64,7 @@ inline void random_benchmarks(std::integral_constant) { } } if constexpr (true) { - using TreeType = BppTreeMap::internal_node_bytes<256>::leaf_node_bytes<1024>::Persistent; + using TreeType = BppTreeMap::Persistent; TreeType tree{}; cout << "BppTreeMap::Persistent : " << n << endl; cout << "=============================================================" << endl; @@ -155,6 +155,39 @@ inline void sequential_benchmark(T&& tree, std::string const& message) { cout << sum << endl << endl; } +template +struct HasPushBack : std::false_type {}; + +template +struct HasPushBack().push_back(std::make_pair(0, 0)))>> : std::true_type {}; + +template +inline void iterator_benchmark(T&& tree, std::string const& message) { + cout << "Running iterator benchmark using " << message << " with size " << n << endl; + cout << "=============================================================" << endl; + auto startTime = std::chrono::steady_clock::now(); + for (int i = 0; i < n; i++) { + if constexpr (HasPushBack::value) { + tree.push_back(std::make_pair(i, i)); + } else { + tree[i] = i; + } + } + auto endTime = std::chrono::steady_clock::now(); + std::chrono::duration elapsed = endTime - startTime; + cout << elapsed.count() << 's' << endl; + + int64_t sum = 0; + startTime = std::chrono::steady_clock::now(); + for (auto const& p: std::as_const(tree)) { + sum += p.second; + } + endTime = std::chrono::steady_clock::now(); + elapsed = endTime - startTime; + cout << elapsed.count() << 's' << endl; + cout << sum << endl << endl; +} + template inline void sequential_benchmarks(std::integral_constant) { for (int j = 0; j < 5; ++j) { @@ -162,6 +195,11 @@ inline void sequential_benchmarks(std::integral_constant) { sequential_benchmark(tlx::btree_map(), "tlx::btree_map"); sequential_benchmark(BppTreeMap::Transient(), 
"BppTreeMap::Transient"); sequential_benchmark(std::map(), "std::map"); + + iterator_benchmark(absl::btree_map(), "absl::btree_map"); + iterator_benchmark(tlx::btree_map(), "tlx::btree_map"); + iterator_benchmark(BppTreeMap::Transient(), "BppTreeMap::Transient"); + iterator_benchmark(std::map(), "std::map"); } if constexpr (true) { using TreeType = BppTreeMap::internal_node_bytes<256>::leaf_node_bytes<1024>::Persistent; diff --git a/include/bpptree/detail/internalnodebase.hpp b/include/bpptree/detail/internalnodebase.hpp index 70d3c92..56ec30a 100644 --- a/include/bpptree/detail/internalnodebase.hpp +++ b/include/bpptree/detail/internalnodebase.hpp @@ -42,6 +42,18 @@ struct InternalNodeBase : public Parent { NodePtr pointers[internal_size]{}; + void prefetch_children(bool condition) { + if (condition) { + for (IndexType i = 0; i < this->length; ++i) { + __builtin_prefetch(&*pointers[i], 1, 3); + } + } + } + + ~InternalNodeBase() { + prefetch_children(true); + } + static IndexType get_index(uint64_t it) { return (it >> it_shift) & it_mask; } @@ -303,6 +315,7 @@ struct InternalNodeBase : public Parent { template void insert(T const& search_val, F&& finder, R&& do_replace, S&& do_split, size_t& size, uint64_t& iter, bool right_most, Args&&... args) { auto [index, remainder] = finder(this->self(), search_val); + prefetch_children(this->persistent); pointers[index]->insert(remainder, finder, DoReplace(this->self(), index, do_replace, iter), @@ -316,6 +329,7 @@ struct InternalNodeBase : public Parent { template void assign(T const& search_val, F&& finder, R&& do_replace, uint64_t& iter, Args&&... 
args) { auto [index, remainder] = finder(this->self(), search_val); + prefetch_children(this->persistent); pointers[index]->assign(remainder, finder, DoReplace(this->self(), index, do_replace, iter), @@ -390,6 +404,7 @@ struct InternalNodeBase : public Parent { template void erase(T const& search_val, F&& finder, R&& do_replace, E&& do_erase, size_t& size, uint64_t& iter, bool right_most) { auto [index, remainder] = finder(this->self(), search_val); + prefetch_children(this->persistent); pointers[index]->erase(remainder, finder, DoReplace(this->self(), index, do_replace, iter), @@ -402,6 +417,7 @@ struct InternalNodeBase : public Parent { template void update(T const& search_val, F&& finder, R&& do_replace, uint64_t& iter, U&& updater) { auto [index, remainder] = finder(this->self(), search_val); + prefetch_children(this->persistent); pointers[index]->update(remainder, finder, DoReplace(this->self(), index, do_replace, iter), @@ -413,6 +429,7 @@ struct InternalNodeBase : public Parent { template void update2(T const& search_val, F&& finder, R&& do_replace, uint64_t& iter, U&& updater) { auto [index, remainder] = finder(this->self(), search_val); + prefetch_children(this->persistent); pointers[index]->update2(remainder, finder, DoReplace(this->self(), index, do_replace, iter), @@ -428,6 +445,7 @@ struct InternalNodeBase : public Parent { void make_persistent() { if (!this->persistent) { this->persistent = true; + prefetch_children(true); for (IndexType i = 0; i < this->length; ++i) { pointers[i]->make_persistent(); } diff --git a/include/bpptree/detail/nodeptr.hpp b/include/bpptree/detail/nodeptr.hpp index 7120fa1..e28a9db 100644 --- a/include/bpptree/detail/nodeptr.hpp +++ b/include/bpptree/detail/nodeptr.hpp @@ -38,7 +38,8 @@ class NodePtr { void dec_ref() { if (ptr != nullptr) { - if (--ptr->ref_count == 0) { + if (ptr->ref_count.fetch_sub(1, std::memory_order_release) == 1) { + std::atomic_thread_fence(std::memory_order_acquire); delete ptr; if constexpr 
(count_allocations) ++deallocations; } @@ -48,7 +49,7 @@ class NodePtr { void inc_ref() const { if (ptr != nullptr) { - ++ptr->ref_count; + ptr->ref_count.fetch_add(1, std::memory_order_relaxed); if constexpr (count_allocations) ++increments; } } diff --git a/include/bpptree/detail/ordered_detail.hpp b/include/bpptree/detail/ordered_detail.hpp index d695bd2..fcf2bb1 100644 --- a/include/bpptree/detail/ordered_detail.hpp +++ b/include/bpptree/detail/ordered_detail.hpp @@ -16,6 +16,12 @@ namespace bpptree::detail { +template +struct HasDataPtr : std::false_type {}; + +template +struct HasDataPtr().data())>>> : std::true_type {}; + template struct OrderedDetail { private: @@ -32,6 +38,16 @@ struct OrderedDetail { template IndexType find_key_index(Key const& search_val, Comp const& comp) const { + if constexpr (std::is_pointer_v) { + for (IndexType i = 0; i < this->length; ++i) { + __builtin_prefetch(extractor.get_key(this->values[i]), 0, 3); + } + } + if constexpr (HasDataPtr::value) { + for (IndexType i = 0; i < this->length; ++i) { + __builtin_prefetch(extractor.get_key(this->values[i]).data(), 0, 3); + } + } if constexpr (!binary_search) { IndexType index = 0; while (index < this->length) { @@ -238,6 +254,16 @@ struct OrderedDetail { template IndexType find_key_index(Key const& search_val, Comp const& comp) const { + if constexpr (std::is_pointer_v) { + for (IndexType i = 0; i < this->length; ++i) { + __builtin_prefetch(keys[i], 0, 3); + } + } + if constexpr (HasDataPtr::value) { + for (IndexType i = 0; i < this->length; ++i) { + __builtin_prefetch(keys[i].data(), 0, 3); + } + } if constexpr (!binary_search) { IndexType index = 0; while (index < this->length - 1) { @@ -269,6 +295,7 @@ struct OrderedDetail { void insert_or_assign(Key const& search_val, F&& finder, R&& do_replace, S&& do_split, size_t& size, uint64_t& iter, bool right_most, Args&& ... 
args) { auto [index, remainder] = finder(this->self(), search_val); + this->prefetch_children(this->persistent); this->pointers[index]->template insert_or_assign(remainder, finder, typename Parent::template DoReplace(this->self(), index, do_replace, iter),