From 7333bcd922715e3dc05861c3630200616c0a2285 Mon Sep 17 00:00:00 2001 From: lisyarus Date: Sat, 1 Jun 2024 14:56:53 +0300 Subject: [PATCH] Support removal from util::hash_table and add more hash table tests --- libs/util/include/psemek/util/hash_table.hpp | 156 ++++++++-- libs/util/tests/hash_table.cpp | 304 ++++++++++++++++++- 2 files changed, 437 insertions(+), 23 deletions(-) diff --git a/libs/util/include/psemek/util/hash_table.hpp b/libs/util/include/psemek/util/hash_table.hpp index fdab55c2..e815b0df 100644 --- a/libs/util/include/psemek/util/hash_table.hpp +++ b/libs/util/include/psemek/util/hash_table.hpp @@ -5,7 +5,6 @@ #include #include -#include #include namespace psemek::util @@ -14,11 +13,65 @@ namespace psemek::util namespace detail { + constexpr std::size_t stored_value_mask = 1ull << 63; + constexpr std::size_t tombstone_mask = 1ull << 62; + constexpr std::size_t hash_value_mask = ~(stored_value_mask | tombstone_mask); + template struct hash_table_entry { - std::size_t hash; - std::optional value; + std::size_t hash = 0; + alignas(T) char storage[sizeof(T)] = {0}; + + bool has_value() const + { + return (hash & stored_value_mask) != 0; + } + + bool is_tombstone() const + { + return (hash & tombstone_mask) != 0; + } + + T * storage_ptr() + { + return reinterpret_cast(storage); + } + + T & value() + { + return *storage_ptr(); + } + + template + void set_value(H && value, std::size_t hash) + { + new (storage_ptr()) T{std::forward(value)}; + this->hash = (hash & hash_value_mask) | stored_value_mask; + } + + bool hash_equal(std::size_t hash) const + { + return (hash & hash_value_mask) == (this->hash & hash_value_mask); + } + + void set_tombstone() + { + this->hash = tombstone_mask; + } + + void reset() + { + if (has_value()) + value().~T(); + + hash = 0; + } + + ~hash_table_entry() + { + reset(); + } }; template @@ -41,12 +94,12 @@ namespace psemek::util T & operator *() const { - return *(p_->value); + return p_->value(); } T * operator ->() const { - return std::addressof(*(p_->value)); + return std::addressof(p_->value()); } hash_table_iterator & operator ++() @@ -73,13 +126,18 @@ namespace psemek::util return {p_, end_}; } + entry_type * internal() const + { + return p_; + } + private: entry_type * p_; entry_type * end_; void advance() { - while (p_ != end_ && !(p_->value)) + while (p_ != end_ && !p_->has_value()) ++p_; } }; @@ -159,10 +217,22 @@ namespace psemek::util return find_impl(key, hash); } + void erase(hash_table_entry * entry) + { + entry->reset(); + entry->set_tombstone(); + --size_; + ++tombstone_count_; + + // Ensure at most 25% tombstones + if (4 * tombstone_count_ >= storage_.capacity) + rehash(); + } + void clear() { for (auto & entry : storage_.entries()) - entry.value.reset(); + entry.reset(); size_ = 0; } @@ -189,6 +259,7 @@ namespace psemek::util private: hash_table_storage storage_; std::size_t size_ = 0; + std::size_t tombstone_count_ = 0; static std::size_t min_capacity_for_size(std::size_t size) { @@ -219,17 +290,23 @@ namespace psemek::util std::swap(storage_, storage); size_ = 0; + tombstone_count_ = 0; for (hash_table_entry & entry : storage.entries()) { - if (entry.value) + if (entry.has_value()) { - insert_impl(std::move(*entry.value), entry.hash); - entry.value.reset(); + insert_impl(std::move(entry.value()), entry.hash); + entry.reset(); } } } + void rehash() + { + reallocate(capacity()); + } + std::size_t probe_index(std::size_t hash, std::size_t i) const { return (hash + (i * (i + 1)) / 2) % storage_.capacity; @@ -243,14 +320,13 @@ namespace psemek::util { std::size_t index = probe_index(hash, i); auto & entry = storage_.table[index]; - if (!entry.value) + if (!entry.has_value() || entry.is_tombstone()) { - entry.value.emplace(std::forward(value)); - entry.hash = hash; + entry.set_value(std::forward(value), hash); ++size_; return {storage_.iterator(index), true}; } - else if (entry.hash == hash && equal()(value, *entry.value)) + else if (entry.hash_equal(hash) && equal()(value, entry.value())) { return {storage_.iterator(index), false}; } @@ -267,16 +343,18 @@ namespace psemek::util { std::size_t index = probe_index(hash, i); auto & entry = storage_.table[index]; - if (!entry.value) + if (!entry.is_tombstone()) { - return storage_.iterator(storage_.capacity); + if (!entry.has_value()) + { + return end(); + } + else if (entry.hash_equal(hash) && equal()(key, entry.value())) + { + return storage_.iterator(index); + } } - else if (entry.hash == hash && equal()(key, *entry.value)) - { - return storage_.iterator(index); - } - else - ++i; + ++i; } } }; @@ -408,6 +486,23 @@ namespace psemek::util return find(key) != end(); } + bool erase(iterator const & it) + { + impl_.erase(it.internal()); + return true; + } + + template + bool erase(Key const & key) + { + if (auto it = find(key); it != end()) + { + erase(it); + return true; + } + return false; + } + iterator begin() const { return impl_.begin().as_const(); @@ -497,6 +592,23 @@ namespace psemek::util return find(key) != end(); } + bool erase(iterator const & it) + { + impl_.erase(it.internal()); + return true; + } + + template + bool erase(Key1 const & key) + { + if (auto it = find(key); it != end()) + { + erase(it); + return true; + } + return false; + } + iterator begin() const { return impl_.begin(); diff --git a/libs/util/tests/hash_table.cpp b/libs/util/tests/hash_table.cpp index 48c539e3..a4402072 100644 --- a/libs/util/tests/hash_table.cpp +++ b/libs/util/tests/hash_table.cpp @@ -6,10 +6,66 @@ #include #include #include +#include using namespace psemek; using namespace psemek::util; +namespace +{ + + struct lifetime_tracker + { + static std::size_t constructed_count; + static std::size_t move_constructed_count; + static std::size_t destroyed_count; + + static std::size_t alive_count() + { + return constructed_count + move_constructed_count - destroyed_count; + } + + int value; + + lifetime_tracker(int value) + : value(value) + { + ++constructed_count; + } + + lifetime_tracker(lifetime_tracker && other) + : value(other.value) + { + ++move_constructed_count; + } + + lifetime_tracker(lifetime_tracker const &) = delete; + + lifetime_tracker & operator = (lifetime_tracker &&) = delete; + lifetime_tracker & operator = (lifetime_tracker const &) = delete; + + ~lifetime_tracker() + { + ++destroyed_count; + } + + friend bool operator == (lifetime_tracker const &, lifetime_tracker const &) = default; + }; + + std::size_t lifetime_tracker::constructed_count = 0; + std::size_t lifetime_tracker::move_constructed_count = 0; + std::size_t lifetime_tracker::destroyed_count = 0; + + struct lifetime_tracker_hash + { + std::size_t operator()(lifetime_tracker const & value) const noexcept + { + return value.value; + } + }; + +} + test_case(util_hash__set_empty) { hash_set set; @@ -22,7 +78,7 @@ test_case(util_hash__set_empty) expect_equal(call_count, 0); } -test_case(util_hash__set_insert) +test_case(util_hash__set_insert_sequential) { hash_set set; @@ -47,6 +103,209 @@ test_case(util_hash__set_insert) } } +test_case(util_hash__set_insert_random__small) +{ + hash_set set; + + random::generator rng{0x8d6ed4c8749bda57ull, 0x580a939046371825ull}; + + std::uint32_t const max = 1024; + + while (set.size() < max) + { + set.insert(rng() % max); + } + + expect_equal(set.size(), max); + + for (int i = 0; i < max; ++i) + { + expect(set.contains(i)); + auto it = set.find(i); + expect(it != set.end()); + expect_equal(*it, i); + } + + for (int i = max; i < 2 * max; ++i) + { + expect(!set.contains(i)); + expect(set.find(i) == set.end()); + } + + int const probe_count = 1024 * 16; + for (int i = 0; i < probe_count; ++i) + { + auto value = rng(); + if (value < max) continue; + + expect(!set.contains(value)); + expect(set.find(value) == set.end()); + } +} + +test_case(util_hash__set_insert_random) +{ + hash_set set; + + random::generator rng{0x3096a19223fed1cfull, 0xf690a99db056b624ull}; + + int const count = 1024 * 16; + + std::vector inserted; + + while (inserted.size() < count) + { + int value = rng(); + if (set.insert(value).second) + inserted.push_back(value); + } + + expect_equal(set.size(), count); + + std::vector not_inserted; + + while (not_inserted.size() < count) + { + int value = rng(); + if (!set.contains(value)) + not_inserted.push_back(value); + } + + for (auto value : inserted) + { + expect(set.contains(value)); + auto it = set.find(value); + expect(it != set.end()); + expect_equal(*it, value); + } + + for (auto value : not_inserted) + { + expect(!set.contains(value)); + auto it = set.find(value); + expect(it == set.end()); + } +} + +test_case(util_hash__set_erase_sequential) +{ + hash_set set; + + int const count = 1024 * 16; + for (int i = 0; i < count; ++i) + expect(set.insert(i * i).second); + + expect_equal(set.size(), count); + + for (int i = count; i < 2 * count; ++i) + expect(!set.erase(i * i)); + + for (int i = 0; i < count; ++i) + { + expect(set.erase(i * i)); + expect(!set.contains(i * i)); + expect(set.size() == count - i - 1); + } + + expect(set.empty()); + + for (int i = 0; i < count; ++i) + expect(!set.erase(i * i)); +} + +test_case(util_hash__set_erase_random) +{ + hash_set set; + + random::generator rng{0xff60de1081bc862aull, 0xe0a81aad7a42f1b0ull}; + + int const count = 1024 * 16; + + std::vector inserted; + + while (inserted.size() < count) + { + int value = rng(); + if (set.insert(value).second) + inserted.push_back(value); + } + + expect_equal(set.size(), count); + + std::vector not_inserted; + + while (not_inserted.size() < count) + { + int value = rng(); + if (!set.contains(value)) + not_inserted.push_back(value); + } + + for (auto value : not_inserted) + { + expect(!set.erase(value)); + expect_equal(set.size(), count); + } + + for (int i = 0; i < count; ++i) + { + expect(set.erase(inserted[i])); + expect(!set.contains(inserted[i])); + expect_equal(set.size(), count - i - 1); + } +} + +test_case(util_hash__set_insert__erase_sequential) +{ + hash_set set; + + int const count = 1024 * 16; + for (int i = 0; i < count; ++i) + expect(set.insert(i * i).second); + + expect_equal(set.size(), count); + + for (int i = 0; i < count / 2; ++i) + { + expect(set.erase(i * i)); + expect(set.find(i * i) == set.end()); + expect_equal(set.size(), count - i - 1); + } + + expect_equal(set.size(), count / 2); + + for (int i = 0; i < count / 2; ++i) + { + expect(!set.contains(i * i)); + expect(!set.erase(i * i)); + expect(set.find(i * i) == set.end()); + } + + for (int i = count / 2; i < count; ++i) + { + expect(set.contains(i * i)); + auto it = set.find(i * i); + expect(it != set.end()); + expect_equal(*it, i * i); + } + + for (int i = count; i < 2 * count; ++i) + { + expect(set.find(i * i) == set.end()); + expect(set.insert(i * i).second); + expect_equal(set.size(), count / 2 + (i - count) + 1); + } + + for (int i = count / 2; i < count; ++i) + { + expect(set.erase(i * i)); + expect(!set.contains(i * i)); + expect(set.size() == 2 * count - i - 1); + } + + expect_equal(set.size(), count); +} + test_case(util_hash__set_clear) { hash_set set; @@ -116,6 +375,49 @@ test_case(util_hash__set_move) } } +test_case(util_hash__set_movable) +{ + hash_set> set; + + int const count = 1024 * 16; + for (int i = 0; i < count; ++i) + { + expect(set.insert(std::make_unique(i)).second); + expect_equal(set.size(), i + 1); + } + + expect_equal(set.size(), count); +} + +test_case(util_hash__set_lifetime) +{ + hash_set set; + + int const count = 1024 * 16; + for (int i = 0; i < count; ++i) + { + expect(set.insert(lifetime_tracker(i)).second); + expect_equal(set.size(), i + 1); + expect_equal(lifetime_tracker::alive_count(), i + 1); + } + + for (int i = 0; i < count; ++i) + { + expect(set.contains(lifetime_tracker(i))); + auto it = set.find(lifetime_tracker(i)); + expect(it != set.end()); + expect(*it == lifetime_tracker(i)); + } + + for (int i = 0; i < count; ++i) + { + expect(set.erase(lifetime_tracker(i))); + expect(!set.contains(lifetime_tracker(i))); + expect(set.find(lifetime_tracker(i)) == set.end()); + expect_equal(lifetime_tracker::alive_count(), count - i - 1); + } +} + test_case(util_hash__set_benchmark) { random::generator rng;