Support removal from util::hash_table and add more hash table tests

This commit is contained in:
Nikita Lisitsa 2024-06-01 14:56:53 +03:00
parent fc18c75557
commit 7333bcd922
2 changed files with 437 additions and 23 deletions

View file

@ -5,7 +5,6 @@
#include <psemek/util/at.hpp>
#include <memory>
#include <optional>
#include <initializer_list>
namespace psemek::util
@ -14,11 +13,65 @@ namespace psemek::util
namespace detail
{
// Bit layout of hash_table_entry::hash.
// The top bit marks a slot that currently holds a constructed value, the next
// bit marks a tombstone (a slot whose value was erased), and the remaining
// bits keep the truncated hash of the stored value. The masks are derived from
// the width of std::size_t; with a 64-bit std::size_t they are bits 63 and 62.
inline constexpr std::size_t stored_value_mask = ~(~std::size_t{0} >> 1);
inline constexpr std::size_t tombstone_mask = stored_value_mask >> 1;
inline constexpr std::size_t hash_value_mask = ~(stored_value_mask | tombstone_mask);

// A single slot of the hash table: a tagged hash plus raw storage for one T.
//
// A slot is in exactly one of three states:
//   * empty      - hash == 0, no value constructed;
//   * tombstone  - hash == tombstone_mask, no value constructed;
//   * occupied   - stored_value_mask is set, storage holds a live T.
template <typename T>
struct hash_table_entry
{
    // State bits combined with the truncated hash of the stored value.
    std::size_t hash = 0;
    // Suitably aligned raw bytes; a T lives here only while has_value() is true.
    alignas(T) char storage[sizeof(T)] = {0};

    hash_table_entry() = default;

    // Copies the state of `other`, copy-constructing the stored value if any.
    // (A member-wise copy would duplicate the raw bytes and later destroy the
    // same value twice.)
    hash_table_entry(hash_table_entry const & other)
    {
        copy_from(other);
    }

    // Move-constructs the stored value, if any, out of `other`. `other` keeps
    // its (moved-from) value and is still responsible for destroying it.
    hash_table_entry(hash_table_entry && other) noexcept(noexcept(T(std::declval<T>())))
    {
        move_from(other);
    }

    hash_table_entry & operator = (hash_table_entry const & other)
    {
        if (this != &other)
        {
            reset();
            copy_from(other);
        }
        return *this;
    }

    hash_table_entry & operator = (hash_table_entry && other) noexcept(noexcept(T(std::declval<T>())))
    {
        if (this != &other)
        {
            reset();
            move_from(other);
        }
        return *this;
    }

    // True if the slot holds a constructed value.
    bool has_value() const
    {
        return (hash & stored_value_mask) != 0;
    }

    // True if the slot is a tombstone left behind by an erased value.
    bool is_tombstone() const
    {
        return (hash & tombstone_mask) != 0;
    }

    // Pointer to the storage viewed as a T; only dereference while has_value().
    T * storage_ptr()
    {
        return reinterpret_cast<T *>(storage);
    }

    T const * storage_ptr() const
    {
        return reinterpret_cast<T const *>(storage);
    }

    // The stored value. Precondition: has_value().
    T & value()
    {
        return *storage_ptr();
    }

    T const & value() const
    {
        return *storage_ptr();
    }

    // Constructs a T in the slot from `value` and marks the slot occupied,
    // remembering the low bits of `hash`.
    // Precondition: the slot holds no value (empty or tombstone).
    template <typename H>
    void set_value(H && value, std::size_t hash)
    {
        // Parentheses, not braces: brace-initialization would prefer an
        // initializer_list constructor of T (e.g. wrap a vector in a vector).
        ::new (static_cast<void *>(storage)) T(std::forward<H>(value));
        // The state is updated only after construction succeeded, so a
        // throwing constructor leaves the slot unchanged.
        this->hash = (hash & hash_value_mask) | stored_value_mask;
    }

    // Compares the truncated hash kept in the slot with the truncated `hash`.
    bool hash_equal(std::size_t hash) const
    {
        return (hash & hash_value_mask) == (this->hash & hash_value_mask);
    }

    // Turns the slot into a tombstone, destroying the stored value first if
    // there still is one.
    void set_tombstone()
    {
        reset();
        this->hash = tombstone_mask;
    }

    // Destroys the stored value, if any, and returns the slot to the empty
    // state (this also clears a tombstone mark).
    void reset()
    {
        if (has_value())
            value().~T();
        hash = 0;
    }

    ~hash_table_entry()
    {
        reset();
    }

private:
    // Both helpers require that this slot currently holds no value.
    void copy_from(hash_table_entry const & other)
    {
        if (other.has_value())
            set_value(other.value(), other.hash);
        else
            hash = other.hash;
    }

    void move_from(hash_table_entry & other)
    {
        if (other.has_value())
            set_value(std::move(other.value()), other.hash);
        else
            hash = other.hash;
    }
};
template <typename T>
@ -41,12 +94,12 @@ namespace psemek::util
// Returns a reference to the value stored in the entry the iterator points at.
// The entry must hold a value: value() reads the entry's storage unconditionally.
T & operator *() const
{
    return p_->value();
}
// Returns the address of the value stored in the entry the iterator points at.
// std::addressof is used so that a T overloading unary & is handled correctly.
T * operator ->() const
{
    return std::addressof(p_->value());
}
hash_table_iterator<T> & operator ++()
@ -73,13 +126,18 @@ namespace psemek::util
return {p_, end_};
}
// Returns the raw pointer to the table entry this iterator currently refers to.
// The containers' erase(iterator) overloads pass it on to the table's erase().
entry_type * internal() const
{
return p_;
}
private:
entry_type * p_;
entry_type * end_;
// Moves p_ forward to the next entry that holds a value, or to end_ if there
// is none. Entries without a value (never used or tombstones) are skipped.
void advance()
{
    while (p_ != end_ && !p_->has_value())
        ++p_;
}
};
@ -159,10 +217,22 @@ namespace psemek::util
return find_impl(key, hash);
}
// Removes the value held by `entry`, leaving a tombstone in its place so that
// probe sequences passing through the slot are not cut short.
// Calls with a null pointer or with a slot that holds no value are ignored;
// otherwise size_ would underflow and tombstone_count_ would be inflated.
// May rehash the table, after which previously obtained entries are stale.
void erase(hash_table_entry<T> * entry)
{
    if (!entry || !entry->has_value())
        return;

    entry->reset();
    entry->set_tombstone();
    --size_;
    ++tombstone_count_;

    // Ensure at most 25% tombstones
    if (4 * tombstone_count_ >= storage_.capacity)
        rehash();
}
void clear()
{
for (auto & entry : storage_.entries())
entry.value.reset();
entry.reset();
size_ = 0;
}
@ -189,6 +259,7 @@ namespace psemek::util
private:
hash_table_storage<T> storage_;
std::size_t size_ = 0;
std::size_t tombstone_count_ = 0;
static std::size_t min_capacity_for_size(std::size_t size)
{
@ -219,17 +290,23 @@ namespace psemek::util
std::swap(storage_, storage);
size_ = 0;
tombstone_count_ = 0;
for (hash_table_entry<T> & entry : storage.entries())
{
if (entry.value)
if (entry.has_value())
{
insert_impl(std::move(*entry.value), entry.hash);
entry.value.reset();
insert_impl(std::move(entry.value()), entry.hash);
entry.reset();
}
}
}
// Rebuilds the table at its current capacity via reallocate(capacity()).
// erase() calls this once too many tombstones have accumulated.
void rehash()
{
reallocate(capacity());
}
std::size_t probe_index(std::size_t hash, std::size_t i) const
{
return (hash + (i * (i + 1)) / 2) % storage_.capacity;
@ -243,14 +320,13 @@ namespace psemek::util
{
std::size_t index = probe_index(hash, i);
auto & entry = storage_.table[index];
if (!entry.value)
if (!entry.has_value() || entry.is_tombstone())
{
entry.value.emplace(std::forward<H>(value));
entry.hash = hash;
entry.set_value(std::forward<H>(value), hash);
++size_;
return {storage_.iterator(index), true};
}
else if (entry.hash == hash && equal()(value, *entry.value))
else if (entry.hash_equal(hash) && equal()(value, entry.value()))
{
return {storage_.iterator(index), false};
}
@ -267,16 +343,18 @@ namespace psemek::util
{
std::size_t index = probe_index(hash, i);
auto & entry = storage_.table[index];
if (!entry.value)
if (!entry.is_tombstone())
{
return storage_.iterator(storage_.capacity);
if (!entry.has_value())
{
return end();
}
else if (entry.hash_equal(hash) && equal()(key, entry.value()))
{
return storage_.iterator(index);
}
}
else if (entry.hash == hash && equal()(key, *entry.value))
{
return storage_.iterator(index);
}
else
++i;
++i;
}
}
};
@ -408,6 +486,23 @@ namespace psemek::util
return find(key) != end();
}
// Erases the element referred to by `it` and reports success.
// Always returns true; the iterator is handed to the table as a raw entry.
bool erase(iterator const & it)
{
    auto * const entry = it.internal();
    impl_.erase(entry);
    return true;
}
// Erases the element matching `key`, if there is one.
// Returns true when an element was found and erased, false otherwise.
template <typename Key>
bool erase(Key const & key)
{
    auto position = find(key);
    bool const found = position != end();
    if (found)
        erase(position);
    return found;
}
iterator begin() const
{
return impl_.begin().as_const();
@ -497,6 +592,23 @@ namespace psemek::util
return find(key) != end();
}
// Erases the element referred to by `it` and reports success.
// Always returns true; the iterator is handed to the table as a raw entry.
bool erase(iterator const & it)
{
    auto * const entry = it.internal();
    impl_.erase(entry);
    return true;
}
// Erases the element whose key matches `key`, if there is one.
// Returns true when an element was found and erased, false otherwise.
template <typename Key1>
bool erase(Key1 const & key)
{
    auto position = find(key);
    bool const found = position != end();
    if (found)
        erase(position);
    return found;
}
iterator begin() const
{
return impl_.begin();

View file

@ -6,10 +6,66 @@
#include <algorithm>
#include <unordered_set>
#include <unordered_map>
#include <memory>
using namespace psemek;
using namespace psemek::util;
namespace
{
// Test helper: a move-only int wrapper that counts how many instances were
// constructed, move-constructed and destroyed, so tests can verify that a
// container neither leaks nor double-destroys its elements.
struct lifetime_tracker
{
    // Global counters shared by all instances; never reset by this type.
    static inline std::size_t constructed_count = 0;
    static inline std::size_t move_constructed_count = 0;
    static inline std::size_t destroyed_count = 0;

    // Number of instances that currently exist.
    static std::size_t alive_count()
    {
        return constructed_count + move_constructed_count - destroyed_count;
    }

    int value;

    lifetime_tracker(int value)
        : value(value)
    {
        ++constructed_count;
    }

    // The moved-from object keeps its value and still counts as alive until
    // it is destroyed. noexcept, since nothing here can throw.
    lifetime_tracker(lifetime_tracker && other) noexcept
        : value(other.value)
    {
        ++move_constructed_count;
    }

    lifetime_tracker(lifetime_tracker const &) = delete;

    lifetime_tracker & operator = (lifetime_tracker &&) = delete;
    lifetime_tracker & operator = (lifetime_tracker const &) = delete;

    ~lifetime_tracker()
    {
        ++destroyed_count;
    }

    // Equality looks at the wrapped value only (the single non-static member).
    friend bool operator == (lifetime_tracker const & lhs, lifetime_tracker const & rhs) noexcept
    {
        return lhs.value == rhs.value;
    }
};
struct lifetime_tracker_hash
{
std::size_t operator()(lifetime_tracker const & value) const noexcept
{
return value.value;
}
};
}
test_case(util_hash__set_empty)
{
hash_set<int> set;
@ -22,7 +78,7 @@ test_case(util_hash__set_empty)
expect_equal(call_count, 0);
}
test_case(util_hash__set_insert)
test_case(util_hash__set_insert_sequential)
{
hash_set<int> set;
@ -47,6 +103,209 @@ test_case(util_hash__set_insert)
}
}
// Fills a set with pseudo-random values from [0, max) until every value of the
// range is present, then checks lookups of present keys, of keys just above
// the range, and of random keys outside the range.
test_case(util_hash__set_insert_random__small)
{
    hash_set<int> set;

    random::generator rng{0x8d6ed4c8749bda57ull, 0x580a939046371825ull};

    std::uint32_t const max = 1024;
    // Signed copy of the bound for loops over int keys, so that the loop
    // conditions do not compare signed with unsigned values.
    int const limit = static_cast<int>(max);

    while (set.size() < max)
    {
        set.insert(rng() % max);
    }

    expect_equal(set.size(), max);

    for (int i = 0; i < limit; ++i)
    {
        expect(set.contains(i));
        auto it = set.find(i);
        expect(it != set.end());
        expect_equal(*it, i);
    }

    for (int i = limit; i < 2 * limit; ++i)
    {
        expect(!set.contains(i));
        expect(set.find(i) == set.end());
    }

    int const probe_count = 1024 * 16;
    for (int i = 0; i < probe_count; ++i)
    {
        // Narrow to the key type first and range-check the narrowed value, so
        // the probe is exactly the int that is looked up, whatever type rng()
        // returns.
        int const value = static_cast<int>(rng());
        if (0 <= value && value < limit) continue;
        expect(!set.contains(value));
        expect(set.find(value) == set.end());
    }
}
// Inserts `count` distinct pseudo-random keys, collects `count` keys that are
// not in the set, and checks that lookups agree for both groups.
test_case(util_hash__set_insert_random)
{
    hash_set<int> set;

    random::generator rng{0x3096a19223fed1cfull, 0xf690a99db056b624ull};

    int const count = 1024 * 16;

    // Keys that insert() reported as newly added.
    std::vector<int> present;
    while (present.size() < count)
    {
        int candidate = rng();
        auto const result = set.insert(candidate);
        if (result.second)
            present.push_back(candidate);
    }

    expect_equal(set.size(), count);

    // Keys the set reports as missing.
    std::vector<int> absent;
    while (absent.size() < count)
    {
        int candidate = rng();
        if (set.contains(candidate))
            continue;
        absent.push_back(candidate);
    }

    for (auto key : present)
    {
        expect(set.contains(key));
        auto pos = set.find(key);
        expect(pos != set.end());
        expect_equal(*pos, key);
    }

    for (auto key : absent)
    {
        expect(!set.contains(key));
        auto pos = set.find(key);
        expect(pos == set.end());
    }
}
// Inserts the squares 0*0 .. (count-1)^2, then erases them in the same order,
// checking the reported result, membership and size after every step.
test_case(util_hash__set_erase_sequential)
{
    hash_set<int> set;

    int const count = 1024 * 16;

    for (int i = 0; i < count; ++i)
        expect(set.insert(i * i).second);

    expect_equal(set.size(), count);

    // Keys that were never inserted must be reported as not erased.
    for (int i = count; i < 2 * count; ++i)
        expect(!set.erase(i * i));

    for (int i = 0; i < count; ++i)
    {
        expect(set.erase(i * i));
        expect(!set.contains(i * i));
        // expect_equal, as in the sibling tests, instead of expect(a == b).
        expect_equal(set.size(), count - i - 1);
    }

    expect(set.empty());

    // Erasing an already erased key must fail.
    for (int i = 0; i < count; ++i)
        expect(!set.erase(i * i));
}
// Inserts `count` distinct pseudo-random keys, verifies that erasing keys
// which are not in the set fails without changing the size, then erases the
// inserted keys in insertion order and checks membership and size each time.
test_case(util_hash__set_erase_random)
{
    hash_set<int> set;

    random::generator rng{0xff60de1081bc862aull, 0xe0a81aad7a42f1b0ull};

    int const count = 1024 * 16;

    // Keys that insert() reported as newly added.
    std::vector<int> present;
    while (present.size() < count)
    {
        int candidate = rng();
        auto const result = set.insert(candidate);
        if (result.second)
            present.push_back(candidate);
    }

    expect_equal(set.size(), count);

    // Keys the set reports as missing.
    std::vector<int> absent;
    while (absent.size() < count)
    {
        int candidate = rng();
        if (set.contains(candidate))
            continue;
        absent.push_back(candidate);
    }

    for (auto key : absent)
    {
        expect(!set.erase(key));
        expect_equal(set.size(), count);
    }

    for (int erased = 0; erased < count; ++erased)
    {
        expect(set.erase(present[erased]));
        expect(!set.contains(present[erased]));
        expect_equal(set.size(), count - erased - 1);
    }
}
// Interleaves insertion and erasure: inserts `count` squares, erases the first
// half, checks both halves, inserts `count` further squares and finally erases
// the remaining half of the original keys.
test_case(util_hash__set_insert__erase_sequential)
{
    hash_set<int> set;

    int const count = 1024 * 16;

    for (int i = 0; i < count; ++i)
        expect(set.insert(i * i).second);

    expect_equal(set.size(), count);

    // Erase the first half of the keys.
    for (int i = 0; i < count / 2; ++i)
    {
        expect(set.erase(i * i));
        expect(set.find(i * i) == set.end());
        expect_equal(set.size(), count - i - 1);
    }

    expect_equal(set.size(), count / 2);

    // The erased half must be gone...
    for (int i = 0; i < count / 2; ++i)
    {
        expect(!set.contains(i * i));
        expect(!set.erase(i * i));
        expect(set.find(i * i) == set.end());
    }

    // ...while the other half must still be found.
    for (int i = count / 2; i < count; ++i)
    {
        expect(set.contains(i * i));
        auto it = set.find(i * i);
        expect(it != set.end());
        expect_equal(*it, i * i);
    }

    // Insert new keys into a table that already contains erased slots.
    for (int i = count; i < 2 * count; ++i)
    {
        expect(set.find(i * i) == set.end());
        expect(set.insert(i * i).second);
        expect_equal(set.size(), count / 2 + (i - count) + 1);
    }

    // Erase the remaining half of the original keys.
    for (int i = count / 2; i < count; ++i)
    {
        expect(set.erase(i * i));
        expect(!set.contains(i * i));
        // expect_equal, as in the rest of this test, instead of expect(a == b).
        expect_equal(set.size(), 2 * count - i - 1);
    }

    expect_equal(set.size(), count);
}
test_case(util_hash__set_clear)
{
hash_set<int> set;
@ -116,6 +375,49 @@ test_case(util_hash__set_move)
}
}
// Checks that the set can store a move-only element type: inserts `count`
// freshly allocated std::unique_ptr<int> values and verifies the size grows
// by one with every insertion.
test_case(util_hash__set_movable)
{
    hash_set<std::unique_ptr<int>> set;

    int const count = 1024 * 16;

    for (int inserted = 0; inserted < count; ++inserted)
    {
        auto owned = std::make_unique<int>(inserted);
        auto const result = set.insert(std::move(owned));
        expect(result.second);
        expect_equal(set.size(), inserted + 1);
    }

    expect_equal(set.size(), count);
}
// Verifies that the set constructs and destroys its elements exactly once:
// the number of live lifetime_tracker objects must follow the number of
// elements in the set through insertion, lookup and erasure.
test_case(util_hash__set_lifetime)
{
    // The counters are global and never reset, so measure against the number
    // of trackers alive at entry instead of assuming it is zero.
    std::size_t const baseline = lifetime_tracker::alive_count();

    hash_set<lifetime_tracker, lifetime_tracker_hash> set;

    int const count = 1024 * 16;

    for (int i = 0; i < count; ++i)
    {
        expect(set.insert(lifetime_tracker(i)).second);
        expect_equal(set.size(), i + 1);
        // The temporary passed to insert() is gone by now; only the stored
        // copy remains.
        expect_equal(lifetime_tracker::alive_count() - baseline, i + 1);
    }

    for (int i = 0; i < count; ++i)
    {
        expect(set.contains(lifetime_tracker(i)));
        auto it = set.find(lifetime_tracker(i));
        expect(it != set.end());
        expect(*it == lifetime_tracker(i));
    }

    for (int i = 0; i < count; ++i)
    {
        expect(set.erase(lifetime_tracker(i)));
        expect(!set.contains(lifetime_tracker(i)));
        expect(set.find(lifetime_tracker(i)) == set.end());
        // Erasing must destroy the stored element right away.
        expect_equal(lifetime_tracker::alive_count() - baseline, count - i - 1);
    }
}
test_case(util_hash__set_benchmark)
{
random::generator rng;