Add util::hash_set/map with some tests
This commit is contained in:
parent
d3fa7fdfea
commit
a27378a3a7
2 changed files with 585 additions and 0 deletions
401
libs/util/include/psemek/util/hash_table.hpp
Normal file
401
libs/util/include/psemek/util/hash_table.hpp
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
#pragma once
|
||||
|
||||
#include <psemek/util/hash.hpp>
|
||||
#include <psemek/util/span.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
namespace psemek::util
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// One slot of a hash table's entry array.
//
// `value` is disengaged while the slot is free. `hash` holds the hash that
// was supplied when the current value was stored; it has no default
// initializer, so it is meaningful only while `value` is engaged.
template <class T>
struct hash_table_entry
{
    std::size_t hash;
    std::optional<T> value;
};
|
||||
|
||||
template <typename T>
|
||||
struct hash_table_iterator
|
||||
{
|
||||
using value_type = T;
|
||||
using pointer = T *;
|
||||
using reference = T &;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
|
||||
using entry_type = hash_table_entry<std::remove_const_t<T>>;
|
||||
|
||||
hash_table_iterator(entry_type * p, entry_type * end)
|
||||
: p_(p)
|
||||
, end_(end)
|
||||
{
|
||||
advance();
|
||||
}
|
||||
|
||||
T & operator *() const
|
||||
{
|
||||
return *(p_->value);
|
||||
}
|
||||
|
||||
T * operator ->() const
|
||||
{
|
||||
return std::addressof(*(p_->value));
|
||||
}
|
||||
|
||||
hash_table_iterator<T> & operator ++()
|
||||
{
|
||||
++p_;
|
||||
advance();
|
||||
return *this;
|
||||
}
|
||||
|
||||
hash_table_iterator<T> operator ++(int)
|
||||
{
|
||||
auto copy = *this;
|
||||
this->operator++();
|
||||
return copy;
|
||||
}
|
||||
|
||||
friend bool operator == (hash_table_iterator<T> const & it1, hash_table_iterator<T> const & it2)
|
||||
{
|
||||
return it1.p_ == it2.p_;
|
||||
}
|
||||
|
||||
hash_table_iterator<T const> as_const() const
|
||||
{
|
||||
return {p_, end_};
|
||||
}
|
||||
|
||||
private:
|
||||
entry_type * p_;
|
||||
entry_type * end_;
|
||||
|
||||
void advance()
|
||||
{
|
||||
while (!(p_->value) && p_ != end_)
|
||||
++p_;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct hash_table_storage
|
||||
{
|
||||
std::unique_ptr<hash_table_entry<T>[]> table;
|
||||
std::size_t capacity = 0;
|
||||
|
||||
void reset(std::size_t capacity)
|
||||
{
|
||||
table.reset(new hash_table_entry<T>[capacity]);
|
||||
this->capacity = capacity;
|
||||
}
|
||||
|
||||
util::span<hash_table_entry<T>> entries()
|
||||
{
|
||||
return {table.get(), table.get() + capacity};
|
||||
}
|
||||
|
||||
hash_table_iterator<T> iterator(std::size_t index) const
|
||||
{
|
||||
return {table.get() + index, table.get() + capacity};
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Hash, typename Equal>
|
||||
struct hash_table_impl
|
||||
: Hash, Equal
|
||||
{
|
||||
template <typename H, typename K>
|
||||
hash_table_impl(H && h, K && k)
|
||||
: Hash(std::forward<H>(h))
|
||||
, Equal(std::forward<K>(k))
|
||||
{}
|
||||
|
||||
Hash const & hash() const { return *this; }
|
||||
Equal const & equal() const { return *this; }
|
||||
|
||||
template <typename H>
|
||||
std::pair<hash_table_iterator<T>, bool> insert(H && value)
|
||||
{
|
||||
ensure_capacity_for(size_ + 1);
|
||||
std::size_t hash = this->hash()(value);
|
||||
return insert_impl(std::forward<H>(value), hash);
|
||||
}
|
||||
|
||||
template <typename Key>
|
||||
hash_table_iterator<T> find(Key const & key) const
|
||||
{
|
||||
std::size_t hash = this->hash()(key);
|
||||
return find_impl(key, hash);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
for (auto & entry : storage_.entries())
|
||||
entry.value.reset();
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
hash_table_iterator<T> begin() const
|
||||
{
|
||||
return storage_.iterator(0);
|
||||
}
|
||||
|
||||
hash_table_iterator<T> end() const
|
||||
{
|
||||
return storage_.iterator(storage_.capacity);
|
||||
}
|
||||
|
||||
std::size_t size() const
|
||||
{
|
||||
return size_;
|
||||
}
|
||||
|
||||
std::size_t capacity() const
|
||||
{
|
||||
return storage_.capacity;
|
||||
}
|
||||
|
||||
private:
|
||||
hash_table_storage<T> storage_;
|
||||
std::size_t size_ = 0;
|
||||
|
||||
static std::size_t min_capacity_for_size(std::size_t size)
|
||||
{
|
||||
// Ensure at most 0.5 load factor
|
||||
return 2 * size;
|
||||
}
|
||||
|
||||
static std::size_t find_capacity(std::size_t current_capacity, std::size_t min_capacity)
|
||||
{
|
||||
current_capacity = std::max(current_capacity, std::size_t(16));
|
||||
while (current_capacity < min_capacity)
|
||||
current_capacity *= 2;
|
||||
return current_capacity;
|
||||
}
|
||||
|
||||
void ensure_capacity_for(std::size_t size)
|
||||
{
|
||||
std::size_t capacity = min_capacity_for_size(size);
|
||||
if (storage_.capacity < capacity)
|
||||
reallocate(find_capacity(storage_.capacity, capacity));
|
||||
}
|
||||
|
||||
void reallocate(std::size_t capacity)
|
||||
{
|
||||
hash_table_storage<T> storage;
|
||||
storage.reset(capacity);
|
||||
|
||||
std::swap(storage_, storage);
|
||||
|
||||
size_ = 0;
|
||||
|
||||
for (hash_table_entry<T> & entry : storage.entries())
|
||||
{
|
||||
if (entry.value)
|
||||
{
|
||||
insert_impl(std::move(*entry.value), entry.hash);
|
||||
entry.value.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t probe_index(std::size_t hash, std::size_t i) const
|
||||
{
|
||||
return (hash + (i * (i + 1)) / 2) % storage_.capacity;
|
||||
}
|
||||
|
||||
template <typename H>
|
||||
std::pair<hash_table_iterator<T>, bool> insert_impl(H && value, std::size_t hash)
|
||||
{
|
||||
std::size_t i = 0;
|
||||
while (true)
|
||||
{
|
||||
std::size_t index = probe_index(hash, i);
|
||||
auto & entry = storage_.table[index];
|
||||
if (!entry.value)
|
||||
{
|
||||
entry.value.emplace(std::forward<H>(value));
|
||||
entry.hash = hash;
|
||||
++size_;
|
||||
return {storage_.iterator(index), true};
|
||||
}
|
||||
else if (entry.hash == hash && equal()(value, *entry.value))
|
||||
{
|
||||
return {storage_.iterator(index), false};
|
||||
}
|
||||
else
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Key>
|
||||
hash_table_iterator<T> find_impl(Key const & key, std::size_t hash) const
|
||||
{
|
||||
std::size_t i = 0;
|
||||
while (true)
|
||||
{
|
||||
std::size_t index = probe_index(hash, i);
|
||||
auto & entry = storage_.table[index];
|
||||
if (!entry.value)
|
||||
{
|
||||
return storage_.iterator(storage_.capacity);
|
||||
}
|
||||
else if (entry.hash == hash && equal()(key, *entry.value))
|
||||
{
|
||||
return storage_.iterator(index);
|
||||
}
|
||||
else
|
||||
++i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Hasher that accepts either a bare key or a (key, value) pair and, in both
// cases, hashes only the key with the wrapped `Hash`.
template <class Key, class Value, class Hash>
struct pair_hash
    : Hash
{
    pair_hash(Hash const & hash)
        : Hash(hash)
    {}

    // Hash of a bare key.
    std::size_t operator()(Key const & key) const
    {
        Hash const & key_hasher = *this;
        return key_hasher(key);
    }

    // Hash of a pair: the value part is ignored.
    std::size_t operator()(std::pair<Key const, Value> const & pair) const
    {
        Hash const & key_hasher = *this;
        return key_hasher(pair.first);
    }
};
|
||||
|
||||
// Equality predicate that compares keys with the wrapped `Equal`, where
// either side may be given as a bare key or as a (key, value) pair. The
// value part of a pair never takes part in the comparison.
template <class Key, class Value, class Equal>
struct pair_equal
    : Equal
{
    pair_equal(Equal const & equal)
        : Equal(equal)
    {}

    // key vs. pair
    bool operator()(Key const & key1, std::pair<Key const, Value> const & pair2) const
    {
        Equal const & key_equal = *this;
        return key_equal(key1, pair2.first);
    }

    // pair vs. key
    bool operator()(std::pair<Key const, Value> const & pair1, Key const & key2) const
    {
        Equal const & key_equal = *this;
        return key_equal(pair1.first, key2);
    }

    // pair vs. pair
    bool operator()(std::pair<Key const, Value> const & pair1, std::pair<Key const, Value> const & pair2) const
    {
        Equal const & key_equal = *this;
        return key_equal(pair1.first, pair2.first);
    }
};
|
||||
|
||||
}
|
||||
|
||||
template <typename T, typename Hash = std::hash<T>, typename Equal = std::equal_to<T>>
|
||||
struct hash_set
|
||||
{
|
||||
using iterator = detail::hash_table_iterator<T const>;
|
||||
|
||||
hash_set(Hash const & hash = {}, Equal const & equal = {})
|
||||
: impl_(hash, equal)
|
||||
{}
|
||||
|
||||
std::pair<iterator, bool> insert(T const & value)
|
||||
{
|
||||
auto result = impl_.insert(value);
|
||||
return {result.first.as_const(), result.second};
|
||||
}
|
||||
|
||||
std::pair<iterator, bool> insert(T && value)
|
||||
{
|
||||
auto result = impl_.insert(std::move(value));
|
||||
return {result.first.as_const(), result.second};
|
||||
}
|
||||
|
||||
iterator find(T const & value) const
|
||||
{
|
||||
return impl_.find(value).as_const();
|
||||
}
|
||||
|
||||
iterator begin() const
|
||||
{
|
||||
return impl_.begin().as_const();
|
||||
}
|
||||
|
||||
iterator end() const
|
||||
{
|
||||
return impl_.end().as_const();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
impl_.clear();
|
||||
}
|
||||
|
||||
std::size_t size() const
|
||||
{
|
||||
return impl_.size();
|
||||
}
|
||||
|
||||
private:
|
||||
detail::hash_table_impl<T, Hash, Equal> impl_;
|
||||
};
|
||||
|
||||
template <typename Key, typename Value, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>>
|
||||
struct hash_map
|
||||
{
|
||||
using iterator = detail::hash_table_iterator<std::pair<Key const, Value>>;
|
||||
|
||||
hash_map(Hash const & hash = {}, KeyEqual const & equal = {})
|
||||
: impl_(hash, equal)
|
||||
{}
|
||||
|
||||
std::pair<iterator, bool> insert(std::pair<Key, Value> const & value)
|
||||
{
|
||||
return impl_.insert(value);
|
||||
}
|
||||
|
||||
std::pair<iterator, bool> insert(std::pair<Key, Value> && value)
|
||||
{
|
||||
return impl_.insert(std::move(value));
|
||||
}
|
||||
|
||||
iterator find(Key const & key) const
|
||||
{
|
||||
return impl_.find(key);
|
||||
}
|
||||
|
||||
iterator begin() const
|
||||
{
|
||||
return impl_.begin();
|
||||
}
|
||||
|
||||
iterator end() const
|
||||
{
|
||||
return impl_.end();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
impl_.clear();
|
||||
}
|
||||
|
||||
std::size_t size() const
|
||||
{
|
||||
return impl_.size();
|
||||
}
|
||||
|
||||
private:
|
||||
detail::hash_table_impl<std::pair<Key const, Value>, detail::pair_hash<Key, Value, Hash>, detail::pair_equal<Key, Value, KeyEqual>> impl_;
|
||||
};
|
||||
|
||||
}
|
||||
184
libs/util/tests/hash_table.cpp
Normal file
184
libs/util/tests/hash_table.cpp
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
#include <psemek/test/test.hpp>
|
||||
|
||||
#include <psemek/util/hash_table.hpp>
|
||||
#include <psemek/random/generator.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace psemek;
|
||||
using namespace psemek::util;
|
||||
|
||||
// Runs the same insert / iterate / find / clear sequence on util::hash_set
// and on std::unordered_set, each step inside its own test_profile section,
// using the integers [0, count) in shuffled order as input.
test_case(util_hash__set_benchmark)
{
    int const count = 1024 * 1024;

    // Input: 0 .. count-1, shuffled.
    std::vector<int> inputs;
    for (int next = 0; next < count; ++next)
    {
        inputs.push_back(next);
    }

    random::generator generator;
    std::shuffle(inputs.begin(), inputs.end(), generator);

    test_profile(hash_set_total)
    {
        hash_set<int> set;

        test_profile(hash_set_insert)
        {
            for (auto value : inputs)
            {
                set.insert(value);
            }
            expect_equal(set.size(), count);
        }

        test_profile(hash_set_iterate)
        {
            // Every element must be in range and each must be visited once.
            int size = 0;
            for (auto value : set)
            {
                expect(0 <= value && value < count);
                ++size;
            }
            expect_equal(size, count);
        }

        test_profile(hash_set_find)
        {
            for (auto value : inputs)
            {
                auto it = set.find(value);
                expect(it != set.end());
                expect_equal(*it, value);
            }
        }

        test_profile(hash_set_clear)
        {
            set.clear();
        }
    }

    test_profile(unordered_set_total)
    {
        std::unordered_set<int> set;

        test_profile(unordered_set_insert)
        {
            for (auto value : inputs)
            {
                set.insert(value);
            }
            expect_equal(set.size(), count);
        }

        test_profile(unordered_set_iterate)
        {
            int size = 0;
            for (auto value : set)
            {
                expect(0 <= value && value < count);
                ++size;
            }
            expect_equal(size, count);
        }

        test_profile(unordered_set_find)
        {
            for (auto value : inputs)
            {
                auto it = set.find(value);
                expect(it != set.end());
                expect_equal(*it, value);
            }
        }

        test_profile(unordered_set_clear)
        {
            set.clear();
        }
    }
}
|
||||
|
||||
// Runs the same insert / iterate / find / clear sequence on util::hash_map
// and on std::unordered_map, each step inside its own test_profile section.
// Keys are the integers [0, count) in shuffled order; each key k maps to -k.
test_case(util_hash__map_benchmark)
{
    random::generator rng;
    std::vector<int> keys;
    int const count = 1024 * 1024;
    for (int i = 0; i < count; ++i)
        keys.push_back(i);
    std::shuffle(keys.begin(), keys.end(), rng);

    test_profile(hash_map_total)
    {
        hash_map<int, int> map;

        test_profile(hash_map_insert)
        {
            for (auto key : keys)
                map.insert({key, -key});
            expect_equal(map.size(), count);
        }

        test_profile(hash_map_iterate)
        {
            // Every element must be in range, carry the right value, and be
            // visited exactly once.
            int size = 0;
            for (auto const & pair : map)
            {
                expect(0 <= pair.first && pair.first < count);
                expect_equal(pair.second, -pair.first);
                ++size;
            }
            expect_equal(size, count);
        }

        test_profile(hash_map_find)
        {
            for (auto key : keys)
            {
                // One lookup per key: the iterator that is checked is the
                // same one that is dereferenced below.
                auto it = map.find(key);
                expect(it != map.end());
                expect_equal(it->second, -key);
            }
        }

        test_profile(hash_map_clear)
        {
            map.clear();
        }
    }

    test_profile(unordered_map_total)
    {
        std::unordered_map<int, int> map;

        test_profile(unordered_map_insert)
        {
            for (auto key : keys)
                map.insert({key, -key});
            // Same post-condition as the hash_map section above.
            expect_equal(map.size(), count);
        }

        test_profile(unordered_map_iterate)
        {
            int size = 0;
            for (auto const & pair : map)
            {
                expect(0 <= pair.first && pair.first < count);
                expect_equal(pair.second, -pair.first);
                ++size;
            }
            expect_equal(size, count);
        }

        test_profile(unordered_map_find)
        {
            for (auto key : keys)
            {
                // One lookup per key, matching the hash_map section.
                auto it = map.find(key);
                expect(it != map.end());
                expect_equal(it->second, -key);
            }
        }

        test_profile(unordered_map_clear)
        {
            map.clear();
        }
    }
}
|
||||
Loading…
Add table
Reference in a new issue