psemek/libs/prof/source/profiler.cpp

243 lines
5.7 KiB
C++

#include <psemek/prof/profiler.hpp>
#include <psemek/util/pretty_print.hpp>
#include <psemek/util/statistics.hpp>
#include <psemek/log/log.hpp>
#include <map>
#include <unordered_map>
#include <iomanip>
#include <thread>
#include <mutex>
#include <vector>
namespace psemek::prof
{
namespace
{
struct profiler_tree
{
profiler_tree * parent = nullptr;
std::mutex * mutex = nullptr;
util::statistics_log_bucket<double> execution_time{0.05};
std::map<std::string, profiler_tree, std::less<>> children;
std::vector<std::map<std::string, profiler_tree>::iterator> children_list;
profiler_tree * child(std::string_view const & name)
{
auto it = children.find(name);
if (it == children.end())
{
it = children.insert({std::string(name), profiler_tree{}}).first;
it->second.parent = this;
it->second.mutex = mutex;
children_list.push_back(it);
}
return &(it->second);
}
};
static std::map<std::thread::id, profiler_tree> roots;
static std::mutex roots_mutex;
static std::unordered_map<std::thread::id, std::mutex> thread_mutex;
static thread_local profiler_tree * current = nullptr;
void get_current()
{
if (!current)
{
auto const id = std::this_thread::get_id();
std::lock_guard lock{roots_mutex};
current = &roots[id];
current->mutex = &thread_mutex[id];
}
}
struct pretty_impl
{
double value;
};
pretty_impl pretty(double value)
{
return {value};
}
std::ostream & operator << (std::ostream & os, pretty_impl p)
{
char const * suf[] =
{
"ns",
"us",
"ms",
"s",
};
double s = 1e-9;
for (std::size_t i = 0; i < 4; ++i)
{
if (p.value < (s * 1000.0) || i == 3)
{
os << (p.value / s) << ' ' << suf[i];
return os;
}
s *= 1000.0;
}
return os;
}
void dump_impl(profiler_tree const & node, std::size_t depth, log::level level)
{
std::size_t max_name_length = 0;
for (auto it : node.children_list)
max_name_length = std::max(max_name_length, it->first.size());
for (auto it : node.children_list)
{
auto const & c = *it;
auto const & stat = c.second.execution_time;
if (stat.count() == 1)
log::log(level) << std::string(depth * 2, ' ') << std::setw(max_name_length + 3) << std::left << std::setfill(' ') << c.first
<< " " << pretty(stat.mean())
<< " (" << stat.count() << " call)";
else if (node.execution_time.count() > 0)
log::log(level) << std::string(depth * 2, ' ') << std::setw(max_name_length + 3) << std::left << std::setfill(' ') << c.first
<< " avg " << pretty(stat.mean())
<< " 25% " << pretty(stat.percentile(0.25))
<< " 50% " << pretty(stat.percentile(0.50))
<< " 75% " << pretty(stat.percentile(0.75))
<< " max " << pretty(stat.max())
<< " (" << stat.count() << " calls, " << std::setprecision(3) << (stat.count() * 1.f / node.execution_time.count() * 1.f) << "x, "
<< std::setprecision(2) << (100.f * stat.count() * stat.mean() / node.execution_time.mean() / node.execution_time.count()) << "%)";
else
log::log(level) << std::string(depth * 2, ' ') << std::setw(max_name_length + 3) << std::left << std::setfill(' ') << c.first
<< " avg " << pretty(stat.mean())
<< " 25% " << pretty(stat.percentile(0.25))
<< " 50% " << pretty(stat.percentile(0.50))
<< " 75% " << pretty(stat.percentile(0.75))
<< " max " << pretty(stat.max())
<< " (" << stat.count() << " calls)";
dump_impl(c.second, depth + 1, level);
}
}
profiler_tree merge(profiler_tree t1, profiler_tree const & t2)
{
t1.execution_time = merge(t1.execution_time, t2.execution_time);
for (auto it2 : t2.children_list)
{
auto it1 = t1.children.find(it2->first);
if (it1 == t1.children.end())
{
it1 = t1.children.insert({it2->first, profiler_tree{}}).first;
t1.children_list.push_back(it1);
}
it1->second = merge(std::move(it1->second), it2->second);
}
return t1;
}
profiler_tree merged_tree()
{
std::vector<profiler_tree const *> r;
{
std::lock_guard lock{roots_mutex};
for (auto const & p : roots)
r.push_back(&p.second);
}
profiler_tree result;
for (auto root : r)
{
std::lock_guard lock{*(root->mutex)};
result = merge(std::move(result), *root);
}
return result;
}
}
void dump(log::level level)
{
dump_impl(merged_tree(), 0, level);
}
profiler::profiler(std::string_view name)
{
get_current();
std::lock_guard lock{*(current->mutex)};
current = current->child(name);
clock_.restart();
}
profiler::profiler(profiler && other)
: clock_(other.clock_)
, moved_out_(other.moved_out_)
{
other.moved_out_ = true;
}
profiler & profiler::operator = (profiler && other)
{
report();
clock_ = other.clock_;
moved_out_ = other.moved_out_;
other.moved_out_ = true;
return *this;
}
profiler::~profiler()
{
report();
}
void profiler::start_frame(std::string_view name)
{
get_current();
std::lock_guard lock{*(current->mutex)};
current = current->child(name);
}
void profiler::push(std::string_view name, std::chrono::duration<double> duration, std::size_t count)
{
get_current();
std::lock_guard lock{*(current->mutex)};
current->child(name)->execution_time.push(duration.count(), count);
}
void profiler::end_frame(std::chrono::duration<double> duration)
{
std::lock_guard lock{*(current->mutex)};
current->execution_time.push(duration.count());
current = current->parent;
}
void profiler::report()
{
if (moved_out_)
return;
auto count = clock_.count();
std::lock_guard lock{*(current->mutex)};
current->execution_time.push(count);
current = current->parent;
}
}