Add ml::neural_net_learner implementation with backpropagation, gradient descent, and tests

This commit is contained in:
Nikita Lisitsa 2022-01-21 12:12:38 +03:00
parent def0615baa
commit 8630525dcf
5 changed files with 384 additions and 0 deletions

View file

@ -4,3 +4,5 @@ file(GLOB_RECURSE PSEMEK_ML_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "sour
# Define the psemek-ml target from the collected header and source lists
# (psemek_add_library is a project helper; presumably it wraps add_library -- confirm)
psemek_add_library(psemek-ml ${PSEMEK_ML_HEADERS} ${PSEMEK_ML_SOURCES})
# Public headers live under include/; PUBLIC also exposes the path to dependents
target_include_directories(psemek-ml PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
# PUBLIC link dependencies propagate to every target that links psemek-ml
target_link_libraries(psemek-ml PUBLIC psemek-util psemek-geom psemek-random)
# Project helper; presumably registers the tests found in the tests/ directory -- confirm
psemek_glob_tests(psemek-ml tests)

View file

@ -0,0 +1,163 @@
#pragma once
#include <psemek/ml/neural_net/neural_net.hpp>
#include <psemek/ml/neural_net/evaluator.hpp>
#include <psemek/geom/math.hpp>
namespace psemek::ml
{
// A helper class to facilitate allocation-free multiple
// evaluation of a neural-net followed by backpropagation.
// It caches the values of every layer computed by the last apply()
// call and accumulates the loss gradient over backpropagate() calls.
template <typename T>
struct neural_net_learner
{
// Evaluate nn on input, caching the values of every layer, and return
// a reference to the cached output layer.
// Throws empty_neural_net_error if nn is empty and
// wrong_neural_net_input_size if input does not match the first layer.
// N.B.: input is taken by value and moved into the cache
std::vector<T> const & apply(neural_net<T> const & nn, std::vector<T> input) const;
// The output layer cached by the last apply() call
// N.B.: this calls back() on the layer cache, so apply() must have
// been called beforehand
std::vector<T> const & result() const { return layers_.back(); }
// Compute the gradient of a loss function (defined as 1/2 of L^2 norm
// of the difference between neural net output and desired output)
// wrt neural net weights and accumulate them to the already computed
// gradient.
// Reads the layer values cached by apply(), so apply() must have been
// called with the same nn beforehand.
// Throws empty_neural_net_error if nn is empty and
// wrong_neural_net_output_size if output does not match the last layer.
void backpropagate(neural_net<T> const & nn, std::vector<T> const & output);
// Read-only view of the accumulated gradient
util::span<T const> gradient() const { return gradient_; }
// Mutable view of the accumulated gradient
util::span<T> gradient() { return gradient_; }
// Square root of the sum of geom::sqr() over the accumulated gradient
T gradient_norm() const;
// Perform a single step of gradient descent in the direction
// of the computed gradient, multiplied by factor
// N.B.: this does **not** clear out the gradient
void descend(neural_net<T> & nn, T factor) const;
// Reset the accumulated gradient to zero
void clear();
private:
// Values of every layer from the last apply() call (index 0 is the input);
// mutable so that the const apply() can update this cache
mutable std::vector<std::vector<T>> layers_;
// Accumulated gradient, one entry per entry of nn.weights()
std::vector<T> gradient_;
// Scratch buffers holding per-layer error terms during backpropagate()
std::vector<T> error_, error_tmp_;
};
// Explicit instantiation declarations: including this header does not
// implicitly instantiate the float and double versions, so matching
// explicit instantiation definitions must exist in some translation unit
extern template struct neural_net_learner<float>;
extern template struct neural_net_learner<double>;
// Evaluates nn on input and stores the values of every layer (layer 0 is the
// input itself) so that a later backpropagate() call can reuse them.
//
// The weights are consumed sequentially: for every layer transition, each
// output neuron owns one bias value followed by one weight per input neuron.
//
// Returns a reference to the cached output layer.
// Throws empty_neural_net_error if nn is empty and wrong_neural_net_input_size
// if input.size() differs from the size of the first layer. Both checks run
// before the cache is modified, so a failed call leaves the previously cached
// layers intact.
template <typename T>
std::vector<T> const & neural_net_learner<T>::apply(neural_net<T> const & nn, std::vector<T> input) const
{
    if (nn.empty())
        throw empty_neural_net_error{};

    auto layer_sizes = nn.layer_sizes();

    // Validate before touching layers_, otherwise a rejected input would
    // leave the cache resized but not filled
    if (layer_sizes[0] != input.size())
        throw wrong_neural_net_input_size{layer_sizes[0], input.size()};

    auto weights = nn.weights().begin();

    layers_.resize(layer_sizes.size());
    layers_[0] = std::move(input);

    for (std::size_t l = 0; l + 1 < layer_sizes.size(); ++l)
    {
        auto const & previous = layers_[l];
        auto & layer = layers_[l + 1];
        layer.resize(layer_sizes[l + 1]);

        // The activation is the same for the whole layer
        auto const type = nn.activation_types()[l];

        for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
        {
            // Bias first, then the weighted sum of the previous layer
            T sum = *weights++;
            for (std::size_t j = 0; j < layer_sizes[l]; ++j)
                sum += (*weights++) * previous[j];
            layer[i] = activation(sum, type);
        }
    }

    return layers_.back();
}
// Accumulates into gradient_ the gradient of the loss (1/2 of the squared L^2
// distance between the cached network output and the desired output) with
// respect to every weight of nn.
//
// Precondition: apply() has been called with the same nn, since the layer
// values are read from the cache it fills.
//
// Layers are walked from the last one to the first. For the transition between
// layers l and l + 1, error_[i] is the derivative of the loss with respect to
// the pre-activation value of neuron i in layer l + 1, and offset is the index
// of the first weight of that transition (per neuron: bias, then one weight
// per input neuron).
//
// Throws empty_neural_net_error if nn is empty and wrong_neural_net_output_size
// if output.size() differs from the size of the last layer.
template <typename T>
void neural_net_learner<T>::backpropagate(neural_net<T> const & nn, std::vector<T> const & output)
{
    if (nn.empty())
        throw empty_neural_net_error{};

    auto const layer_sizes = nn.layer_sizes();
    auto const activation_types = nn.activation_types();
    auto const weights = nn.weights();

    if (output.size() != layer_sizes.back())
        throw wrong_neural_net_output_size(layer_sizes.back(), output.size());

    // resize() keeps the existing entries, so a previously accumulated
    // gradient is preserved; new entries start at zero
    gradient_.resize(weights.size());

    std::size_t offset = gradient_.size();
    for (std::size_t l = layer_sizes.size() - 1; l-- > 0;)
    {
        if (l + 2 == layer_sizes.size())
        {
            // Output layer: loss derivative times activation derivative
            error_.resize(output.size());
            for (std::size_t i = 0; i < output.size(); ++i)
            {
                T const value = layers_.back()[i];
                error_[i] = (value - output[i]) * activation_derivative(value, activation_types.back());
            }
        }
        else
        {
            // Hidden layer: pull the error of layer l + 2 back through the
            // weights between layers l + 1 and l + 2, which start at offset
            // (it has not been moved to the current transition yet)
            error_tmp_.assign(layer_sizes[l + 1], T{0});
            for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
            {
                std::size_t const row_offset = offset + i * (layer_sizes[l + 1] + 1);
                // + 1 skips the bias, which does not depend on the previous layer
                for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
                    error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
            }
            for (std::size_t i = 0; i < error_tmp_.size(); ++i)
                error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);

            // Swap instead of move-assign: both scratch buffers keep their
            // storage, so repeated calls do not reallocate
            error_.swap(error_tmp_);
        }

        // Move to the first weight of the transition between layers l and l + 1
        offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];

        for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
        {
            std::size_t const row_offset = offset + i * (layer_sizes[l] + 1);
            // Bias gradient
            gradient_[row_offset] += error_[i];
            // Weight gradients: error times the value feeding the weight
            for (std::size_t j = 0; j < layer_sizes[l]; ++j)
                gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
        }
    }
}
// Returns the square root of the sum of geom::sqr() over all entries of the
// accumulated gradient, i.e. its Euclidean norm assuming geom::sqr(x) is x * x.
template <typename T>
T neural_net_learner<T>::gradient_norm() const
{
    T sum_of_squares = T{0};
    for (auto it = gradient_.begin(); it != gradient_.end(); ++it)
    {
        T const component = *it;
        sum_of_squares += geom::sqr(component);
    }
    return std::sqrt(sum_of_squares);
}
// Performs one gradient descent step: subtracts factor times the accumulated
// gradient from the weights of nn, entry by entry. The gradient itself is left
// untouched.
//
// If nothing has been accumulated yet (no backpropagate() call), the gradient
// is treated as zero and nn is not modified.
// Precondition otherwise: the gradient was accumulated for nn, i.e. it has one
// entry per weight of nn.
template <typename T>
void neural_net_learner<T>::descend(neural_net<T> & nn, T factor) const
{
    // An empty gradient has no storage to read from; stepping along it would
    // read out of bounds for any network that has weights
    if (gradient_.empty())
        return;

    auto gradient = gradient_.data();
    for (auto & w : nn.weights())
        w -= factor * (*gradient++);
}
// Sets every entry of the accumulated gradient to zero; the number of
// entries is left unchanged.
template <typename T>
void neural_net_learner<T>::clear()
{
    for (auto & component : gradient_)
        component = T{0};
}
}

View file

@ -0,0 +1,9 @@
#include <psemek/ml/neural_net/learner.hpp>
namespace psemek::ml
{
// Explicit instantiation definitions: the float and double versions of
// neural_net_learner are compiled in this translation unit, matching the
// extern template declarations in the header
template struct neural_net_learner<float>;
template struct neural_net_learner<double>;
}

View file

@ -0,0 +1,60 @@
#include <psemek/test/test.hpp>
#include <psemek/ml/neural_net/learner.hpp>
#include <psemek/ml/neural_net/evaluator.hpp>
#include <psemek/ml/neural_net/randomize.hpp>
#include <psemek/ml/neural_net/loss.hpp>
#include <psemek/random/generator.hpp>
#include <psemek/random/uniform.hpp>
#include <psemek/geom/math.hpp>
using namespace psemek::ml;
using namespace psemek::random;
using namespace psemek::geom;
// Compares the gradient computed by backpropagation with a central
// finite-difference estimate of the L2 loss, weight by weight, on randomly
// generated networks, inputs and desired outputs.
test_case(ml_neural__net_gradient)
{
    generator rng;

    // Finite-difference step
    double const eps = 1e-6;

    auto fill_uniform = [&rng](std::vector<double> & values)
    {
        for (auto & value : values)
            value = uniform<double>(rng);
    };

    for (std::size_t iteration = 0; iteration < 64; ++iteration)
    {
        // Random topology: layer count drawn from uniform(2, 5),
        // every layer size from uniform(1, 50)
        std::vector<std::size_t> layer_sizes(uniform<std::size_t>(rng, 2, 5));
        for (auto & size : layer_sizes)
            size = uniform<std::size_t>(rng, 1, 50);

        // One randomly chosen activation per layer transition
        std::vector<activation_type> layer_activations(layer_sizes.size() - 1);
        for (auto & type : layer_activations)
            type = static_cast<activation_type>(uniform<std::size_t>(rng, 0, static_cast<std::size_t>(activation_type::count) - 1));

        neural_net<double> nn(std::move(layer_sizes), std::move(layer_activations));
        randomize_normal(nn, rng);

        std::vector<double> input(nn.layer_sizes().front());
        fill_uniform(input);

        std::vector<double> output(nn.layer_sizes().back());
        fill_uniform(output);

        // Analytic gradient
        neural_net_learner<double> learner;
        learner.apply(nn, input);
        learner.backpropagate(nn, output);

        // Numeric gradient: nudge one weight at a time in both directions
        neural_net_evaluator<double> evaluator;
        for (std::size_t i = 0; i < nn.weights().size(); ++i)
        {
            double const saved = nn.weights()[i];

            nn.weights()[i] -= eps;
            double const loss_below = l2_loss(evaluator.apply(nn, input), output);

            nn.weights()[i] += 2.0 * eps;
            double const loss_above = l2_loss(evaluator.apply(nn, input), output);

            nn.weights()[i] = saved;

            double numeric_gradient = (loss_above - loss_below) / 2.0 / eps;
            expect_close(numeric_gradient, learner.gradient()[i], 1e-6);
        }
    }
}

View file

@ -0,0 +1,150 @@
#include <psemek/test/test.hpp>
#include <psemek/ml/neural_net/learner.hpp>
#include <psemek/ml/neural_net/evaluator.hpp>
#include <psemek/ml/neural_net/randomize.hpp>
#include <psemek/ml/neural_net/loss.hpp>
#include <psemek/random/generator.hpp>
#include <psemek/random/uniform.hpp>
#include <psemek/geom/math.hpp>
#include <psemek/log/log.hpp>
using namespace psemek::ml;
using namespace psemek::random;
using namespace psemek::geom;
using namespace psemek::log;
namespace
{

// Trains nn on batch with plain full-batch gradient descent.
//
// Every iteration clears the gradient, accumulates it over all
// (input, desired output) pairs of batch, and then takes one descent step
// scaled by mu. The L2 loss averaged over the batch, measured before the step,
// is logged at debug level about 8 times over the whole run.
//
// Returns the averaged loss measured during the last iteration, or 0 if batch
// is empty or iterations is 0.
double learn_batch(neural_net<double> & nn, std::vector<std::pair<std::vector<double>, std::vector<double>>> const & batch, double mu, std::size_t iterations)
{
    // Nothing to learn from; this also avoids a descent step along a gradient
    // that was never computed
    if (batch.empty())
        return 0.0;

    neural_net_learner<double> learner;

    // Clamp to 1: with fewer than 8 iterations the quotient is 0 and the
    // modulo below would divide by zero
    std::size_t const debug_report_frequency = iterations >= 8 ? iterations / 8 : 1;

    double error = 0.0;
    for (std::size_t iteration = 0; iteration < iterations; ++iteration)
    {
        learner.clear();
        error = 0.0;
        for (auto const & data : batch)
        {
            learner.apply(nn, data.first);
            error += l2_loss(data.second, learner.result()) / batch.size();
            learner.backpropagate(nn, data.second);
        }

        if ((iteration % debug_report_frequency) == 0)
            debug() << "Iteration " << iteration << " error: " << error;

        learner.descend(nn, mu);
    }
    return error;
}

}
// A 1 -> 1 sigmoid network has to learn logical NOT.
test_case(ml_neural__net_learn_not__2__layers)
{
    using sample = std::pair<std::vector<double>, std::vector<double>>;

    // {input, desired output}
    std::vector<sample> const dataset
    {
        {{0.0}, {1.0}},
        {{1.0}, {0.0}},
    };

    generator rng;
    neural_net<double> nn({1, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    double error = learn_batch(nn, dataset, 16.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}
// A 2 -> 1 sigmoid network has to learn logical AND.
test_case(ml_neural__net_learn_and__2__layers)
{
    using sample = std::pair<std::vector<double>, std::vector<double>>;

    // {input, desired output}
    std::vector<sample> const dataset
    {
        {{0.0, 0.0}, {0.0}},
        {{1.0, 0.0}, {0.0}},
        {{0.0, 1.0}, {0.0}},
        {{1.0, 1.0}, {1.0}},
    };

    generator rng;
    neural_net<double> nn({2, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    double error = learn_batch(nn, dataset, 16.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}
// A 2 -> 2 -> 1 sigmoid network has to learn logical AND.
test_case(ml_neural__net_learn_and__3__layers)
{
    using sample = std::pair<std::vector<double>, std::vector<double>>;

    // {input, desired output}
    std::vector<sample> const dataset
    {
        {{0.0, 0.0}, {0.0}},
        {{1.0, 0.0}, {0.0}},
        {{0.0, 1.0}, {0.0}},
        {{1.0, 1.0}, {1.0}},
    };

    generator rng;
    neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    double error = learn_batch(nn, dataset, 1.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}
// A 2 -> 1 sigmoid network has to learn logical OR.
test_case(ml_neural__net_learn_or__2__layers)
{
    using sample = std::pair<std::vector<double>, std::vector<double>>;

    // {input, desired output}
    std::vector<sample> const dataset
    {
        {{0.0, 0.0}, {0.0}},
        {{1.0, 0.0}, {1.0}},
        {{0.0, 1.0}, {1.0}},
        {{1.0, 1.0}, {1.0}},
    };

    generator rng;
    neural_net<double> nn({2, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    double error = learn_batch(nn, dataset, 16.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}
// A 2 -> 2 -> 1 sigmoid network has to learn logical OR.
test_case(ml_neural__net_learn_or__3__layers)
{
    using sample = std::pair<std::vector<double>, std::vector<double>>;

    // {input, desired output}
    std::vector<sample> const dataset
    {
        {{0.0, 0.0}, {0.0}},
        {{1.0, 0.0}, {1.0}},
        {{0.0, 1.0}, {1.0}},
        {{1.0, 1.0}, {1.0}},
    };

    generator rng;
    neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    double error = learn_batch(nn, dataset, 1.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}
// A 2 -> 2 -> 1 sigmoid network has to learn logical XOR: the desired output
// is 1 exactly when the two inputs differ.
test_case(ml_neural__net_learn_xor__3__layers)
{
    generator rng;
    neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
    randomize_normal(nn, rng);

    // XOR truth table as {input, desired output}. The targets used to be
    // 1, 0, 0, 1, which is XNOR (the complement of XOR) rather than the
    // function this test is named after.
    std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
    dataset.push_back({{0.0, 0.0}, {0.0}});
    dataset.push_back({{1.0, 0.0}, {1.0}});
    dataset.push_back({{0.0, 1.0}, {1.0}});
    dataset.push_back({{1.0, 1.0}, {0.0}});

    double error = learn_batch(nn, dataset, 1.0, 1024);
    expect_less(error, 1e-3);
    info() << "Error: " << error;
}