Add ml::neural_net_learner implementation with backpropagation, gradient descent, and tests
This commit is contained in:
parent
def0615baa
commit
8630525dcf
5 changed files with 384 additions and 0 deletions
|
|
@ -4,3 +4,5 @@ file(GLOB_RECURSE PSEMEK_ML_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "sour
|
|||
psemek_add_library(psemek-ml ${PSEMEK_ML_HEADERS} ${PSEMEK_ML_SOURCES})
|
||||
target_include_directories(psemek-ml PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
target_link_libraries(psemek-ml PUBLIC psemek-util psemek-geom psemek-random)
|
||||
|
||||
psemek_glob_tests(psemek-ml tests)
|
||||
|
|
|
|||
163
libs/ml/include/psemek/ml/neural_net/learner.hpp
Normal file
163
libs/ml/include/psemek/ml/neural_net/learner.hpp
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
#pragma once
|
||||
|
||||
#include <psemek/ml/neural_net/neural_net.hpp>
|
||||
#include <psemek/ml/neural_net/evaluator.hpp>
|
||||
#include <psemek/geom/math.hpp>
|
||||
|
||||
namespace psemek::ml
|
||||
{
|
||||
|
||||
// A helper class to facilitate allocation-free multiple
|
||||
// evaluation of a neural-net followed by backpropagation
|
||||
template <typename T>
|
||||
struct neural_net_learner
|
||||
{
|
||||
std::vector<T> const & apply(neural_net<T> const & nn, std::vector<T> input) const;
|
||||
std::vector<T> const & result() const { return layers_.back(); }
|
||||
|
||||
// Compute the gradient of a loss function (defined as 1/2 of L^2 norm
|
||||
// of the difference between neural net output and desired output)
|
||||
// wrt neural net weights and accumulate them to the already computed
|
||||
// gradient
|
||||
void backpropagate(neural_net<T> const & nn, std::vector<T> const & output);
|
||||
|
||||
util::span<T const> gradient() const { return gradient_; }
|
||||
util::span<T> gradient() { return gradient_; }
|
||||
|
||||
T gradient_norm() const;
|
||||
|
||||
// Perform a single step of gradient descent in the direction
|
||||
// of the computed gradient, multiplied by factor
|
||||
// N.B.: this does **not** clear out the gradient
|
||||
void descend(neural_net<T> & nn, T factor) const;
|
||||
|
||||
// Reset the accumulated gradient to zero
|
||||
void clear();
|
||||
|
||||
private:
|
||||
mutable std::vector<std::vector<T>> layers_;
|
||||
std::vector<T> gradient_;
|
||||
std::vector<T> error_, error_tmp_;
|
||||
};
|
||||
|
||||
extern template struct neural_net_learner<float>;
|
||||
extern template struct neural_net_learner<double>;
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> const & neural_net_learner<T>::apply(neural_net<T> const & nn, std::vector<T> input) const
|
||||
{
|
||||
if (nn.empty())
|
||||
throw empty_neural_net_error{};
|
||||
|
||||
auto layer_sizes = nn.layer_sizes();
|
||||
auto weights = nn.weights().begin();
|
||||
|
||||
layers_.resize(layer_sizes.size());
|
||||
|
||||
if (layer_sizes[0] != input.size())
|
||||
throw wrong_neural_net_input_size{layer_sizes[0], input.size()};
|
||||
|
||||
layers_[0] = std::move(input);
|
||||
|
||||
for (std::size_t l = 0; l + 1 < layer_sizes.size(); ++l)
|
||||
{
|
||||
auto & layer = layers_[l + 1];
|
||||
layer.resize(layer_sizes[l + 1]);
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
||||
{
|
||||
layer[i] = *weights++;
|
||||
|
||||
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
||||
layer[i] += (*weights++) * layers_[l][j];
|
||||
|
||||
layer[i] = activation(layer[i], nn.activation_types()[l]);
|
||||
}
|
||||
}
|
||||
|
||||
return layers_.back();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void neural_net_learner<T>::backpropagate(neural_net<T> const & nn, std::vector<T> const & output)
|
||||
{
|
||||
if (nn.empty())
|
||||
throw empty_neural_net_error{};
|
||||
|
||||
auto const layer_sizes = nn.layer_sizes();
|
||||
auto const activation_types = nn.activation_types();
|
||||
auto const weights = nn.weights();
|
||||
|
||||
if (output.size() != layer_sizes.back())
|
||||
throw wrong_neural_net_output_size(layer_sizes.back(), output.size());
|
||||
|
||||
gradient_.resize(nn.weights().size());
|
||||
|
||||
std::size_t offset = gradient_.size();
|
||||
for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
|
||||
{
|
||||
if (l + 2 == layer_sizes.size())
|
||||
{
|
||||
error_.resize(output.size());
|
||||
for (std::size_t i = 0; i < output.size(); ++i)
|
||||
{
|
||||
T const value = layers_.back()[i];
|
||||
error_[i] = (value - output[i]) * activation_derivative(value, activation_types.back());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error_tmp_.assign(layer_sizes[l + 1], 0.f);
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
|
||||
{
|
||||
std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1);
|
||||
|
||||
for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
|
||||
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < error_tmp_.size(); ++i)
|
||||
error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);
|
||||
|
||||
error_ = std::move(error_tmp_);
|
||||
}
|
||||
|
||||
offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
||||
{
|
||||
std::size_t row_offset = offset + i * (layer_sizes[l] + 1);
|
||||
gradient_[row_offset] += error_[i];
|
||||
|
||||
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
||||
gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T neural_net_learner<T>::gradient_norm() const
|
||||
{
|
||||
T value = T{0};
|
||||
for (auto g : gradient_)
|
||||
value += geom::sqr(g);
|
||||
return std::sqrt(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void neural_net_learner<T>::descend(neural_net<T> & nn, T factor) const
|
||||
{
|
||||
auto gradient = gradient_.data();
|
||||
|
||||
for (auto & w : nn.weights())
|
||||
w -= factor * (*gradient++);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void neural_net_learner<T>::clear()
|
||||
{
|
||||
std::fill(gradient_.begin(), gradient_.end(), T{0});
|
||||
}
|
||||
|
||||
}
|
||||
9
libs/ml/source/neural_net/learner.cpp
Normal file
9
libs/ml/source/neural_net/learner.cpp
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
#include <psemek/ml/neural_net/learner.hpp>
|
||||
|
||||
namespace psemek::ml
|
||||
{
|
||||
|
||||
template struct neural_net_learner<float>;
|
||||
template struct neural_net_learner<double>;
|
||||
|
||||
}
|
||||
60
libs/ml/tests/neural_net/gradient.cpp
Normal file
60
libs/ml/tests/neural_net/gradient.cpp
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#include <psemek/test/test.hpp>
|
||||
|
||||
#include <psemek/ml/neural_net/learner.hpp>
|
||||
#include <psemek/ml/neural_net/evaluator.hpp>
|
||||
#include <psemek/ml/neural_net/randomize.hpp>
|
||||
#include <psemek/ml/neural_net/loss.hpp>
|
||||
#include <psemek/random/generator.hpp>
|
||||
#include <psemek/random/uniform.hpp>
|
||||
#include <psemek/geom/math.hpp>
|
||||
|
||||
using namespace psemek::ml;
|
||||
using namespace psemek::random;
|
||||
using namespace psemek::geom;
|
||||
|
||||
test_case(ml_neural__net_gradient)
|
||||
{
|
||||
generator rng;
|
||||
for (std::size_t iteration = 0; iteration < 64; ++iteration)
|
||||
{
|
||||
std::vector<std::size_t> sizes;
|
||||
sizes.resize(uniform<std::size_t>(rng, 2, 5));
|
||||
for (auto & s : sizes)
|
||||
s = uniform<std::size_t>(rng, 1, 50);
|
||||
|
||||
std::vector<activation_type> activations(sizes.size() - 1);
|
||||
for (auto & a : activations)
|
||||
a = static_cast<activation_type>(uniform<std::size_t>(rng, 0, static_cast<std::size_t>(activation_type::count) - 1));
|
||||
|
||||
neural_net<double> nn(std::move(sizes), std::move(activations));
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<double> input(nn.layer_sizes().front());
|
||||
for (auto & x : input)
|
||||
x = uniform<double>(rng);
|
||||
|
||||
std::vector<double> output(nn.layer_sizes().back());
|
||||
for (auto & x : output)
|
||||
x = uniform<double>(rng);
|
||||
|
||||
neural_net_learner<double> learner;
|
||||
learner.apply(nn, input);
|
||||
learner.backpropagate(nn, output);
|
||||
|
||||
double const eps = 1e-6;
|
||||
|
||||
neural_net_evaluator<double> evaluator;
|
||||
for (std::size_t i = 0; i < nn.weights().size(); ++i)
|
||||
{
|
||||
double old = nn.weights()[i];
|
||||
nn.weights()[i] -= eps;
|
||||
double v0 = l2_loss(evaluator.apply(nn, input), output);
|
||||
nn.weights()[i] += 2.0 * eps;
|
||||
double v1 = l2_loss(evaluator.apply(nn, input), output);
|
||||
nn.weights()[i] = old;
|
||||
|
||||
double numeric_gradient = (v1 - v0) / 2.0 / eps;
|
||||
expect_close(numeric_gradient, learner.gradient()[i], 1e-6);
|
||||
}
|
||||
}
|
||||
}
|
||||
150
libs/ml/tests/neural_net/learn.cpp
Normal file
150
libs/ml/tests/neural_net/learn.cpp
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#include <psemek/test/test.hpp>
|
||||
|
||||
#include <psemek/ml/neural_net/learner.hpp>
|
||||
#include <psemek/ml/neural_net/evaluator.hpp>
|
||||
#include <psemek/ml/neural_net/randomize.hpp>
|
||||
#include <psemek/ml/neural_net/loss.hpp>
|
||||
#include <psemek/random/generator.hpp>
|
||||
#include <psemek/random/uniform.hpp>
|
||||
#include <psemek/geom/math.hpp>
|
||||
#include <psemek/log/log.hpp>
|
||||
|
||||
using namespace psemek::ml;
|
||||
using namespace psemek::random;
|
||||
using namespace psemek::geom;
|
||||
using namespace psemek::log;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
double learn_batch(neural_net<double> & nn, std::vector<std::pair<std::vector<double>, std::vector<double>>> const & batch, double mu, std::size_t iterations)
|
||||
{
|
||||
neural_net_learner<double> learner;
|
||||
std::size_t const debug_report_frequency = iterations / 8;
|
||||
|
||||
double error = 0.0;
|
||||
for (std::size_t iteration = 0; iteration < iterations; ++iteration)
|
||||
{
|
||||
learner.clear();
|
||||
error = 0.0;
|
||||
for (auto const & data : batch)
|
||||
{
|
||||
learner.apply(nn, data.first);
|
||||
error += l2_loss(data.second, learner.result()) / batch.size();
|
||||
learner.backpropagate(nn, data.second);
|
||||
}
|
||||
if ((iteration % debug_report_frequency) == 0)
|
||||
debug() << "Iteration " << iteration << " error: " << error;
|
||||
learner.descend(nn, mu);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_not__2__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({1, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0}, {1.0}});
|
||||
dataset.push_back({{1.0}, {0.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 16.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_and__2__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({2, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{0.0, 1.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 1.0}, {1.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 16.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_and__3__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{0.0, 1.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 1.0}, {1.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 1.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_or__2__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({2, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 0.0}, {1.0}});
|
||||
dataset.push_back({{0.0, 1.0}, {1.0}});
|
||||
dataset.push_back({{1.0, 1.0}, {1.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 16.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_or__3__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 0.0}, {1.0}});
|
||||
dataset.push_back({{0.0, 1.0}, {1.0}});
|
||||
dataset.push_back({{1.0, 1.0}, {1.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 1.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
|
||||
test_case(ml_neural__net_learn_xor__3__layers)
|
||||
{
|
||||
generator rng;
|
||||
neural_net<double> nn({2, 2, 1}, activation_type::sigmoid);
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
std::vector<std::pair<std::vector<double>, std::vector<double>>> dataset;
|
||||
|
||||
dataset.push_back({{0.0, 0.0}, {1.0}});
|
||||
dataset.push_back({{1.0, 0.0}, {0.0}});
|
||||
dataset.push_back({{0.0, 1.0}, {0.0}});
|
||||
dataset.push_back({{1.0, 1.0}, {1.0}});
|
||||
|
||||
double error = learn_batch(nn, dataset, 1.0, 1024);
|
||||
expect_less(error, 1e-3);
|
||||
info() << "Error: " << error;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue