From 8630525dcfb62c4a4a44357855fa25ca3f213e93 Mon Sep 17 00:00:00 2001
From: lisyarus
Date: Fri, 21 Jan 2022 12:12:38 +0300
Subject: [PATCH] Add ml::neural_net_learner implementation with backpropagation, gradient descent, and tests

---
 libs/ml/CMakeLists.txt                        |   2 +
 .../include/psemek/ml/neural_net/learner.hpp  | 198 ++++++++++++++++++
 libs/ml/source/neural_net/learner.cpp         |   9 +
 libs/ml/tests/neural_net/gradient.cpp         |  60 +++++++
 libs/ml/tests/neural_net/learn.cpp            | 150 ++++++++++++++++
 5 files changed, 419 insertions(+)
 create mode 100644 libs/ml/include/psemek/ml/neural_net/learner.hpp
 create mode 100644 libs/ml/source/neural_net/learner.cpp
 create mode 100644 libs/ml/tests/neural_net/gradient.cpp
 create mode 100644 libs/ml/tests/neural_net/learn.cpp

diff --git a/libs/ml/CMakeLists.txt b/libs/ml/CMakeLists.txt
index 46124ad4..216f7f82 100644
--- a/libs/ml/CMakeLists.txt
+++ b/libs/ml/CMakeLists.txt
@@ -4,3 +4,5 @@ file(GLOB_RECURSE PSEMEK_ML_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "sour
 psemek_add_library(psemek-ml ${PSEMEK_ML_HEADERS} ${PSEMEK_ML_SOURCES})
 target_include_directories(psemek-ml PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
 target_link_libraries(psemek-ml PUBLIC psemek-util psemek-geom psemek-random)
+
+psemek_glob_tests(psemek-ml tests)
diff --git a/libs/ml/include/psemek/ml/neural_net/learner.hpp b/libs/ml/include/psemek/ml/neural_net/learner.hpp
new file mode 100644
index 00000000..5de2a28a
--- /dev/null
+++ b/libs/ml/include/psemek/ml/neural_net/learner.hpp
@@ -0,0 +1,198 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace psemek::ml
+{
+
+    // A helper class that keeps per-layer values and scratch buffers
+    // between calls, so that a neural net can be evaluated and then
+    // backpropagated many times while reusing the same storage
+    template
+    struct neural_net_learner
+    {
+        // Evaluate nn on input, remembering the values of every layer for a
+        // later backpropagate(), and return the values of the last layer.
+        // Throws empty_neural_net_error if nn is empty and
+        // wrong_neural_net_input_size if input doesn't fit the first layer
+        std::vector const & apply(neural_net const & nn, std::vector input) const;
+
+        // Values of the last layer as of the most recent apply()
+        // N.B.: must not be called before the first apply()
+        std::vector const & result() const { return layers_.back(); }
+
+        // Compute the gradient of a loss function (defined as 1/2 of the
+        // squared L^2 norm of the difference between neural net output and
+        // desired output) wrt neural net weights and add it to the already
+        // accumulated gradient. Relies on the layer values stored by the
+        // most recent apply(), presumably made with the same nn.
+        // Throws empty_neural_net_error if nn is empty and
+        // wrong_neural_net_output_size if output doesn't fit the last layer
+        void backpropagate(neural_net const & nn, std::vector const & output);
+
+        // Accumulated gradient, one entry per weight in nn.weights() order
+        util::span gradient() const { return gradient_; }
+        util::span gradient() { return gradient_; }
+
+        // Euclidean norm of the accumulated gradient
+        T gradient_norm() const;
+
+        // Perform a single step of gradient descent in the direction
+        // of the computed gradient, multiplied by factor
+        // N.B.: this does **not** clear out the gradient
+        void descend(neural_net & nn, T factor) const;
+
+        // Reset the accumulated gradient to zero
+        void clear();
+
+    private:
+        // Values of every layer from the most recent apply(); mutable
+        // because apply() is a const member
+        mutable std::vector> layers_;
+        // Gradient accumulated by backpropagate()
+        std::vector gradient_;
+        // Scratch buffers for the per-layer error terms in backpropagate()
+        std::vector error_, error_tmp_;
+    };
+
+    extern template struct neural_net_learner;
+    extern template struct neural_net_learner;
+
+    // Forward pass: layers_[0] is the input, every following layer is computed
+    // from the previous one. The weights of nn are consumed sequentially: for
+    // each neuron, one weight that is added as is (the bias), followed by one
+    // weight per neuron of the previous layer
+    template
+    std::vector const & neural_net_learner::apply(neural_net const & nn, std::vector input) const
+    {
+        if (nn.empty())
+            throw empty_neural_net_error{};
+
+        auto layer_sizes = nn.layer_sizes();
+        auto weights = nn.weights().begin();
+
+        layers_.resize(layer_sizes.size());
+
+        if (layer_sizes[0] != input.size())
+            throw wrong_neural_net_input_size{layer_sizes[0], input.size()};
+
+        layers_[0] = std::move(input);
+
+        for (std::size_t l = 0; l + 1 < layer_sizes.size(); ++l)
+        {
+            auto & layer = layers_[l + 1];
+            layer.resize(layer_sizes[l + 1]);
+
+            for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
+            {
+                // The bias comes first, then the incoming connections
+                layer[i] = *weights++;
+
+                for (std::size_t j = 0; j < layer_sizes[l]; ++j)
+                    layer[i] += (*weights++) * layers_[l][j];
+
+                layer[i] = activation(layer[i], nn.activation_types()[l]);
+            }
+        }
+
+        return layers_.back();
+    }
+
+    // Backward pass: walks the layers from the last to the first, with
+    // offset tracking where the weights between the current pair of
+    // layers start inside nn.weights() and gradient_
+    template
+    void neural_net_learner::backpropagate(neural_net const & nn, std::vector const & output)
+    {
+        if (nn.empty())
+            throw empty_neural_net_error{};
+
+        auto const layer_sizes = nn.layer_sizes();
+        auto const activation_types = nn.activation_types();
+        auto const weights = nn.weights();
+
+        if (output.size() != layer_sizes.back())
+            throw wrong_neural_net_output_size(layer_sizes.back(), output.size());
+
+        gradient_.resize(nn.weights().size());
+
+        std::size_t offset = gradient_.size();
+        for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
+        {
+            if (l + 2 == layer_sizes.size())
+            {
+                // Output layer: the error is the difference from the desired
+                // output scaled by the derivative of the activation
+                error_.resize(output.size());
+                for (std::size_t i = 0; i < output.size(); ++i)
+                {
+                    T const value = layers_.back()[i];
+                    error_[i] = (value - output[i]) * activation_derivative(value, activation_types.back());
+                }
+            }
+            else
+            {
+                // Hidden layer: pull the error of layer l + 2 back through
+                // the weights that connect layers l + 1 and l + 2
+                error_tmp_.assign(layer_sizes[l + 1], T{0});
+
+                for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
+                {
+                    std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1);
+
+                    // The extra + 1 skips the bias at the start of the row
+                    for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
+                        error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
+                }
+
+                for (std::size_t i = 0; i < error_tmp_.size(); ++i)
+                    error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);
+
+                // Swap rather than move-assign, so that both buffers keep
+                // their storage and later passes don't have to reallocate
+                error_.swap(error_tmp_);
+            }
+
+            offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];
+
+            for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
+            {
+                std::size_t row_offset = offset + i * (layer_sizes[l] + 1);
+                // Bias gradient first, then one entry per incoming weight
+                gradient_[row_offset] += error_[i];
+
+                for (std::size_t j = 0; j < layer_sizes[l]; ++j)
+                    gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
+            }
+        }
+    }
+
+    template
+    T neural_net_learner::gradient_norm() const
+    {
+        T value = T{0};
+        for (auto g : gradient_)
+            value += geom::sqr(g);
+        return std::sqrt(value);
+    }
+
+    template
+    void neural_net_learner::descend(neural_net & nn, T factor) const
+    {
+        // Walks gradient_ in lockstep with nn.weights(), so gradient_ has to
+        // hold one entry per weight already (backpropagate() sizes it so)
+        auto gradient = gradient_.data();
+
+        for (auto & w : nn.weights())
+            w -= factor * (*gradient++);
+    }
+
+    template
+    void neural_net_learner::clear()
+    {
+        std::fill(gradient_.begin(), gradient_.end(), T{0});
+    }
+
+}
diff --git a/libs/ml/source/neural_net/learner.cpp b/libs/ml/source/neural_net/learner.cpp
new file mode 100644
index 00000000..8c2e4e93
--- /dev/null
+++ b/libs/ml/source/neural_net/learner.cpp
@@
-0,0 +1,9 @@
+#include
+
+namespace psemek::ml
+{
+    // Explicit instantiations of the neural_net_learner template
+    template struct neural_net_learner;
+    template struct neural_net_learner;
+
+}
diff --git a/libs/ml/tests/neural_net/gradient.cpp b/libs/ml/tests/neural_net/gradient.cpp
new file mode 100644
index 00000000..29125fab
--- /dev/null
+++ b/libs/ml/tests/neural_net/gradient.cpp
@@ -0,0 +1,60 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace psemek::ml;
+using namespace psemek::random;
+using namespace psemek::geom;
+
+test_case(ml_neural__net_gradient)
+{
+    generator rng;
+    for (std::size_t round = 0; round < 64; ++round)
+    {
+        std::vector sizes;
+        sizes.resize(uniform(rng, 2, 5));
+        for (auto & size : sizes)
+            size = uniform(rng, 1, 50);
+        // One random activation type per pair of adjacent layers
+        std::vector activations(sizes.size() - 1);
+        for (auto & type : activations)
+            type = static_cast(uniform(rng, 0, static_cast(activation_type::count) - 1));
+
+        neural_net nn(std::move(sizes), std::move(activations));
+        randomize_normal(nn, rng);
+        // Random input and desired output sized after the first and last layer
+        std::vector input(nn.layer_sizes().front());
+        for (auto & value : input)
+            value = uniform(rng);
+
+        std::vector output(nn.layer_sizes().back());
+        for (auto & value : output)
+            value = uniform(rng);
+        // Backpropagated gradient of the loss for this single sample
+        neural_net_learner learner;
+        learner.apply(nn, input);
+        learner.backpropagate(nn, output);
+
+        double const step = 1e-6;
+        // Compare it with a central difference taken in every single weight
+        neural_net_evaluator evaluator;
+        for (std::size_t i = 0; i < nn.weights().size(); ++i)
+        {
+            double saved = nn.weights()[i];
+            nn.weights()[i] -= step;
+            double loss_below = l2_loss(evaluator.apply(nn, input), output);
+            nn.weights()[i] += 2.0 * step;
+            double loss_above = l2_loss(evaluator.apply(nn, input), output);
+            nn.weights()[i] = saved;
+
+            double numeric_gradient = (loss_above - loss_below) / 2.0 / step;
+            expect_close(numeric_gradient, learner.gradient()[i], 1e-6);
+        }
+    }
+}
diff --git a/libs/ml/tests/neural_net/learn.cpp b/libs/ml/tests/neural_net/learn.cpp
new file mode 100644
index 00000000..8123b32d
--- /dev/null
+++ b/libs/ml/tests/neural_net/learn.cpp
@@ -0,0 +1,150 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace psemek::ml;
+using namespace psemek::random;
+using namespace psemek::geom;
+using namespace psemek::log;
+
+namespace
+{
+    // Full-batch gradient descent with step mu for the given number of iterations;
+    // returns the mean l2_loss over batch from the last iteration, before its step
+    double learn_batch(neural_net & nn, std::vector, std::vector>> const & batch, double mu, std::size_t iterations)
+    {
+        neural_net_learner learner;
+        // Roughly 8 reports per run; never 0, which would make the % below undefined
+        std::size_t const debug_report_frequency = iterations < 8 ? 1 : iterations / 8;
+
+        double error = 0.0;
+        for (std::size_t iteration = 0; iteration < iterations; ++iteration)
+        {
+            learner.clear();
+            error = 0.0;
+            for (auto const & data : batch)
+            {
+                learner.apply(nn, data.first);
+                error += l2_loss(data.second, learner.result()) / batch.size();
+                learner.backpropagate(nn, data.second);
+            }
+            if ((iteration % debug_report_frequency) == 0)
+                debug() << "Iteration " << iteration << " error: " << error;
+            learner.descend(nn, mu);
+        }
+
+        return error;
+    }
+}
+
+test_case(ml_neural__net_learn_not__2__layers)
+{
+    generator rng;
+    neural_net nn({1, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // Truth table of NOT: {input} -> {desired output}
+    dataset.push_back({{0.0}, {1.0}});
+    dataset.push_back({{1.0}, {0.0}});
+
+    double error = learn_batch(nn, dataset, 16.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}
+
+test_case(ml_neural__net_learn_and__2__layers)
+{
+    generator rng;
+    neural_net nn({2, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // Truth table of AND: {inputs} -> {desired output}
+    dataset.push_back({{0.0, 0.0}, {0.0}});
+    dataset.push_back({{1.0, 0.0}, {0.0}});
+    dataset.push_back({{0.0, 1.0}, {0.0}});
+    dataset.push_back({{1.0, 1.0}, {1.0}});
+
+    double error = learn_batch(nn, dataset, 16.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}
+
+test_case(ml_neural__net_learn_and__3__layers)
+{
+    generator rng;
+    neural_net nn({2, 2, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // Truth table of AND: {inputs} -> {desired output}
+    dataset.push_back({{0.0, 0.0}, {0.0}});
+    dataset.push_back({{1.0, 0.0}, {0.0}});
+    dataset.push_back({{0.0, 1.0}, {0.0}});
+    dataset.push_back({{1.0, 1.0}, {1.0}});
+
+    double error = learn_batch(nn, dataset, 1.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}
+
+test_case(ml_neural__net_learn_or__2__layers)
+{
+    generator rng;
+    neural_net nn({2, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // Truth table of OR: {inputs} -> {desired output}
+    dataset.push_back({{0.0, 0.0}, {0.0}});
+    dataset.push_back({{1.0, 0.0}, {1.0}});
+    dataset.push_back({{0.0, 1.0}, {1.0}});
+    dataset.push_back({{1.0, 1.0}, {1.0}});
+
+    double error = learn_batch(nn, dataset, 16.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}
+
+test_case(ml_neural__net_learn_or__3__layers)
+{
+    generator rng;
+    neural_net nn({2, 2, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // Truth table of OR: {inputs} -> {desired output}
+    dataset.push_back({{0.0, 0.0}, {0.0}});
+    dataset.push_back({{1.0, 0.0}, {1.0}});
+    dataset.push_back({{0.0, 1.0}, {1.0}});
+    dataset.push_back({{1.0, 1.0}, {1.0}});
+
+    double error = learn_batch(nn, dataset, 1.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}
+
+test_case(ml_neural__net_learn_xor__3__layers)
+{
+    generator rng;
+    neural_net nn({2, 2, 1}, activation_type::sigmoid);
+    randomize_normal(nn, rng);
+
+    std::vector, std::vector>> dataset;
+    // NOTE(review): this table is XNOR (negated XOR), not XOR as the test name says
+    dataset.push_back({{0.0, 0.0}, {1.0}});
+    dataset.push_back({{1.0, 0.0}, {0.0}});
+    dataset.push_back({{0.0, 1.0}, {0.0}});
+    dataset.push_back({{1.0, 1.0}, {1.0}});
+
+    double error = learn_batch(nn, dataset, 1.0, 1024);
+    expect_less(error, 1e-3);
+    info() << "Error: " << error;
+}