psemek/libs/ml/include/psemek/ml/neural_net/learner.hpp

163 lines
4.4 KiB
C++

#pragma once
#include <psemek/ml/neural_net/neural_net.hpp>
#include <psemek/ml/neural_net/evaluator.hpp>
#include <psemek/geom/math.hpp>
namespace psemek::ml
{
// A helper class to facilitate allocation-free repeated
// evaluation of a neural net followed by backpropagation.
// The per-layer activations, the accumulated gradient and the
// backpropagation scratch buffers are kept as members, so they
// are reused from one call to the next
template <typename T>
struct neural_net_learner
{
// Evaluate nn on input, store the activations of every layer
// inside the learner and return the activations of the last layer.
// Throws empty_neural_net_error if nn is empty and
// wrong_neural_net_input_size if input does not match the first layer.
// N.B.: although const, this overwrites the (mutable) cached activations
std::vector<T> const & apply(neural_net<T> const & nn, std::vector<T> input) const;
// Activations of the last layer, as stored by the latest apply() call
// N.B.: calls back() on the activation cache, so apply() must come first
std::vector<T> const & result() const { return layers_.back(); }
// Compute the gradient of a loss function (defined as 1/2 of L^2 norm
// of the difference between neural net output and desired output)
// wrt neural net weights and accumulate them to the already computed
// gradient.
// Reads the activations cached by apply(), so apply() has to be called
// with the same nn beforehand.
// Throws empty_neural_net_error if nn is empty and
// wrong_neural_net_output_size if output does not match the last layer
void backpropagate(neural_net<T> const & nn, std::vector<T> const & output);
// Read-only view of the accumulated gradient; entries follow
// the same order as nn.weights()
util::span<T const> gradient() const { return gradient_; }
// Mutable view of the accumulated gradient
util::span<T> gradient() { return gradient_; }
// Euclidean (L^2) norm of the accumulated gradient
T gradient_norm() const;
// Perform a single step of gradient descent in the direction
// of the computed gradient, multiplied by factor, i.e. every
// weight w becomes w - factor * (its gradient entry)
// N.B.: this does **not** clear out the gradient
void descend(neural_net<T> & nn, T factor) const;
// Reset the accumulated gradient to zero
void clear();
private:
// layers_[l] holds the activations of layer l from the latest apply()
// (layers_[0] is the input itself); mutable because apply() is const
mutable std::vector<std::vector<T>> layers_;
// Accumulated gradient, one entry per weight
std::vector<T> gradient_;
// Scratch buffers of backpropagate(): the error terms of the layer
// processed last and of the layer currently being computed
std::vector<T> error_, error_tmp_;
};
// Explicit instantiation declarations: tell the compiler not to implicitly
// instantiate these two specializations in translation units that include
// this header.
// NOTE(review): the matching explicit instantiation definitions are
// presumably provided by a source file of this library -- confirm
extern template struct neural_net_learner<float>;
extern template struct neural_net_learner<double>;
// Evaluate nn on input, caching the activations of every layer in layers_
// (backpropagate reads them afterwards), and return the activations of
// the last layer.
//
// Weights are consumed sequentially: for every neuron of every non-input
// layer, first its bias, then one weight per neuron of the previous layer.
//
// Throws empty_neural_net_error if nn is empty, and
// wrong_neural_net_input_size if input does not match the first layer.
template <typename T>
std::vector<T> const & neural_net_learner<T>::apply(neural_net<T> const & nn, std::vector<T> input) const
{
    if (nn.empty())
        throw empty_neural_net_error{};

    auto const & layer_sizes = nn.layer_sizes();

    // Validate before touching the cache, so that a rejected input
    // leaves the activations of the previous evaluation untouched
    if (layer_sizes[0] != input.size())
        throw wrong_neural_net_input_size{layer_sizes[0], input.size()};

    // Looked up once instead of once per neuron
    auto const & activation_types = nn.activation_types();
    auto weight = nn.weights().begin();

    layers_.resize(layer_sizes.size());
    layers_[0] = std::move(input);

    for (std::size_t l = 0; l + 1 < layer_sizes.size(); ++l)
    {
        // layers_ is not resized inside the loop, so both references stay valid
        auto const & previous = layers_[l];
        auto & current = layers_[l + 1];
        current.resize(layer_sizes[l + 1]);

        for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
        {
            // The bias comes first, followed by one weight per input
            T sum = *weight++;
            for (std::size_t j = 0; j < layer_sizes[l]; ++j)
                sum += (*weight++) * previous[j];

            current[i] = activation(sum, activation_types[l]);
        }
    }

    return layers_.back();
}
// Accumulate into gradient_ the gradient of the loss (1/2 of the squared
// L^2 distance between the net output and the desired output) with
// respect to the weights of nn.
//
// Reads the activations cached in layers_, which only apply() fills in,
// so apply() has to be called with the same nn beforehand.
//
// Throws empty_neural_net_error if nn is empty, and
// wrong_neural_net_output_size if output does not match the last layer.
template <typename T>
void neural_net_learner<T>::backpropagate(neural_net<T> const & nn, std::vector<T> const & output)
{
    if (nn.empty())
        throw empty_neural_net_error{};

    auto const layer_sizes = nn.layer_sizes();
    auto const activation_types = nn.activation_types();
    auto const weights = nn.weights();

    if (output.size() != layer_sizes.back())
        throw wrong_neural_net_output_size(layer_sizes.back(), output.size());

    // resize keeps the values accumulated so far and zero-fills new entries
    gradient_.resize(weights.size());

    // Start of the weight block handled last; initially one past the end,
    // since the blocks are walked from the last one to the first one
    std::size_t offset = gradient_.size();

    // l runs from the last weight block (layer l -> layer l + 1) down to 0
    for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
    {
        std::size_t const target_size = layer_sizes[l + 1];

        if (l + 2 == layer_sizes.size())
        {
            // Output layer: the derivative of the loss times the derivative
            // of the activation, evaluated at the cached activation value
            error_.resize(output.size());
            for (std::size_t i = 0; i < output.size(); ++i)
            {
                T const value = layers_.back()[i];
                error_[i] = (value - output[i]) * activation_derivative(value, activation_types.back());
            }
        }
        else
        {
            // Hidden layer: pull the error of layer l + 2 back through the
            // weight block of the previous iteration (offset still points
            // at it), skipping the bias at the start of every row
            std::size_t const stride = target_size + 1;

            error_tmp_.assign(target_size, T{0});
            for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
            {
                std::size_t const row_offset = offset + i * stride;
                for (std::size_t j = 0; j < target_size; ++j)
                    error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
            }

            for (std::size_t i = 0; i < error_tmp_.size(); ++i)
                error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);

            // Swap instead of move-assign: both buffers keep their storage,
            // so the next assign() above does not have to allocate again
            error_.swap(error_tmp_);
        }

        // Step back to the start of the weight block of layer l -> l + 1
        offset -= (layer_sizes[l] + 1) * target_size;

        for (std::size_t i = 0; i < target_size; ++i)
        {
            std::size_t const row_offset = offset + i * (layer_sizes[l] + 1);

            // Bias entry first, then one entry per neuron of layer l
            gradient_[row_offset] += error_[i];
            for (std::size_t j = 0; j < layer_sizes[l]; ++j)
                gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
        }
    }
}
// Euclidean (L^2) norm of the accumulated gradient: the square root
// of the sum of the squares of all its entries, summed front to back
template <typename T>
T neural_net_learner<T>::gradient_norm() const
{
    T sum_of_squares = T{0};

    for (std::size_t index = 0; index < gradient_.size(); ++index)
    {
        T const component = gradient_[index];
        sum_of_squares += geom::sqr(component);
    }

    return std::sqrt(sum_of_squares);
}
// Perform a single gradient descent step: subtract factor times the
// matching gradient entry from every weight of nn. The gradient entries
// are paired with the weights in iteration order.
//
// Weights without a matching gradient entry (e.g. when no gradient has
// been accumulated yet) are left unchanged, which is equivalent to
// treating the missing entries as zero.
//
// N.B.: the accumulated gradient itself is not modified.
template <typename T>
void neural_net_learner<T>::descend(neural_net<T> & nn, T factor) const
{
    auto gradient = gradient_.data();
    auto const gradient_end = gradient + gradient_.size();

    for (auto & w : nn.weights())
    {
        // Never read past the end of the accumulated gradient
        if (gradient == gradient_end)
            break;

        w -= factor * (*gradient++);
    }
}
// Zero out every entry of the accumulated gradient; its size is kept
template <typename T>
void neural_net_learner<T>::clear()
{
    for (auto & component : gradient_)
        component = T{0};
}
}