163 lines
4.4 KiB
C++
163 lines
4.4 KiB
C++
#pragma once
|
|
|
|
#include <psemek/ml/neural_net/neural_net.hpp>
|
|
#include <psemek/ml/neural_net/evaluator.hpp>
|
|
#include <psemek/geom/math.hpp>
|
|
|
|
namespace psemek::ml
|
|
{
|
|
|
|
// A helper class to facilitate allocation-free multiple
|
|
// evaluation of a neural-net followed by backpropagation
|
|
template <typename T>
|
|
struct neural_net_learner
|
|
{
|
|
std::vector<T> const & apply(neural_net<T> const & nn, std::vector<T> input) const;
|
|
std::vector<T> const & result() const { return layers_.back(); }
|
|
|
|
// Compute the gradient of a loss function (defined as 1/2 of L^2 norm
|
|
// of the difference between neural net output and desired output)
|
|
// wrt neural net weights and accumulate them to the already computed
|
|
// gradient
|
|
void backpropagate(neural_net<T> const & nn, std::vector<T> const & output);
|
|
|
|
util::span<T const> gradient() const { return gradient_; }
|
|
util::span<T> gradient() { return gradient_; }
|
|
|
|
T gradient_norm() const;
|
|
|
|
// Perform a single step of gradient descent in the direction
|
|
// of the computed gradient, multiplied by factor
|
|
// N.B.: this does **not** clear out the gradient
|
|
void descend(neural_net<T> & nn, T factor) const;
|
|
|
|
// Reset the accumulated gradient to zero
|
|
void clear();
|
|
|
|
private:
|
|
mutable std::vector<std::vector<T>> layers_;
|
|
std::vector<T> gradient_;
|
|
std::vector<T> error_, error_tmp_;
|
|
};
|
|
|
|
extern template struct neural_net_learner<float>;
|
|
extern template struct neural_net_learner<double>;
|
|
|
|
template <typename T>
|
|
std::vector<T> const & neural_net_learner<T>::apply(neural_net<T> const & nn, std::vector<T> input) const
|
|
{
|
|
if (nn.empty())
|
|
throw empty_neural_net_error{};
|
|
|
|
auto layer_sizes = nn.layer_sizes();
|
|
auto weights = nn.weights().begin();
|
|
|
|
layers_.resize(layer_sizes.size());
|
|
|
|
if (layer_sizes[0] != input.size())
|
|
throw wrong_neural_net_input_size{layer_sizes[0], input.size()};
|
|
|
|
layers_[0] = std::move(input);
|
|
|
|
for (std::size_t l = 0; l + 1 < layer_sizes.size(); ++l)
|
|
{
|
|
auto & layer = layers_[l + 1];
|
|
layer.resize(layer_sizes[l + 1]);
|
|
|
|
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
|
{
|
|
layer[i] = *weights++;
|
|
|
|
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
|
layer[i] += (*weights++) * layers_[l][j];
|
|
|
|
layer[i] = activation(layer[i], nn.activation_types()[l]);
|
|
}
|
|
}
|
|
|
|
return layers_.back();
|
|
}
|
|
|
|
template <typename T>
|
|
void neural_net_learner<T>::backpropagate(neural_net<T> const & nn, std::vector<T> const & output)
|
|
{
|
|
if (nn.empty())
|
|
throw empty_neural_net_error{};
|
|
|
|
auto const layer_sizes = nn.layer_sizes();
|
|
auto const activation_types = nn.activation_types();
|
|
auto const weights = nn.weights();
|
|
|
|
if (output.size() != layer_sizes.back())
|
|
throw wrong_neural_net_output_size(layer_sizes.back(), output.size());
|
|
|
|
gradient_.resize(nn.weights().size());
|
|
|
|
std::size_t offset = gradient_.size();
|
|
for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
|
|
{
|
|
if (l + 2 == layer_sizes.size())
|
|
{
|
|
error_.resize(output.size());
|
|
for (std::size_t i = 0; i < output.size(); ++i)
|
|
{
|
|
T const value = layers_.back()[i];
|
|
error_[i] = (value - output[i]) * activation_derivative(value, activation_types.back());
|
|
}
|
|
}
|
|
else
|
|
{
|
|
error_tmp_.assign(layer_sizes[l + 1], 0.f);
|
|
|
|
for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
|
|
{
|
|
std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1);
|
|
|
|
for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
|
|
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
|
|
}
|
|
|
|
for (std::size_t i = 0; i < error_tmp_.size(); ++i)
|
|
error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);
|
|
|
|
error_ = std::move(error_tmp_);
|
|
}
|
|
|
|
offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];
|
|
|
|
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
|
{
|
|
std::size_t row_offset = offset + i * (layer_sizes[l] + 1);
|
|
gradient_[row_offset] += error_[i];
|
|
|
|
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
|
gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
T neural_net_learner<T>::gradient_norm() const
|
|
{
|
|
T value = T{0};
|
|
for (auto g : gradient_)
|
|
value += geom::sqr(g);
|
|
return std::sqrt(value);
|
|
}
|
|
|
|
template <typename T>
|
|
void neural_net_learner<T>::descend(neural_net<T> & nn, T factor) const
|
|
{
|
|
auto gradient = gradient_.data();
|
|
|
|
for (auto & w : nn.weights())
|
|
w -= factor * (*gradient++);
|
|
}
|
|
|
|
template <typename T>
|
|
void neural_net_learner<T>::clear()
|
|
{
|
|
std::fill(gradient_.begin(), gradient_.end(), T{0});
|
|
}
|
|
|
|
}
|