Support retrieving the neural net gradient with respect to inputs in backpropagation & add tests

This commit is contained in:
Nikita Lisitsa 2022-01-23 19:37:42 +03:00
parent 790deb19ff
commit d369abc61b
2 changed files with 72 additions and 28 deletions

View file

@ -31,6 +31,8 @@ namespace psemek::ml
T gradient_norm() const;
// Read-only view over error_.
// NOTE(review): after a full backpropagation pass error_ appears to hold the
// loss gradient with respect to the network inputs (one entry per input) —
// confirm against the backpropagation implementation. The view presumably
// stays valid only until error_ is next modified.
util::span<T const> arg_gradient() const { return error_; }
// Perform a single step of gradient descent in the direction
// of the computed gradient, multiplied by factor
// N.B.: this does **not** clear out the gradient
@ -96,38 +98,18 @@ namespace psemek::ml
if (gradient.size() != layer_sizes.back())
throw wrong_neural_net_output_size(layer_sizes.back(), gradient.size());
error_.resize(gradient.size());
for (std::size_t i = 0; i < gradient.size(); ++i)
{
T const value = layers_.back()[i];
error_[i] = gradient[i] * activation_derivative(value, activation_types.back());
}
gradient_.resize(nn.weights().size());
std::size_t offset = gradient_.size();
for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
{
if (l + 2 == layer_sizes.size())
{
error_.resize(gradient.size());
for (std::size_t i = 0; i < gradient.size(); ++i)
{
T const value = layers_.back()[i];
error_[i] = gradient[i] * activation_derivative(value, activation_types.back());
}
}
else
{
error_tmp_.assign(layer_sizes[l + 1], 0.f);
for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
{
std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1);
for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
}
for (std::size_t i = 0; i < error_tmp_.size(); ++i)
error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);
error_ = std::move(error_tmp_);
}
offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
@ -138,6 +120,21 @@ namespace psemek::ml
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
}
error_tmp_.assign(layer_sizes[l], 0.f);
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
{
std::size_t row_offset = offset + i * (layer_sizes[l] + 1);
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
}
if (l > 0) for (std::size_t i = 0; i < error_tmp_.size(); ++i)
error_tmp_[i] *= activation_derivative(layers_[l][i], activation_types[l - 1]);
std::swap(error_, error_tmp_);
}
}

View file

@ -54,7 +54,54 @@ test_case(ml_neural__net_gradient)
nn.weights()[i] = old;
double numeric_gradient = (v1 - v0) / 2.0 / eps;
expect_close(numeric_gradient, learner.gradient()[i], 1e-6);
expect_close(numeric_gradient, learner.gradient()[i], 1e-4);
}
}
}
// Compares neural_net_learner::arg_gradient() with a central finite-difference
// estimate of the derivative of l2_loss with respect to every input component,
// over 64 randomly generated networks.
test_case(ml_neural__net_arg__gradient)
{
    generator rng;

    // Half-width of the central difference step.
    double const eps = 1e-6;
    // Largest valid activation_type index (inclusive upper bound for the draw).
    std::size_t const last_activation = static_cast<std::size_t>(activation_type::count) - 1;

    for (std::size_t iteration = 0; iteration < 64; ++iteration)
    {
        // Random topology: 2 to 5 layers, each with 1 to 50 units.
        std::size_t const layer_count = uniform<std::size_t>(rng, 2, 5);
        std::vector<std::size_t> layer_sizes(layer_count);
        for (std::size_t k = 0; k < layer_sizes.size(); ++k)
            layer_sizes[k] = uniform<std::size_t>(rng, 1, 50);

        // One randomly chosen activation per layer transition.
        std::vector<activation_type> layer_activations(layer_sizes.size() - 1);
        for (std::size_t k = 0; k < layer_activations.size(); ++k)
            layer_activations[k] = static_cast<activation_type>(uniform<std::size_t>(rng, 0, last_activation));

        neural_net<double> nn(std::move(layer_sizes), std::move(layer_activations));
        randomize_normal(nn, rng);

        // Random input point and random target output.
        std::vector<double> point(nn.layer_sizes().front());
        for (std::size_t k = 0; k < point.size(); ++k)
            point[k] = uniform<double>(rng);

        std::vector<double> target(nn.layer_sizes().back());
        for (std::size_t k = 0; k < target.size(); ++k)
            target[k] = uniform<double>(rng);

        // Analytic gradient: forward pass followed by backpropagation.
        neural_net_learner<double> learner;
        learner.apply(nn, point);
        learner.backpropagate_l2(nn, target);

        // Numeric gradient: perturb one input component at a time.
        neural_net_evaluator<double> evaluator;
        for (std::size_t k = 0; k < point.size(); ++k)
        {
            double const saved = point[k];

            point[k] = point[k] - eps;
            double const loss_minus = l2_loss(evaluator.apply(nn, point), target);

            point[k] = point[k] + 2.0 * eps;
            double const loss_plus = l2_loss(evaluator.apply(nn, point), target);

            // Restore the exact original value before the next component.
            point[k] = saved;

            double const numeric_gradient = (loss_plus - loss_minus) / 2.0 / eps;
            expect_close(numeric_gradient, learner.arg_gradient()[k], 1e-4);
        }
    }
}