Support retrieving the neural network gradient with respect to the inputs in backpropagation & add tests
This commit is contained in:
parent
790deb19ff
commit
d369abc61b
2 changed files with 72 additions and 28 deletions
|
|
@ -31,6 +31,8 @@ namespace psemek::ml
|
|||
|
||||
T gradient_norm() const;
|
||||
|
||||
// Read-only view of the internal error_ buffer.
// NOTE(review): the backpropagation loop swaps the error propagated down to
// layer 0 into error_ on its last iteration, so after a backpropagation call
// this presumably holds the loss gradient with respect to the network inputs
// - confirm against the full source.
util::span<T const> arg_gradient() const { return error_; }
|
||||
|
||||
// Perform a single step of gradient descent in the direction
|
||||
// of the computed gradient, multiplied by factor
|
||||
// N.B.: this does **not** clear out the gradient
|
||||
|
|
@ -96,38 +98,18 @@ namespace psemek::ml
|
|||
if (gradient.size() != layer_sizes.back())
|
||||
throw wrong_neural_net_output_size(layer_sizes.back(), gradient.size());
|
||||
|
||||
error_.resize(gradient.size());
|
||||
for (std::size_t i = 0; i < gradient.size(); ++i)
|
||||
{
|
||||
T const value = layers_.back()[i];
|
||||
error_[i] = gradient[i] * activation_derivative(value, activation_types.back());
|
||||
}
|
||||
|
||||
gradient_.resize(nn.weights().size());
|
||||
|
||||
std::size_t offset = gradient_.size();
|
||||
for (std::size_t l = layer_sizes.size() - 1; l --> 0;)
|
||||
{
|
||||
if (l + 2 == layer_sizes.size())
|
||||
{
|
||||
error_.resize(gradient.size());
|
||||
for (std::size_t i = 0; i < gradient.size(); ++i)
|
||||
{
|
||||
T const value = layers_.back()[i];
|
||||
error_[i] = gradient[i] * activation_derivative(value, activation_types.back());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error_tmp_.assign(layer_sizes[l + 1], 0.f);
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i)
|
||||
{
|
||||
std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1);
|
||||
|
||||
for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j)
|
||||
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < error_tmp_.size(); ++i)
|
||||
error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]);
|
||||
|
||||
error_ = std::move(error_tmp_);
|
||||
}
|
||||
|
||||
offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1];
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
||||
|
|
@ -138,6 +120,21 @@ namespace psemek::ml
|
|||
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
||||
gradient_[row_offset + j + 1] += error_[i] * layers_[l][j];
|
||||
}
|
||||
|
||||
error_tmp_.assign(layer_sizes[l], 0.f);
|
||||
|
||||
for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i)
|
||||
{
|
||||
std::size_t row_offset = offset + i * (layer_sizes[l] + 1);
|
||||
|
||||
for (std::size_t j = 0; j < layer_sizes[l]; ++j)
|
||||
error_tmp_[j] += weights[row_offset + j + 1] * error_[i];
|
||||
}
|
||||
|
||||
if (l > 0) for (std::size_t i = 0; i < error_tmp_.size(); ++i)
|
||||
error_tmp_[i] *= activation_derivative(layers_[l][i], activation_types[l - 1]);
|
||||
|
||||
std::swap(error_, error_tmp_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,54 @@ test_case(ml_neural__net_gradient)
|
|||
nn.weights()[i] = old;
|
||||
|
||||
double numeric_gradient = (v1 - v0) / 2.0 / eps;
|
||||
expect_close(numeric_gradient, learner.gradient()[i], 1e-6);
|
||||
expect_close(numeric_gradient, learner.gradient()[i], 1e-4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares learner.arg_gradient() with a central finite-difference estimate
// of the l2_loss derivative with respect to each input component, repeated
// over 64 randomly generated networks.
test_case(ml_neural__net_arg__gradient)
|
||||
{
|
||||
generator rng;
|
||||
for (std::size_t iteration = 0; iteration < 64; ++iteration)
|
||||
{
|
||||
// Random topology: layer count drawn from uniform(rng, 2, 5), each layer
// size from uniform(rng, 1, 50) (bounds presumably inclusive - confirm).
std::vector<std::size_t> sizes;
|
||||
sizes.resize(uniform<std::size_t>(rng, 2, 5));
|
||||
for (auto & s : sizes)
|
||||
s = uniform<std::size_t>(rng, 1, 50);
|
||||
|
||||
// One activation per pair of consecutive layers, chosen by a random index
// in the range 0 .. activation_type::count - 1.
std::vector<activation_type> activations(sizes.size() - 1);
|
||||
for (auto & a : activations)
|
||||
a = static_cast<activation_type>(uniform<std::size_t>(rng, 0, static_cast<std::size_t>(activation_type::count) - 1));
|
||||
|
||||
// sizes and activations are moved into the net here; layer sizes are read
// back through nn.layer_sizes() from this point on.
neural_net<double> nn(std::move(sizes), std::move(activations));
|
||||
randomize_normal(nn, rng);
|
||||
|
||||
// Random input vector matching the first layer size.
std::vector<double> input(nn.layer_sizes().front());
|
||||
for (auto & x : input)
|
||||
x = uniform<double>(rng);
|
||||
|
||||
// Random vector matching the last layer size; it is the second argument of
// both backpropagate_l2 and l2_loss below.
std::vector<double> output(nn.layer_sizes().back());
|
||||
for (auto & x : output)
|
||||
x = uniform<double>(rng);
|
||||
|
||||
// Analytic side: forward pass, then backpropagation; the result under test
// is read later via learner.arg_gradient().
neural_net_learner<double> learner;
|
||||
learner.apply(nn, input);
|
||||
learner.backpropagate_l2(nn, output);
|
||||
|
||||
// Step size used for the finite differences.
double const eps = 1e-6;
|
||||
|
||||
// Numeric side: perturb one input component at a time.
neural_net_evaluator<double> evaluator;
|
||||
for (std::size_t i = 0; i < input.size(); ++i)
|
||||
{
|
||||
// v0 is the loss at input[i] - eps, v1 the loss at input[i] + eps; the
// original value is restored afterwards so other components are unaffected.
double old = input[i];
|
||||
input[i] -= eps;
|
||||
double v0 = l2_loss(evaluator.apply(nn, input), output);
|
||||
input[i] += 2.0 * eps;
|
||||
double v1 = l2_loss(evaluator.apply(nn, input), output);
|
||||
input[i] = old;
|
||||
|
||||
// Central difference: (v1 - v0) / (2 * eps).
double numeric_gradient = (v1 - v0) / 2.0 / eps;
|
||||
// 1e-4 is the tolerance argument passed to expect_close.
expect_close(numeric_gradient, learner.arg_gradient()[i], 1e-4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue