diff --git a/libs/ml/include/psemek/ml/neural_net/learner.hpp b/libs/ml/include/psemek/ml/neural_net/learner.hpp index 69472594..fbbe865d 100644 --- a/libs/ml/include/psemek/ml/neural_net/learner.hpp +++ b/libs/ml/include/psemek/ml/neural_net/learner.hpp @@ -31,6 +31,8 @@ namespace psemek::ml T gradient_norm() const; + util::span arg_gradient() const { return error_; } + // Perform a single step of gradient descent in the direction // of the computed gradient, multiplied by factor // N.B.: this does **not** clear out the gradient @@ -96,38 +98,18 @@ namespace psemek::ml if (gradient.size() != layer_sizes.back()) throw wrong_neural_net_output_size(layer_sizes.back(), gradient.size()); + error_.resize(gradient.size()); + for (std::size_t i = 0; i < gradient.size(); ++i) + { + T const value = layers_.back()[i]; + error_[i] = gradient[i] * activation_derivative(value, activation_types.back()); + } + gradient_.resize(nn.weights().size()); std::size_t offset = gradient_.size(); for (std::size_t l = layer_sizes.size() - 1; l --> 0;) { - if (l + 2 == layer_sizes.size()) - { - error_.resize(gradient.size()); - for (std::size_t i = 0; i < gradient.size(); ++i) - { - T const value = layers_.back()[i]; - error_[i] = gradient[i] * activation_derivative(value, activation_types.back()); - } - } - else - { - error_tmp_.assign(layer_sizes[l + 1], 0.f); - - for (std::size_t i = 0; i < layer_sizes[l + 2]; ++i) - { - std::size_t row_offset = offset + i * (layer_sizes[l + 1] + 1); - - for (std::size_t j = 0; j < layer_sizes[l + 1]; ++j) - error_tmp_[j] += weights[row_offset + j + 1] * error_[i]; - } - - for (std::size_t i = 0; i < error_tmp_.size(); ++i) - error_tmp_[i] *= activation_derivative(layers_[l + 1][i], activation_types[l]); - - error_ = std::move(error_tmp_); - } - offset -= (layer_sizes[l] + 1) * layer_sizes[l + 1]; for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i) @@ -138,6 +120,21 @@ namespace psemek::ml for (std::size_t j = 0; j < layer_sizes[l]; ++j) gradient_[row_offset + j + 1] += error_[i] * layers_[l][j]; } + + error_tmp_.assign(layer_sizes[l], 0.f); + + for (std::size_t i = 0; i < layer_sizes[l + 1]; ++i) + { + std::size_t row_offset = offset + i * (layer_sizes[l] + 1); + + for (std::size_t j = 0; j < layer_sizes[l]; ++j) + error_tmp_[j] += weights[row_offset + j + 1] * error_[i]; + } + + if (l > 0) for (std::size_t i = 0; i < error_tmp_.size(); ++i) + error_tmp_[i] *= activation_derivative(layers_[l][i], activation_types[l - 1]); + + std::swap(error_, error_tmp_); } } diff --git a/libs/ml/tests/neural_net/gradient.cpp b/libs/ml/tests/neural_net/gradient.cpp index 214efe11..a12e0c0f 100644 --- a/libs/ml/tests/neural_net/gradient.cpp +++ b/libs/ml/tests/neural_net/gradient.cpp @@ -54,7 +54,54 @@ test_case(ml_neural__net_gradient) nn.weights()[i] = old; double numeric_gradient = (v1 - v0) / 2.0 / eps; - expect_close(numeric_gradient, learner.gradient()[i], 1e-6); + expect_close(numeric_gradient, learner.gradient()[i], 1e-4); + } + } +} + +test_case(ml_neural__net_arg__gradient) +{ + generator rng; + for (std::size_t iteration = 0; iteration < 64; ++iteration) + { + std::vector sizes; + sizes.resize(uniform(rng, 2, 5)); + for (auto & s : sizes) + s = uniform(rng, 1, 50); + + std::vector activations(sizes.size() - 1); + for (auto & a : activations) + a = static_cast(uniform(rng, 0, static_cast(activation_type::count) - 1)); + + neural_net nn(std::move(sizes), std::move(activations)); + randomize_normal(nn, rng); + + std::vector input(nn.layer_sizes().front()); + for (auto & x : input) + x = uniform(rng); + + std::vector output(nn.layer_sizes().back()); + for (auto & x : output) + x = uniform(rng); + + neural_net_learner learner; + learner.apply(nn, input); + learner.backpropagate_l2(nn, output); + + double const eps = 1e-6; + + neural_net_evaluator evaluator; + for (std::size_t i = 0; i < input.size(); ++i) + { + double old = input[i]; + input[i] -= eps; + double v0 = l2_loss(evaluator.apply(nn, input), output); + input[i] += 2.0 * eps; + double v1 = l2_loss(evaluator.apply(nn, input), output); + input[i] = old; + + double numeric_gradient = (v1 - v0) / 2.0 / eps; + expect_close(numeric_gradient, learner.arg_gradient()[i], 1e-4); } } }