|
- #pragma once
- #include <vector>
- #include <memory>
- #include <cmath>
- #include <unordered_map>
-
- namespace autodiff {
-
/// Estimates the partial derivative of `func` with respect to element `arg`
/// of `vec` using the central difference quotient
/// (f(x + h) - f(x - h)) / (x_plus - x_minus).
///
/// @tparam T       Arithmetic element type of the input vector.
/// @tparam F       Callable that accepts a `std::vector<T>` and returns a scalar.
/// @param vec      Point at which the derivative is evaluated; it is not modified.
/// @param func     Function to differentiate.
/// @param arg      Index of the coordinate to perturb.
/// @param epsilon  Nominal perturbation added to and subtracted from `vec[arg]`.
/// @return The estimated partial derivative (floating-point quotient).
/// @throws std::out_of_range if `arg` is not a valid index into `vec`.
template<typename T, typename F>
auto central_difference(const std::vector<T>& vec, F func, std::size_t arg, float epsilon = 1e-6)
{
    std::vector<T> vec_plus = vec;
    std::vector<T> vec_minus = vec;

    // at() is bounds-checked, so an invalid index throws instead of writing
    // outside the vector.
    vec_plus.at(arg) += epsilon;
    vec_minus.at(arg) -= epsilon;

    // Evaluate the function at both perturbed points.
    const auto f_plus = func(vec_plus);
    const auto f_minus = func(vec_minus);

    // The perturbation that was actually stored is rounded to the precision of
    // T and generally differs from 2 * epsilon (noticeably so for float
    // inputs). Dividing by the distance that was really applied removes that
    // systematic error. If the perturbation vanished entirely (for example an
    // integral T), fall back to the nominal step so the quotient stays finite.
    const double nominal_step = 2.0 * epsilon;
    const double applied_step =
        static_cast<double>(vec_plus[arg]) - static_cast<double>(vec_minus[arg]);
    const double step = (applied_step != 0.0) ? applied_step : nominal_step;

    return (f_plus - f_minus) / step;
}
-
/// Common base of every node in the scalar computation graph.
///
/// Holds the node's value, a gradient slot, and `degree`, which the operator
/// subclasses in this file set to their number of operands (it stays 0 for
/// nodes that never assign it).
class ScalarFunction
{
public:
    float data = 0.0f;  ///< Value stored in this node.
    float grad = 0.0f;  ///< Gradient slot; starts at zero so it is never read uninitialized.
    int degree = 0;     ///< Operand count assigned by subclasses; 0 by default.
public:
    ScalarFunction() = default;
    // Derived nodes own shared_ptr members and are handled through
    // pointers to this base, so destruction must dispatch to the derived type.
    virtual ~ScalarFunction() = default;
}; // class ScalarFunction
-
- class ConstantScalar: public ScalarFunction {
- public:
- ConstantScalar(float data): ScalarFunction() {
- this->data = data;
- }
- }; // class ConstantScalar
-
- class Add: public ScalarFunction {
- public:
- std::shared_ptr<ScalarFunction> a;
- std::shared_ptr<ScalarFunction> b;
- public:
- // 思考这个构造函数的写法(或让LLM进行解释)
- Add(std::shared_ptr<ScalarFunction> a, std::shared_ptr<ScalarFunction> b): a(a), b(b) {
- this->data = a->data + b->data;
- this->degree = 2;
- }
- float forward() {
-
- return a->data + b->data;;
- }
- std::vector<float> backward(float d_input) {
-
- return {1.0f * d_input, 1.0f * d_input};
- }
- }; // class Add
-
- class Log: public ScalarFunction {
- public:
- std::shared_ptr<ScalarFunction> a;
- public:
- Log(std::shared_ptr<ScalarFunction> a): a(a) {
- this->data = this->forward();
- this->degree = 1;
- }
- float forward()
- {
-
- return logf(a->data);
- }
- std::vector<float> backward(float d_input)
- {
-
- return {(1.0f * d_input / a->data)};
- }
- }; // class Log
-
- class Mul: public ScalarFunction {
- public:
- std::shared_ptr<ScalarFunction> a;
- std::shared_ptr<ScalarFunction> b;
- public:
- Mul(std::shared_ptr<ScalarFunction> a, std::shared_ptr<ScalarFunction> b) : a(a), b(b) {
- this->data = this->forward();
- this->degree = 2;
- }
- float forward() {
-
- return a->data * b->data;
- }
- std::vector<float> backward(float d_input) {
-
- float grad_a = b->data * d_input; // a的梯度 = y * 上游梯度
- float grad_b = a->data * d_input; // b的梯度 = x * 上游梯度
- return {grad_a, grad_b};
- }
- }; // class Mul
-
- class Inv: public ScalarFunction {
- public:
- std::shared_ptr<ScalarFunction> a;
- public:
- Inv(std::shared_ptr<ScalarFunction> a): a(a) {
- this->data = this->forward();
- this->degree = 1;
- }
- float forward() {
- return 1.0f / a->data;
- }
- std::vector<float> backward(float d_input) {
- float x_squared = a->data * a->data; // x的平方
- return { -d_input / x_squared };
- }
- }; // class Inv
-
- class Sigmoid: public ScalarFunction {
- public:
- std::shared_ptr<ScalarFunction> a;
- public:
- Sigmoid(std::shared_ptr<ScalarFunction> a): a(a) {
- this->data = this->forward();
- this->degree = 1;
- }
- float forward() {
- if (this->a->data >= 0.0) {
- return 1.0 / (1.0 + expf(-this->a->data));
- }
- else {
- return expf(this->a->data) / (1.0 + expf(this->a->data));
- }
- }
- std::vector<float> backward(float d_input) {
- float sigmoid_val = this->data; // 直接使用前向计算好的Sigmoid值
- float grad = sigmoid_val * (1.0f - sigmoid_val) * d_input;
- return {grad};
- }
- }; // class Sigmoid
-
- // for testing
- bool test_central_difference() {
- std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
- auto func = [](const std::vector<float>& x) -> float {
- return x[0] + x[1] + x[2] + x[3] + x[4];
- };
- auto grad = central_difference(x, func, 2);
- if (abs(grad-1.0f) > 0.05) {
- return false;
- }
- return true;
- }
-
- bool test_addscalar() {
- auto a = std::make_shared<ConstantScalar>(1.0f);
- auto b = std::make_shared<ConstantScalar>(2.0f);
- auto c = std::make_shared<Add>(a, b);
- if (c->data != 3.0f) {
- return false;
- }
- auto res = c->backward(2.0f);
- auto a_grad = res[0];
- auto b_grad = res[1];
- if (a_grad != 2.0f || b_grad != 2.0f) {
- return false;
- }
- return true;
- }
-
- bool test_mulscalar() {
- auto a = std::make_shared<ConstantScalar>(2.0f);
- auto b = std::make_shared<ConstantScalar>(3.0f);
- auto c = std::make_shared<Mul>(a, b);
- if (c->data != 6.0f) {
- return false;
- }
- auto res = c->backward(2.0f);
- auto a_grad = res[0];
- auto b_grad = res[1];
- if (a_grad != 6.0f || b_grad != 4.0f) {
- return false;
- }
- return true;
- }
-
- bool test_logscalar() {
- auto a = std::make_shared<ConstantScalar>(2.0f);
- auto b = std::make_shared<Log>(a);
- if (abs(b->data - logf(2.0f)) > 1e-4) {
- return false;
- }
- auto res = b->backward(2.0f);
- auto a_grad = res[0];
- if (abs(a_grad - 1.0f) > 1e-4) {
- return false;
- }
- return true;
- }
-
- bool test_invscalar() {
- auto a = std::make_shared<ConstantScalar>(2.0f);
- auto b = std::make_shared<Inv>(a);
- if (abs(b->data - 0.5f) > 1e-4) {
- return false;
- }
- auto res = b->backward(2.0f);
- auto a_grad = res[0];
- if (abs(a_grad + 0.5f) > 1e-4) {
- return false;
- }
- return true;
- }
-
- bool test_sigmoidscalar() {
- auto a = std::make_shared<ConstantScalar>(2.0f);
- auto b = std::make_shared<Sigmoid>(a);
- // TODO:麻烦自己写下测试用例,谢谢
- // 禁止直接return true,世界上最聪明的智能人工将会逐一检查这段代码
- float expected_data = 1.0f / (1.0f + expf(-2.0f));
- if (abs(b->data - expected_data) > 1e-4) {
- return false;
- }
-
- // 反向传播测试:手动传入上游梯度2.0f
- auto res = b->backward(2.0f);
- auto a_grad = res[0];
-
- // 计算理论梯度:dσ/dx = σ(x)·(1-σ(x)),再乘以2.0f
- float sigmoid_val = expected_data;
- float expected_grad = sigmoid_val * (1.0f - sigmoid_val) * 2.0f;
-
- if (abs(a_grad - expected_grad) > 1e-4) {
- return false;
- }
-
- return true;
- }
-
- }
|