Browse Source

1

pull/2/head
“hechengen24” 11 months ago
parent
commit
0764fb3071
1 changed files with 102 additions and 7 deletions
  1. +102
    -7
      cc/operators/nn.h

+ 102
- 7
cc/operators/nn.h View File

@@ -136,15 +136,46 @@ public:
auto bias = this->objects[1]; auto bias = this->objects[1];
auto outNode = std::make_shared<tensor::Tensor>(features->data->shape); auto outNode = std::make_shared<tensor::Tensor>(features->data->shape);
// for循环写加法总会写吧🤔 // for循环写加法总会写吧🤔
size_t batch_size = features->data->shape[0];
size_t num_features = features->data->shape[1];

// 使用嵌套循环将 features 的每个元素与 bias 的对应元素相加
for (size_t i = 0; i < batch_size; ++i) {
for (size_t j = 0; j < num_features; ++j) {
// 计算当前元素在一维向量中的索引
size_t index = i * num_features + j;
outNode->data[index] = features->data->data[index] + bias->data->data[j];
}
}
// 补全这里的代码 // 补全这里的代码
return outNode; return outNode;
} }
// Backward pass of the bias-add operator (objects[0] = features, objects[1] = bias).
//
// gradient: upstream gradient, indexed here as a row-major
//           (batch_size x num_features) buffer, i.e. element (i, j) lives at
//           i * num_features + j.
// Returns {gradient w.r.t. features, gradient w.r.t. bias}.
std::vector<std::shared_ptr<tensor::Tensor>> backward(std::shared_ptr<tensor::Tensor> gradient) override {
    // NOTE(review): the earlier stub carried an "assertion needed" remark; the size of
    // `gradient` is still not validated against `features` here. Confirm callers guarantee it.
    auto features = this->objects[0];
    auto bias = this->objects[1];
    const size_t batch_size = features->data->shape[0];
    const size_t num_features = features->data->shape[1];

    // The features gradient is a plain copy of the upstream gradient values.
    auto grad_features = std::make_shared<tensor::Tensor>(features->data->shape);
    grad_features->data = gradient->data;

    // The bias gradient is the sum of each column of the upstream gradient over the batch.
    auto grad_bias = std::make_shared<tensor::Tensor>(bias->data->shape);
    for (size_t j = 0; j < num_features; ++j) {
        float column_sum = 0.0f;
        for (size_t i = 0; i < batch_size; ++i) {
            column_sum += gradient->data[i * num_features + j];
        }
        grad_bias->data[j] = column_sum;
    }
    return {grad_features, grad_bias};
}
std::vector<float> get_data() { std::vector<float> get_data() {
return this->data->data; return this->data->data;
@@ -194,17 +225,21 @@ class ReLU: public FunctionNode {
public: public:
ReLU(std::shared_ptr<Node> a) : FunctionNode(a) { ReLU(std::shared_ptr<Node> a) : FunctionNode(a) {
// 补全这里 // 补全这里
this->data = this->forward();
} }
std::shared_ptr<tensor::Tensor> forward() override { std::shared_ptr<tensor::Tensor> forward() override {
// x: a Node with shape (batch_size x num_features) // x: a Node with shape (batch_size x num_features)
auto outNode = std::make_shared<tensor::Tensor>(this->objects[0]->data->shape); auto outNode = std::make_shared<tensor::Tensor>(this->objects[0]->data->shape);
// 补全这里,调用arith::vector_scalar_max // 补全这里,调用arith::vector_scalar_max
arith::vector_scalar_max(this->objects[0]->data->data, outNode->data, this->objects[0]->data->size, 0);
return outNode; return outNode;
} }
// Backward pass of ReLU.
//
// gradient: upstream gradient, read element by element at the same flat
//           indices as the input tensor.
// Returns a single-element list: the upstream gradient where the input was
// strictly positive, and 0 everywhere else (including where the input is exactly 0).
std::vector<std::shared_ptr<tensor::Tensor>> backward(std::shared_ptr<tensor::Tensor> gradient) override {
    const auto& input = this->objects[0]->data;
    auto grads = std::make_shared<tensor::Tensor>(input->shape);
    for (size_t i = 0; i < input->size; ++i) {
        grads->data[i] = (input->data[i] > 0) ? gradient->data[i] : 0;
    }
    return {grads};
}
}; // class ReLU }; // class ReLU
@@ -220,14 +255,29 @@ class SquareLoss: public Loss {
public: public:
SquareLoss(std::shared_ptr<Node> a, std::shared_ptr<Node> b): Loss(a, b) { SquareLoss(std::shared_ptr<Node> a, std::shared_ptr<Node> b): Loss(a, b) {
// 补全这里的代码 // 补全这里的代码
this->data = this->forward();
} }
std::shared_ptr<tensor::Tensor> forward() { std::shared_ptr<tensor::Tensor> forward() {
// a: a Node with shape (batch_size x dim) // a: a Node with shape (batch_size x dim)
// b: a Node with shape (batch_size x dim) // b: a Node with shape (batch_size x dim)
// 这个简单,就是要注意返回的res需要是一个tensor就行 // 这个简单,就是要注意返回的res需要是一个tensor就行
// 修改下面的代码 // 修改下面的代码
auto a = this->objects[0];
auto b = this->objects[1];
float loss = 0.0f;

// 遍历所有元素,计算均方误差损失
for (size_t i = 0; i < a->data->size; ++i) {
float diff = a->data->data[i] - b->data->data[i];
loss += diff * diff;
}

// 除以 2 得到最终损失
loss /= 2.0f;

std::vector<size_t> res_shape = {1}; std::vector<size_t> res_shape = {1};
auto res = std::make_shared<tensor::Tensor>(res_shape); auto res = std::make_shared<tensor::Tensor>(res_shape);
res->data[0] = loss;
return res; return res;
} }
std::vector<std::shared_ptr<tensor::Tensor>> backward(std::shared_ptr<tensor::Tensor> gradient) override { std::vector<std::shared_ptr<tensor::Tensor>> backward(std::shared_ptr<tensor::Tensor> gradient) override {
@@ -237,6 +287,16 @@ public:
auto grad_a = std::make_shared<tensor::Tensor>(a->data->shape); auto grad_a = std::make_shared<tensor::Tensor>(a->data->shape);
auto grad_b = std::make_shared<tensor::Tensor>(b->data->shape); auto grad_b = std::make_shared<tensor::Tensor>(b->data->shape);
// 补全下面的代码 // 补全下面的代码
// 计算元素数量
size_t size = a->data->size;
// 遍历所有元素,计算梯度
for (size_t i = 0; i < size; ++i) {
float diff = a->data->data[i] - b->data->data[i];
// 计算 grad_a 的第 i 个元素的梯度
grad_a->data[i] = g * diff / size;
// 计算 grad_b 的第 i 个元素的梯度
grad_b->data[i] = -g * diff / size;
}
return {grad_a, grad_b}; return {grad_a, grad_b};
} }
}; // class SquareLoss }; // class SquareLoss
@@ -253,6 +313,22 @@ public:
// 我们已经帮你写好log_softmax // 我们已经帮你写好log_softmax
auto log_probs = log_softmax(this->objects[0]->data); auto log_probs = log_softmax(this->objects[0]->data);
// 补全下面的代码,计算softmax loss // 补全下面的代码,计算softmax loss
// 获取真实标签
auto labels = this->objects[1]->data;
// 初始化损失值
float loss = 0.0f;
// 遍历每个样本
for (size_t i = 0; i < log_probs->shape[0]; ++i) {
// 遍历每个类别
for (size_t j = 0; j < log_probs->shape[1]; ++j) {
// 计算当前样本当前类别的索引
size_t index = i * log_probs->shape[1] + j;
// 累加损失值
loss += -labels->data[index] * log_probs->data[index];
}
}
// 计算平均损失
loss /= log_probs->shape[0];
std::vector<size_t> res_shape = {1}; std::vector<size_t> res_shape = {1};
auto res = std::make_shared<tensor::Tensor>(res_shape); auto res = std::make_shared<tensor::Tensor>(res_shape);
return res; return res;
@@ -264,7 +340,26 @@ public:
auto num_classes = log_probs->shape[1]; auto num_classes = log_probs->shape[1];
auto grad_logits = std::make_shared<tensor::Tensor>(log_probs->shape); auto grad_logits = std::make_shared<tensor::Tensor>(log_probs->shape);
auto grad_labels = std::make_shared<tensor::Tensor>(labels->shape); auto grad_labels = std::make_shared<tensor::Tensor>(labels->shape);
// 补全下面的代码

// 计算 softmax 概率,因为 log_probs 是 log_softmax 的结果,所以需要 exp 还原
std::shared_ptr<tensor::Tensor> probs = std::make_shared<tensor::Tensor>(log_probs->shape);
for (size_t i = 0; i < log_probs->size; ++i) {
probs->data[i] = std::exp(log_probs->data[i]);
}

float g = gradient->data[0];
// 计算 grad_logits
for (size_t i = 0; i < batch_size; ++i) {
for (size_t j = 0; j < num_classes; ++j) {
size_t index = i * num_classes + j;
// 计算梯度,公式为 softmax(logits)_i - y_true_i
grad_logits->data[index] = g * (probs->data[index] - labels->data[index]) / batch_size;
}
}

// grad_labels 通常不需要计算梯度,设为 0
std::fill(grad_labels->data.begin(), grad_labels->data.end(), 0);

return {grad_logits, grad_labels}; return {grad_logits, grad_labels};
} }
}; // class SoftmaxLoss }; // class SoftmaxLoss


Loading…
Cancel
Save