Merge pull request !6204 from yonibaehr/export
@@ -30,14 +30,14 @@ namespace session {
class TrainSession : public lite::LiteSession {
public:
TrainSession();
~TrainSession() = default;
~TrainSession();
int RunGraph(const session::KernelCallBack &before = nullptr,
const session::KernelCallBack &after = nullptr) override;
int CompileGraph(lite::Model *model) override;
virtual void ReplaceOps();
virtual void *ExportToBuf(void *buf, size_t *len) const;
virtual void* ExportToBuf(lite::Model *model, void* buf, size_t* len) const;
// todo: output tensors by tensor name
std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> GetOutputMap() const;
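For context, a hedged sketch of how the new ExportToBuf() signature would be called from user code; the names below are illustrative, and as the TrainSession.cc hunk further down shows, the body in this PR still returns nullptr.

size_t len = 0;
// 'session' and 'model' are assumed to come from the usual session creation / model import flow.
void *buf = session->ExportToBuf(model, nullptr, &len);
if (buf == nullptr) {
  // export is not implemented yet in this change
}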
@@ -31,9 +31,9 @@ int DoArithmeticSelfGrad(const TensorPtrVector &in_tensors, const TensorPtrVecto
mindspore::lite::Allocator *allocator) {
size_t data_size = in_tensors[0]->ElementsNum();
OpParameter *param = node->primitive_;
float *dy_data = (float *)in_tensors[0]->data_;
float *x_data = (float *)in_tensors[1]->data_;
float *dx_data = (float *)(float *)out_tensors[0]->data_;
float *dy_data = reinterpret_cast<float *>(in_tensors[0]->data_);
float *x_data = reinterpret_cast<float *>(in_tensors[1]->data_);
float *dx_data = reinterpret_cast<float *>(out_tensors[0]->data_);
int ret;
if (param->type_ == KernelType::LogGrad) {
ret = ElementDiv(dy_data, x_data, dx_data, data_size);
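The change above is purely a cast-style cleanup (it also drops the doubled cast on dx_data); side by side, the two forms are:

void *raw = in_tensors[0]->data_;
float *dy_c_style = (float *)raw;                    // old: C-style cast
float *dy_cpp = reinterpret_cast<float *>(raw);      // new: explicit C++ cast preferred by the style checker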
@@ -28,7 +28,7 @@ void sumSpatialBatch(const float *in, int size, int ch, float *out) {
}
static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) {
float N = (float)size;
float N = (float)(size);
sumSpatialBatch(in, N, ch, mean);
for (int f = 0; f < ch; ++f) {
mean[f] /= N;
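For orientation, a minimal sketch of the per-channel statistics meanVar() builds, assuming the usual batch-norm definition invar = 1/sqrt(var + eps); the variance accumulation below is not shown in the hunk and the variable names are illustrative (var[] is assumed zero-initialized):

for (int c = 0; c < ch; ++c) mean[c] /= N;
for (int i = 0; i < size; ++i) {
  for (int c = 0; c < ch; ++c) {
    float d = in[i * ch + c] - mean[c];
    var[c] += d * d;                      // accumulate squared deviation per channel
  }
}
for (int c = 0; c < ch; ++c) invar[c] = 1.0f / sqrtf(var[c] / N + eps);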
@@ -31,63 +31,29 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
const float *inPtr = NULL;
// for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;
float kk = (float)(win_h * win_w);
for (uint16_t ib = 0; ib < output_batch; ib++) {
float *out;
// out = &output_ptr[(ib * output_h * output_w)];
out = &output_ptr[(ib * in_h * in_w * channel)];
// inPtr = (float *)(&input_ptr[(ib * in_h * in_w)]);
inPtr = (float *)(&input_ptr[(ib * output_h * output_w * channel)]);
if (1) { // in->layout() == Tensor::nhwc)
// iterate over yt
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
for (uint16_t ic = 0; ic < channel; ic++) {
int idx = (yw + yh * output_w) * channel + ic; // (ic*in_h*in_w) + (in_w*yh) + yw;
float delta = inPtr[idx] / kk;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
continue;
}
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
// out[(xw + output_w * xh) * channel + ic] += delta;
out[(xw + in_w * xh) * channel + ic] += delta;
}
float *out = &output_ptr[(ib * in_h * in_w * channel)];
const float *inPtr = &input_ptr[(ib * output_h * output_w * channel)];
// iterate over yt
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
for (uint16_t ic = 0; ic < channel; ic++) {
int idx = (yw + yh * output_w) * channel + ic; // (ic*in_h*in_w) + (in_w*yh) + yw;
float delta = inPtr[idx] / kk;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
continue;
}
}
}
}
} else { // nchw
for (uint16_t ic = 0; ic < channel; ic++) {
// iterate over yt
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
float delta = inPtr[idx] / kk;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
// out[(ic * output_h * output_w) + (xh * output_w) + xw] += delta;
out[(ic * in_h * in_w) + (xh * in_w) + xw] += delta;
}
out[(xw + in_w * xh) * channel + ic] += delta;
}
}
}
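The surviving NHWC branch distributes each output gradient evenly over its pooling window. As a standalone single-batch sketch (parameter names assumed from the hunk above; not the PR's exact code):

void AvgPoolGradSketch(const float *dy, float *dx, int out_h, int out_w,
                       int in_h, int in_w, int ch, int win_h, int win_w,
                       int stride_h, int stride_w, int pad_h, int pad_w) {
  for (int i = 0; i < in_h * in_w * ch; ++i) dx[i] = 0.0f;   // clear the input-gradient buffer
  float kk = (float)(win_h * win_w);
  for (int yh = 0; yh < out_h; ++yh) {
    for (int yw = 0; yw < out_w; ++yw) {
      for (int c = 0; c < ch; ++c) {
        float delta = dy[(yh * out_w + yw) * ch + c] / kk;   // each window cell gets an equal share
        for (int kh = 0; kh < win_h; ++kh) {
          int xh = yh * stride_h + kh - pad_h;
          if (xh < 0 || xh >= in_h) continue;
          for (int kw = 0; kw < win_w; ++kw) {
            int xw = yw * stride_w + kw - pad_w;
            if (xw < 0 || xw >= in_w) continue;
            dx[(xh * in_w + xw) * ch + c] += delta;
          }
        }
      }
    }
  }
}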
@@ -111,73 +77,39 @@ void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
const float *inPtr;
const float *dyPtr;
for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;
for (uint16_t ib = 0; ib < output_batch; ib++) {
float *out;
out = &output_ptr[(ib * in_h * in_w * channel)];
inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
dyPtr = (const float *)(&dy_ptr[(ib * output_h * output_w * channel)]);
float *out = &output_ptr[(ib * in_h * in_w * channel)];
const float *inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
const float *dyPtr = (const float *)(&dy_ptr[(ib * output_h * output_w * channel)]);
if (1) { // nhwc
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
for (uint16_t ic = 0; ic < channel; ic++) {
int idx = (yw + yh * output_w) * channel + ic;
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
for (uint16_t ic = 0; ic < channel; ic++) {
int idx = (yw + yh * output_w) * channel + ic;
float delta = dyPtr[idx];
float max_val = -FLT_MAX;
int max_idx = 0;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
continue;
}
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
if (inPtr[(xw + in_w * xh) * channel + ic] > max_val) {
max_val = inPtr[(xw + in_w * xh) * channel + ic];
max_idx = (xw + in_w * xh) * channel + ic;
}
}
float delta = dyPtr[idx];
float max_val = -FLT_MAX;
int max_idx = 0;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
continue;
}
out[max_idx] += delta;
}
}
}
} else { // nchw
for (uint16_t yh = 0; yh < output_h; yh++) {
for (uint16_t yw = 0; yw < output_w; yw++) {
for (uint16_t ic = 0; ic < channel; ic++) {
int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
float delta = dyPtr[idx];
float max_val = -FLT_MAX;
int max_idx = 0;
for (int32_t kh = 0; kh < win_h; kh++) {
int xh = yh * stride_h + kh - pad_h;
if ((xh < 0) || (xh >= in_h)) {
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
for (int32_t kw = 0; kw < win_w; kw++) {
int xw = yw * stride_w + kw - pad_w;
if ((xw < 0) || (xw >= in_w)) {
continue;
}
if (inPtr[(ic * in_h * in_w) + (xh * in_w) + xw] > max_val) {
max_val = inPtr[(ic * in_h * in_w) + (xh * in_w) + xw];
max_idx = (ic * in_h * in_w) + (xh * in_w) + xw;
}
if (inPtr[(xw + in_w * xh) * channel + ic] > max_val) {
max_val = inPtr[(xw + in_w * xh) * channel + ic];
max_idx = (xw + in_w * xh) * channel + ic;
}
}
out[max_idx] += delta;
}
out[max_idx] += delta;
}
}
}
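MaxPoolingGrad follows the same loop structure but routes the whole gradient to the window position that held the maximum of the forward input. A single-batch NHWC sketch (names assumed, not the PR's exact code):

void MaxPoolGradSketch(const float *x, const float *dy, float *dx, int out_h, int out_w,
                       int in_h, int in_w, int ch, int win_h, int win_w,
                       int stride_h, int stride_w, int pad_h, int pad_w) {
  for (int i = 0; i < in_h * in_w * ch; ++i) dx[i] = 0.0f;
  for (int yh = 0; yh < out_h; ++yh) {
    for (int yw = 0; yw < out_w; ++yw) {
      for (int c = 0; c < ch; ++c) {
        float delta = dy[(yh * out_w + yw) * ch + c];
        float max_val = -FLT_MAX;   // requires <float.h>
        int max_idx = 0;
        for (int kh = 0; kh < win_h; ++kh) {
          int xh = yh * stride_h + kh - pad_h;
          if (xh < 0 || xh >= in_h) continue;
          for (int kw = 0; kw < win_w; ++kw) {
            int xw = yw * stride_w + kw - pad_w;
            if (xw < 0 || xw >= in_w) continue;
            int idx = (xh * in_w + xw) * ch + c;
            if (x[idx] > max_val) { max_val = x[idx]; max_idx = idx; }
          }
        }
        dx[max_idx] += delta;   // only the argmax position receives the gradient
      }
    }
  }
}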
@@ -42,11 +42,9 @@ int CompareRelativeOutput(float *output_data, std::string file_path) {
size_t output_size;
auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
size_t output_num = output_size / sizeof(float);
// std::cout << "output num : " << output_num << "\n";
int error = CompareOutputRelativeData(output_data, ground_truth, output_num);
delete [] ground_truth;
if (error > 1e-4) {
std::cout << "has accuracy error!\n" << error << "\n";
return 1;
}
return 0;
@@ -56,7 +54,6 @@ float RelativeOutputError(float *output_data, std::string file_path) {
size_t output_size;
auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
size_t output_num = output_size / sizeof(float);
std::cout << "output num : " << output_num << "\n";
float error = CompareOutputRelativeData(output_data, ground_truth, output_num);
delete [] ground_truth;
return error;
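Typical use of these helpers in a test, mirroring the tuning_layer test further below (the path is illustrative):

auto error = lite::RelativeOutputError(out_data, "./test_data/train/train_output_32_10.bin");
EXPECT_LT(error, 2e-3);   // relative error against the stored ground truth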
@@ -51,8 +51,6 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_
MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
}
}
// JBDEBUG
// std::cout << "executing kernel " << kernel->name() << "\n";
auto ret = kernel->Run();
if (0 != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
@@ -68,19 +68,12 @@ int ApplyMomentumCPUKernel::Init() {
for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0;
workspace = new float[elem_num];
return 0;
}
#if 0
OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive) {
OpParameter *param = new (std::nothrow) OpParameter();
if (param == nullptr) {
MS_LOG(ERROR) << "new Param for OptMomentum failed.";
return nullptr;
if (workspace == nullptr) {
MS_LOG(ERROR) << "apply momentum workspace fail to malloc!";
return RET_ERROR;
}
param->type_ = primitive->Type();
return param;
return 0;
}
#endif
kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
@@ -27,8 +27,11 @@ class ApplyMomentumCPUKernel : public LiteKernel {
explicit ApplyMomentumCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ApplyMomentumCPUKernel() override { delete[] workspace; }
: LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
~ApplyMomentumCPUKernel() override {
if (workspace)
delete[] workspace;
}
int Init() override;
int ReSize() override;
@@ -38,8 +41,6 @@ class ApplyMomentumCPUKernel : public LiteKernel {
float *workspace;
};
// OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_APPLY_MOMENTUM_H_
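The same ownership pattern is applied to every training kernel in this PR: the workspace pointer is initialized to nullptr in the constructor, allocated with new (std::nothrow) in Init() with an explicit check, and guarded in the destructor. Condensed as a sketch (class name illustrative; delete[] on nullptr is already a no-op, so the guard is purely defensive):

class WorkspaceKernelSketch {
 public:
  WorkspaceKernelSketch() : workspace(nullptr) {}
  ~WorkspaceKernelSketch() {
    if (workspace) delete[] workspace;
  }
  int Init(size_t elem_num) {
    workspace = new (std::nothrow) float[elem_num];
    if (workspace == nullptr) {
      return -1;  // RET_ERROR in the kernels above
    }
    return 0;     // RET_OK
  }
 private:
  float *workspace;
};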
@@ -58,7 +58,6 @@ int ArithmeticGradCPUKernel::Init() {
tile_data1 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
if (tile_data1 == nullptr) {
MS_LOG(ERROR) << "new data1 fail!";
delete tile_data0;
return RET_ERROR;
}
@@ -66,8 +65,6 @@ int ArithmeticGradCPUKernel::Init() {
tile_data2 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
if (tile_data2 == nullptr) {
MS_LOG(ERROR) << "new data2 fail!";
delete tile_data0;
delete tile_data1;
return RET_ERROR;
}
}
@@ -29,30 +29,8 @@ using mindspore::lite::RET_OK;
// using mindspore::lite::REG_OP;
using mindspore::schema::PrimitiveType_BNGrad;
/*
{dy}
{x }
{scale }
{save_mean }
{save_inv_variance }
*/
namespace mindspore::kernel {
#if 0
OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) {
BNGradParameter *param = new (std::nothrow) BNGradParameter();
if (param == nullptr) {
MS_LOG(ERROR) << "new Param for conv grad filter failed.";
return nullptr;
}
param->op_parameter_.type_ = primitive->Type();
auto bngrad_primitive = primitive->Value()->value_as_BNGrad();
param->epsilon_ = bngrad_primitive->eps();
param->momentum_ = bngrad_primitive->momentum();
return reinterpret_cast<OpParameter *>(param);
}
#endif
namespace mindspore::kernel {
int BNGradCPUKernel::Init() {
auto *input_x = in_tensors_.at(1);
int channels = input_x->shape().at(kNHWC_C);
@@ -68,7 +46,6 @@ int BNGradCPUKernel::Init() {
int BNGradCPUKernel::ReSize() { return RET_OK; }
int BNGradCPUKernel::Run() {
// std::cout << "run succ" << std::endl;
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
@@ -28,8 +28,12 @@ class BNGradCPUKernel : public LiteKernel {
explicit BNGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~BNGradCPUKernel() override { delete [] workspace; }
: LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr),
workspace_size(0) {}
~BNGradCPUKernel() override {
if (workspace)
delete [] workspace;
}
int Init() override;
int ReSize() override;
@@ -47,6 +47,10 @@ int ConvolutionTrainCPUKernel::Init() {
conv_param_->input_channel_ / conv_param_->group_;
workspace = new (std::nothrow) float[ws_size];
if (workspace == nullptr) {
MS_LOG(ERROR) << "new workspace fail!";
return RET_ERROR;
}
return RET_OK;
}
@@ -95,8 +99,6 @@ int ConvolutionTrainCPUKernel::Run() {
gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch);
}
}
// std::cout << "run succ" << std::endl;
return RET_OK;
}
@@ -27,8 +27,11 @@ class ConvolutionTrainCPUKernel : public LiteKernel {
explicit ConvolutionTrainCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionTrainCPUKernel() override { delete[] workspace; }
: LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
~ConvolutionTrainCPUKernel() override {
if (workspace)
delete[] workspace;
}
int Init() override;
int ReSize() override;
@@ -37,10 +37,7 @@ int ConvolutionGradFilterCPUKernel::Init() {
MS_ASSERT(x_tensor != nullptr);
auto *dy_tensor = in_tensors_.at(0);
MS_ASSERT(dy_tensor != nullptr);
#if 0
auto *weight_tensor = out_tensors_.at(0);
MS_ASSERT(weight_tensor != nullptr);
#endif
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
conv_param->output_batch_ = dy_tensor->shape().at(kNHWC_N);
conv_param->input_batch_ = x_tensor->shape().at(kNHWC_N);
@@ -49,7 +46,7 @@ int ConvolutionGradFilterCPUKernel::Init() {
// assume OutCh|kh|kw|InCh
conv_param->input_channel_ = x_tensor->shape().at(kNHWC_C);
conv_param->output_channel_ = dy_tensor->shape().at(kNHWC_C);
// TBD
conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
@@ -113,52 +110,9 @@ int ConvolutionGradFilterCPUKernel::Run() {
gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n);
}
}
// std::cout << "run succ" << std::endl;
return RET_OK;
}
#if 0
OpParameter *PopulateConvolutionGradFilterParameter(const lite::Primitive *primitive) {
ConvParameter *param = new (std::nothrow) ConvParameter();
if (param == nullptr) {
MS_LOG(ERROR) << "new Param for conv grad filter failed.";
return nullptr;
}
param->op_parameter_.type_ = primitive->Type();
auto convg_primitive = primitive->Value()->value_as_Conv2DGradFilter();
param->kernel_h_ = convg_primitive->kernelH();
param->kernel_w_ = convg_primitive->kernelW();
param->stride_h_ = convg_primitive->strideH();
param->stride_w_ = convg_primitive->strideW();
param->dilation_h_ = convg_primitive->dilateH();
param->dilation_w_ = convg_primitive->dilateW();
param->pad_h_ = convg_primitive->padUp();
param->pad_w_ = convg_primitive->padLeft();
param->pad_u_ = convg_primitive->padUp();
param->pad_d_ = convg_primitive->padDown();
param->pad_l_ = convg_primitive->padLeft();
param->pad_r_ = convg_primitive->padRight();
param->group_ = convg_primitive->group();
auto act_type = convg_primitive->activationType();
switch (act_type) {
case schema::ActivationType_RELU:
param->is_relu_ = true;
param->is_relu6_ = false;
break;
case schema::ActivationType_RELU6:
param->is_relu_ = false;
param->is_relu6_ = true;
break;
default:
param->is_relu_ = false;
param->is_relu6_ = false;
break;
}
return reinterpret_cast<OpParameter *>(param);
}
#endif
kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
@@ -27,8 +27,11 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel {
explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionGradFilterCPUKernel() override { delete[] workspace; }
: LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
~ConvolutionGradFilterCPUKernel() override {
if (workspace)
delete[] workspace;
}
int Init() override;
int ReSize() override;
@@ -47,7 +47,6 @@ int ConvolutionGradInputCPUKernel::Init() {
conv_param->input_channel_ = dx_tensor->shape()[(kNHWC_C)];
conv_param->output_channel_ = weight_tensor->shape()[(kNHWC_N)];
// TBD
conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
@@ -59,7 +58,7 @@ int ConvolutionGradInputCPUKernel::Init() {
MS_LOG(ERROR) << "new workspace fail!";
return RET_ERROR;
}
return 0;
return RET_OK;
}
int ConvolutionGradInputCPUKernel::ReSize() { return 0; }
@@ -108,53 +107,8 @@ int ConvolutionGradInputCPUKernel::Run() {
col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param);
}
}
// std::cout << "run succ" << std::endl;
return 0;
}
#if 0
OpParameter *PopulateConvolutionGradInputParameter(const lite::Primitive *primitive) {
ConvParameter *param = new (std::nothrow) ConvParameter();
if (param == nullptr) {
MS_LOG(ERROR) << "new Param for conv grad input failed.";
return nullptr;
}
param->op_parameter_.type_ = primitive->Type();
auto convg_primitive = primitive->Value()->value_as_Conv2DGradInput();
param->kernel_h_ = convg_primitive->kernelH();
param->kernel_w_ = convg_primitive->kernelW();
param->stride_h_ = convg_primitive->strideH();
param->stride_w_ = convg_primitive->strideW();
param->dilation_h_ = convg_primitive->dilateH();
param->dilation_w_ = convg_primitive->dilateW();
param->pad_h_ = convg_primitive->padUp();
param->pad_w_ = convg_primitive->padLeft();
param->pad_u_ = convg_primitive->padUp();
param->pad_d_ = convg_primitive->padDown();
param->pad_l_ = convg_primitive->padLeft();
param->pad_r_ = convg_primitive->padRight();
param->group_ = convg_primitive->group();
auto act_type = convg_primitive->activationType();
switch (act_type) {
case schema::ActivationType_RELU:
param->is_relu_ = true;
param->is_relu6_ = false;
break;
case schema::ActivationType_RELU6:
param->is_relu_ = false;
param->is_relu6_ = true;
break;
default:
param->is_relu_ = false;
param->is_relu6_ = false;
break;
}
return reinterpret_cast<OpParameter *>(param);
return RET_OK;
}
#endif
kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
@@ -27,8 +27,11 @@ class ConvolutionGradInputCPUKernel : public LiteKernel {
explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionGradInputCPUKernel() override { delete[] workspace; }
: LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
~ConvolutionGradInputCPUKernel() override {
if (workspace)
delete[] workspace;
}
int Init() override;
int ReSize() override;
@@ -20,7 +20,6 @@
#include "nnacl/fp32/pooling.h"
#include "nnacl/fp32_grad/pooling_grad.h"
#include "include/errorcode.h"
// #include "src/train/ops/train_ops.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
@@ -51,11 +50,6 @@ int PoolingGradCPUKernel::Init() {
pool_param->input_w_ = in_shape[kNHWC_W];
pool_param->input_batch_ = in_shape[kNHWC_N];
pool_param->input_channel_ = in_shape[kNHWC_C];
// Emir -- here I assume we get the outputshape in the output tensor
// auto *out_tensor = out_tensors_.front();
// auto out_shape = in_tensors_.at(1)->shape();
pool_param->output_h_ = out_shape[kNHWC_H];
pool_param->output_w_ = out_shape[kNHWC_W];
pool_param->output_batch_ = out_shape[kNHWC_N];
@@ -55,53 +55,6 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
}
output2[0] = total_loss / param_->batch_size_;
}
#if 0
void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses,
float *output) const {
float total_loss = 0;
for (int i = 0; i < param_->batch_size_; ++i) {
if (labels[i] < 0) {
MS_LOG(EXCEPTION) << "label value must >= 0";
}
size_t label = labels[i];
if (label > param->number_of_classes_) {
MS_LOG(EXCEPTION) << "error label input!";
} else {
total_loss -= logf(losses[i * param->number_of_classes_ + label]);
}
}
output[0] = total_loss / param->batch_size_;
}
void SoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads,
float *output) const {
size_t row_start = 0;
float total_loss = 0;
for (int i = 0; i < param->batch_size_; ++i) {
if (labels[i] < 0) {
MS_LOG(EXCEPTION) << "label value must >= 0";
}
size_t label = labels[i];
if (label > param->number_of_classes_) {
MS_LOG(EXCEPTION) << "error label input!";
} else {
total_loss -= logf(losses[i * param->number_of_classes_ + label]);
for (size_t j = 0; j < param->number_of_classes_; ++j) {
size_t index = row_start + j;
if (j == label) {
grads[index] = (losses[index] - 1) / param->batch_size_;
} else {
grads[index] = losses[index] / param->batch_size_;
}
}
}
row_start += param->number_of_classes_;
}
output[0] = total_loss / param->batch_size_;
}
#endif
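The dense-label ForwardPostExecute that remains reduces to the standard cross-entropy mean. In essence, as a sketch under the assumption that 'losses' already holds softmax probabilities and 'labels' holds per-class targets (names borrowed from the hunks above):

float total_loss = 0.0f;
for (int i = 0; i < batch_size; ++i) {
  for (int j = 0; j < number_of_classes; ++j) {
    float prob = losses[i * number_of_classes + j];
    total_loss -= labels[i * number_of_classes + j] * logf(prob);   // -sum(y * log(p))
  }
}
output2[0] = total_loss / batch_size;   // mean over the batch, as in the kept code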
int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
@@ -117,11 +70,6 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
float *losses = new (std::nothrow) float[data_size];
if (losses == nullptr) {
MS_LOG(ERROR) << "losses is null";
return RET_ERROR;
}
MS_ASSERT(out != nullptr);
MS_ASSERT(labels != nullptr);
@@ -151,9 +99,16 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
size_t data_size = in_tensors_.at(0)->ElementsNum();
losses_ = new (std::nothrow) float[data_size];
if (losses_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc losses!";
return RET_ERROR;
}
sum_data_ = new (std::nothrow) float[dims[0]];
MS_ASSERT(losses_ != nullptr);
MS_ASSERT(sum_data_ != nullptr);
if (sum_data_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc sum_data_!";
return RET_ERROR;
}
sm_params_.n_dim_ = 2;
sm_params_.element_size_ = data_size;
@@ -33,12 +33,14 @@ class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
const std::vector<lite::Tensor *> &outputs,
const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LossKernel(parameter, inputs, outputs, ctx, primitive) {
: LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) {
param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
}
~SoftmaxCrossEntropyWithLogitsCPUKernel() override {
delete[] losses_;
delete[] sum_data_;
if (losses_)
delete[] losses_;
if (sum_data_)
delete[] sum_data_;
}
void ForwardPostExecute(const float *labels, const float *logits,
@@ -30,8 +30,6 @@ using mindspore::lite::RET_OK;
namespace mindspore::kernel {
int SoftmaxGradCPUKernel::Init() {
// auto input_tensor =in_tensors_.at(0);
param = reinterpret_cast<SoftmaxParameter *>(op_parameter_);
auto in_shape = in_tensors_.at(0)->shape();
auto in_dims = in_shape.size();
@@ -43,7 +41,6 @@ int SoftmaxGradCPUKernel::Init() {
}
param->element_size_ = ele_size;
// malloc tmp buffer
auto axis = param->axis_;
if ((axis < -1) || (axis > param->n_dim_)) {
MS_LOG(ERROR) << "SoftmaxGrad axis is invalid!";
@@ -57,9 +54,17 @@ int SoftmaxGradCPUKernel::Init() {
}
sum_data_ = new (std::nothrow) float[inner_size];
MS_ASSERT(sum_data_ != nullptr);
if (sum_data_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc sum_data_!";
return RET_ERROR;
}
sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]];
MS_ASSERT(sum_mul_ != nullptr);
if (sum_mul_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc sum_mul_!";
return RET_ERROR;
}
return RET_OK;
}
@@ -28,11 +28,15 @@ class SoftmaxGradCPUKernel : public LiteKernel {
explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
const lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
: LiteKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr), sum_mul_(nullptr) {
param = reinterpret_cast<SoftmaxParameter *>(parameter);
}
~SoftmaxGradCPUKernel() override = default;
~SoftmaxGradCPUKernel() override {
if (sum_data_)
delete[] sum_data_;
if (sum_mul_)
delete[] sum_mul_;
}
int Init() override;
int ReSize() override;
int Run() override;
@@ -89,12 +89,6 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
float *losses = new (std::nothrow) float[data_size];
if (losses == nullptr) {
MS_LOG(ERROR) << "losses is null";
return RET_ERROR;
}
MS_ASSERT(out != nullptr);
MS_ASSERT(labels != nullptr);
MS_ASSERT(ins != nullptr);
@@ -128,12 +122,18 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
MS_LOG(ERROR) << "softmax etropy loss in0 have no data";
return RET_ERROR;
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
losses_ = new (std::nothrow) float[data_size];
if (losses_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc losses!";
return RET_ERROR;
}
sum_data_ = new (std::nothrow) float[dims[0]];
MS_ASSERT(losses_ != nullptr);
MS_ASSERT(sum_data_ != nullptr);
if (sum_data_ == nullptr) {
MS_LOG(ERROR) << "failed to malloc sum_data_!";
return RET_ERROR;
}
sm_params_.n_dim_ = 2;
sm_params_.element_size_ = data_size;
@@ -33,12 +33,14 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
const std::vector<lite::Tensor *> &outputs,
const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LossKernel(parameter, inputs, outputs, ctx, primitive) {
: LossKernel(parameter, inputs, outputs, ctx, primitive) , losses_(nullptr), sum_data_(nullptr) {
param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
}
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {
delete[] losses_;
delete[] sum_data_;
if (losses_)
delete[] losses_;
if (sum_data_)
delete[] sum_data_;
}
void ForwardPostExecute(const int *labels, const float *losses, float *output) const;
@@ -47,9 +47,19 @@ int TrainSession::CompileGraph(lite::Model *model) {
return LiteSession::CompileGraph(model);
}
void *TrainSession::ExportToBuf(void *buf, size_t *len) const {
// auto train_model_impl = (dynamic_cast<lite::train::TrainModelImpl*>(model_->model_impl()));
// return train_model_impl->ExportToBuf(buf, len);
TrainSession::~TrainSession() {
for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
// Delete if not from output_node_map_
auto tensor_ptr = it1->second.back();
delete tensor_ptr;
it1->second.pop_back();
}
}
}
void *TrainSession::ExportToBuf(lite::Model *model, void *buf, size_t *len) const {
// return model->ExportBuf(buf, len);
return nullptr;
}
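The destructor above, and the train()/eval() hunks that follow, all apply one cleanup rule to ext_output_map_. Stated on its own as a sketch (member names taken from the hunks; not a verbatim excerpt):

for (auto &entry : ext_output_map_) {
  // Tensors that are not shared with output_node_map_ (or any entry while in
  // train mode) were copied by the session and are owned here, so free them.
  if ((output_node_map_.find(entry.first) == output_node_map_.end()) || train_mode_) {
    delete entry.second.back();
    entry.second.pop_back();
  }
}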
@@ -61,7 +71,7 @@ int TrainSession::RunGraph(const session::KernelCallBack &before, const session:
if (train_mode_) return LiteSession::RunGraph(before, after);
// object is expected to run only inference part of graph
// prepare a lit of kernels till the loss function -- temporary solution
// prepare a list of kernels till the loss function -- temporary solution
std::vector<kernel::LiteKernel *> infference_kernels;
for (auto kernel : this->kernels_) {
if (dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) break;
@@ -86,8 +96,16 @@ void TrainSession::train() {
MS_ASSERT(nullptr != kernel);
kernel->train();
}
train_mode_ = true;
for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
// Delete if not from output_node_map_
auto tensor_ptr = it1->second.back();
delete tensor_ptr;
it1->second.pop_back();
}
}
ext_output_map_.clear();
train_mode_ = true;
for (auto kernel : this->kernels_) {
if (dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) {
auto *ms_tensor = new lite::Tensor(*kernel->out_tensors().at(0));
@@ -101,14 +119,23 @@ void TrainSession::eval() {
MS_ASSERT(nullptr != kernel);
kernel->eval();
}
train_mode_ = false;
kernel::LiteKernel *last_kernel = nullptr;
// We should get in_kernels and then get all last kernels
for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
// Delete if not from output_node_map_
auto tensor_ptr = it1->second.back();
delete tensor_ptr;
it1->second.pop_back();
}
}
ext_output_map_ = output_node_map_;
train_mode_ = false;
for (auto kernel : this->kernels_) {
if ((dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) && (last_kernel != nullptr)) {
auto *ms_tensor = new lite::Tensor(*last_kernel->out_tensors().at(0));
ext_output_map_[last_kernel->name()].emplace_back(ms_tensor);
if (ext_output_map_.find(last_kernel->name()) == ext_output_map_.end()) {
auto *ms_tensor = new lite::Tensor(*last_kernel->out_tensors().at(0));
ext_output_map_[last_kernel->name()].emplace_back(ms_tensor);
}
}
last_kernel = kernel;
}
@@ -110,4 +110,18 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
delete kernel_obj;
MS_LOG(INFO) << "BNGradFp32 passed";
}
#if 0
TEST_F(TestBNGradFp32, BNTtrainFp32) {
auto bn_param = static_cast<BNGradParameter*>(malloc(sizeof(BNGradParameter)));
bn_param->epsilon_ = 0.00001;
bn_param->momentum_ = 0.1;
const int batch = 2;
const int channels = 3;
const int height = 4;
const int width = 5;
auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels});
std::vector<lite::Tensor *> inputs = {x_tensor, x_tensor, scale_tensor, mean_tensor, var_tensor};
}
#endif
} // namespace mindspore
@@ -73,7 +73,6 @@ class NetworkTest : public mindspore::CommonTest {
// +-------------+ |
// V dw(9) |
// +-----------Update-----+
#if 0
TEST_F(NetworkTest, tuning_layer) {
const int BATCH_SIZE = 32;
const int NUM_CLASSES = 10;
@@ -248,12 +247,15 @@ TEST_F(NetworkTest, tuning_layer) {
label->nodeType = schema::NodeType::NodeType_ValueNode;
label->format = schema::Format_NHWC;
label->dataType = TypeId::kNumberTypeInt32;
label->dims = {BATCH_SIZE};
label->dims = {BATCH_SIZE*NUM_CLASSES};
label->offset = -1;
label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float));
int *data = reinterpret_cast<int *>(label->data.data());
for (int i = 0; i < BATCH_SIZE; i++)
for (int j = 0; j < NUM_CLASSES; j++) *(data + i * NUM_CLASSES + j) = j;
// label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float));
// int *data = reinterpret_cast<int *>(label->data.data());
// for (int i = 0; i < BATCH_SIZE; i++) {
// for (int j = 0; j < NUM_CLASSES; j++) {
// *(data + i * NUM_CLASSES + j) = j;
// }
// }
meta_graph->allTensors.emplace_back(std::move(label));
}
// tensor 7 - Softmaxentropy
@@ -378,6 +380,7 @@ TEST_F(NetworkTest, tuning_layer) {
auto ret = session->CompileGraph(model);
ASSERT_EQ(lite::RET_OK, ret);
session->train();
session->train(); // Just double check that calling train twice does not cause a problem
auto inputs = session->GetInputs();
ASSERT_EQ(inputs.size(), 2);
@@ -397,7 +400,7 @@ TEST_F(NetworkTest, tuning_layer) {
delete [] buf;
auto labelTensor = inputs.at(1);
ASSERT_NE(nullptr, labelTensor);
ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum());
ASSERT_EQ(BATCH_SIZE*NUM_CLASSES, labelTensor->ElementsNum());
auto labels = reinterpret_cast<int *>(labelTensor->MutableData());
for (int i = 0; i < BATCH_SIZE; i++) labels[i] = (i * 97) % NUM_CLASSES;
@@ -411,32 +414,67 @@ TEST_F(NetworkTest, tuning_layer) {
auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
std::cout << "==============Initial=Scores===================" << std::endl;
for (int i = 0; i < 20; i++) {
for (int i = 0; i < 10; i++) {
std::cout << outData[i] << ", ";
}
std::cout << std::endl;
session->eval();
session->eval(); // Just double check that calling eval twice does not cause a problem
ret = session->RunGraph();
outputs = session->GetOutputsByName("BiasAdd");
ASSERT_EQ(outputs.size(), 1);
outTensor = (outputs.at(0));
ASSERT_NE(nullptr, outTensor);
// ASSERT_EQ(28 * 28 * 32, outTensor->ElementsNum());
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
std::cout << "==============Scores=after-single=train========" << std::endl;
for (int i = 0; i < 20; i++) {
for (int i = 0; i < 10; i++) {
std::cout << outData[i] << ", ";
}
std::string output_path = "./test_data/train/train_output_32_10.bin";
auto error = lite::RelativeOutputError(outData, output_path);
EXPECT_LT(error, 2e-3);
MS_LOG(INFO) << "TuningLayer passed";
ret = session->RunGraph();
outputs = session->GetOutputsByName("BiasAdd");
ASSERT_EQ(outputs.size(), 1);
outTensor = (outputs.at(0));
ASSERT_NE(nullptr, outTensor);
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
std::cout << "==============Scores=eval-second-time==========" << std::endl;
for (int i = 0; i < 10; i++) {
std::cout << outData[i] << ", ";
}
error = lite::RelativeOutputError(outData, output_path);
EXPECT_LT(error, 2e-3);
session->train();
session->eval(); // do some more zig-zags
ret = session->RunGraph();
outputs = session->GetOutputsByName("BiasAdd");
ASSERT_EQ(outputs.size(), 1);
outTensor = (outputs.at(0));
ASSERT_NE(nullptr, outTensor);
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
std::cout << "==============Scores=Just Checking 3rd time====" << std::endl;
for (int i = 0; i < 10; i++) {
std::cout << outData[i] << ", ";
}
error = lite::RelativeOutputError(outData, output_path);
EXPECT_LT(error, 2e-3);
delete model;
delete session;
MS_LOG(INFO) << "TuningLayer passed";
}
#endif
int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path,
std::function<int32_t(mindspore::session::TrainSession *session, const std::string &)> cb) {
int32_t res = 0;
@@ -30,7 +30,7 @@ class TestSoftmaxCrossEntropyFp32 : public mindspore::CommonTest {
TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
// prepare stage
SoftmaxCrossEntropyParameter *sce_param = new SoftmaxCrossEntropyParameter();
auto sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(malloc(sizeof(SoftmaxCrossEntropyParameter)));
size_t input_size;
std::string input_path = "./test_data/operators/sce_fp32_1_y_6_4.bin";
@@ -83,9 +83,16 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
std::string grad_path = "./test_data/operators/sce_fp32_1_dy_6_4.bin";
lite::CompareOutput(grad, grad_path);
delete sce_param;
l_tensor.SetData(NULL);
y_tensor.SetData(NULL);
delete [] ll_labels;
delete [] labels;
delete [] input_data;
delete [] loss;
delete [] grad;
l_tensor.SetData(nullptr);
y_tensor.SetData(nullptr);
loss_tensor.SetData(nullptr);
grad_tensor.SetData(nullptr);
delete kernel_obj;
MS_LOG(INFO) << "SoftmaxCrossEntropyFp32 passed";
}
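A hedged note on the SetData(nullptr) calls added above: the tensors were pointed at buffers the test allocated itself, so detaching them before the tensors are destroyed keeps the Tensor destructor from freeing memory the test also releases. The pattern in isolation (names illustrative):

float *input_data = new float[input_size / sizeof(float)];
y_tensor.SetData(input_data);   // tensor borrows the test-owned buffer
// ... run the kernel and check outputs ...
y_tensor.SetData(nullptr);      // detach: the tensor must not free memory it does not own
delete [] input_data;           // the test releases its own allocation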