
!6204 tod add and fix tests

Merge pull request !6204 from yonibaehr/export
tags/v1.0.0
mindspore-ci-bot (Gitee) · 5 years ago
parent commit a08d452c79
28 changed files with 239 additions and 373 deletions
  1. +2 -2    mindspore/lite/include/train_session.h
  2. +3 -3    mindspore/lite/internal/src/kernel/fp32_grad/arithmetic_self_grad.cc
  3. +1 -1    mindspore/lite/nnacl/fp32_grad/batch_norm.c
  4. +38 -106 mindspore/lite/nnacl/fp32_grad/pooling_grad.c
  5. +0 -3    mindspore/lite/src/common/file_utils_ext.cc
  6. +0 -2    mindspore/lite/src/executor.cc
  7. +4 -11   mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc
  8. +5 -4    mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h
  9. +0 -3    mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc
  10. +1 -24  mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc
  11. +6 -2   mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h
  12. +4 -2   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc
  13. +5 -2   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h
  14. +2 -48  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc
  15. +5 -2   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h
  16. +2 -48  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc
  17. +5 -2   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h
  18. +0 -6   mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc
  19. +9 -54  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc
  20. +5 -3   mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h
  21. +10 -5  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc
  22. +7 -3   mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h
  23. +9 -9   mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc
  24. +5 -3   mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h
  25. +36 -9  mindspore/lite/src/train/train_session.cc
  26. +14 -0  mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc
  27. +50 -12 mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc
  28. +11 -4  mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc

+2 -2  mindspore/lite/include/train_session.h

@@ -30,14 +30,14 @@ namespace session {
 class TrainSession : public lite::LiteSession {
  public:
  TrainSession();
-  ~TrainSession() = default;
+  ~TrainSession();

  int RunGraph(const session::KernelCallBack &before = nullptr,
               const session::KernelCallBack &after = nullptr) override;

  int CompileGraph(lite::Model *model) override;
  virtual void ReplaceOps();
-  virtual void *ExportToBuf(void *buf, size_t *len) const;
+  virtual void* ExportToBuf(lite::Model *model, void* buf, size_t* len) const;

  // todo: output tensors by tensor name
  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> GetOutputMap() const;

+3 -3  mindspore/lite/internal/src/kernel/fp32_grad/arithmetic_self_grad.cc

@@ -31,9 +31,9 @@ int DoArithmeticSelfGrad(const TensorPtrVector &in_tensors, const TensorPtrVecto
                          mindspore::lite::Allocator *allocator) {
  size_t data_size = in_tensors[0]->ElementsNum();
  OpParameter *param = node->primitive_;
-  float *dy_data = (float *)in_tensors[0]->data_;
-  float *x_data = (float *)in_tensors[1]->data_;
-  float *dx_data = (float *)(float *)out_tensors[0]->data_;
+  float *dy_data = reinterpret_cast<float *>(in_tensors[0]->data_);
+  float *x_data = reinterpret_cast<float *>(in_tensors[1]->data_);
+  float *dx_data = reinterpret_cast<float *>(out_tensors[0]->data_);
  int ret;
  if (param->type_ == KernelType::LogGrad) {
    ret = ElementDiv(dy_data, x_data, dx_data, data_size);

+1 -1  mindspore/lite/nnacl/fp32_grad/batch_norm.c

@@ -28,7 +28,7 @@ void sumSpatialBatch(const float *in, int size, int ch, float *out) {
 }

 static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) {
-  float N = (float)size;
+  float N = (float)(size);
   sumSpatialBatch(in, N, ch, mean);
   for (int f = 0; f < ch; ++f) {
     mean[f] /= N;

+38 -106  mindspore/lite/nnacl/fp32_grad/pooling_grad.c

@@ -31,63 +31,29 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
   int output_h = pooling_param->output_h_;
   int output_batch = pooling_param->output_batch_;

-  const float *inPtr = NULL;
-  // for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
   for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;

   float kk = (float)(win_h * win_w);
   for (uint16_t ib = 0; ib < output_batch; ib++) {
-    float *out;
-    // out = &output_ptr[(ib * output_h * output_w)];
-    out = &output_ptr[(ib * in_h * in_w * channel)];
-    // inPtr = (float *)(&input_ptr[(ib * in_h * in_w)]);
-    inPtr = (float *)(&input_ptr[(ib * output_h * output_w * channel)]);
-    if (1) {  // in->layout() == Tensor::nhwc)
-      // iterate over yt
-      for (uint16_t yh = 0; yh < output_h; yh++) {
-        for (uint16_t yw = 0; yw < output_w; yw++) {
-          for (uint16_t ic = 0; ic < channel; ic++) {
-            int idx = (yw + yh * output_w) * channel + ic;  // (ic*in_h*in_w) + (in_w*yh) + yw;
-            float delta = inPtr[idx] / kk;
-            for (int32_t kh = 0; kh < win_h; kh++) {
-              int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= in_h)) {
-                continue;
-              }
-              for (int32_t kw = 0; kw < win_w; kw++) {
-                int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= in_w)) {
-                  continue;
-                }
-                // out[(xw + output_w * xh) * channel + ic] += delta;
-                out[(xw + in_w * xh) * channel + ic] += delta;
-              }
-            }
-          }
-        }
-      }
-    } else {  // nchw
-      for (uint16_t ic = 0; ic < channel; ic++) {
-        // iterate over yt
-        for (uint16_t yh = 0; yh < output_h; yh++) {
-          for (uint16_t yw = 0; yw < output_w; yw++) {
-            int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
-            float delta = inPtr[idx] / kk;
-            for (int32_t kh = 0; kh < win_h; kh++) {
-              int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= in_h)) {
-                continue;
-              }
-              for (int32_t kw = 0; kw < win_w; kw++) {
-                int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= in_w)) {
-                  continue;
-                }
-                // out[(ic * output_h * output_w) + (xh * output_w) + xw] += delta;
-                out[(ic * in_h * in_w) + (xh * in_w) + xw] += delta;
-              }
-            }
-          }
-        }
-      }
-    }
+    float *out = &output_ptr[(ib * in_h * in_w * channel)];
+    const float *inPtr = &input_ptr[(ib * output_h * output_w * channel)];
+    // iterate over yt
+    for (uint16_t yh = 0; yh < output_h; yh++) {
+      for (uint16_t yw = 0; yw < output_w; yw++) {
+        for (uint16_t ic = 0; ic < channel; ic++) {
+          int idx = (yw + yh * output_w) * channel + ic;  // (ic*in_h*in_w) + (in_w*yh) + yw;
+          float delta = inPtr[idx] / kk;
+          for (int32_t kh = 0; kh < win_h; kh++) {
+            int xh = yh * stride_h + kh - pad_h;
+            if ((xh < 0) || (xh >= in_h)) {
+              continue;
+            }
+            for (int32_t kw = 0; kw < win_w; kw++) {
+              int xw = yw * stride_w + kw - pad_w;
+              if ((xw < 0) || (xw >= in_w)) {
+                continue;
+              }
+              out[(xw + in_w * xh) * channel + ic] += delta;
+            }
+          }
+        }
+      }
+    }
   }
@@ -111,73 +77,39 @@ void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy
   int output_h = pooling_param->output_h_;
   int output_batch = pooling_param->output_batch_;

-  const float *inPtr;
-  const float *dyPtr;
-
   for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;

   for (uint16_t ib = 0; ib < output_batch; ib++) {
-    float *out;
-    out = &output_ptr[(ib * in_h * in_w * channel)];
-    inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
-    dyPtr = (const float *)(&dy_ptr[(ib * output_h * output_w * channel)]);
+    float *out = &output_ptr[(ib * in_h * in_w * channel)];
+    const float *inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
+    const float *dyPtr = (const float *)(&dy_ptr[(ib * output_h * output_w * channel)]);

-    if (1) {  // nhwc
-      for (uint16_t yh = 0; yh < output_h; yh++) {
-        for (uint16_t yw = 0; yw < output_w; yw++) {
-          for (uint16_t ic = 0; ic < channel; ic++) {
-            int idx = (yw + yh * output_w) * channel + ic;
-
-            float delta = dyPtr[idx];
-            float max_val = -FLT_MAX;
-            int max_idx = 0;
-            for (int32_t kh = 0; kh < win_h; kh++) {
-              int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= in_h)) {
-                continue;
-              }
-              for (int32_t kw = 0; kw < win_w; kw++) {
-                int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= in_w)) {
-                  continue;
-                }
-
-                if (inPtr[(xw + in_w * xh) * channel + ic] > max_val) {
-                  max_val = inPtr[(xw + in_w * xh) * channel + ic];
-                  max_idx = (xw + in_w * xh) * channel + ic;
-                }
-              }
-            }
-            out[max_idx] += delta;
-          }
-        }
-      }
-    } else {  // nchw
-      for (uint16_t yh = 0; yh < output_h; yh++) {
-        for (uint16_t yw = 0; yw < output_w; yw++) {
-          for (uint16_t ic = 0; ic < channel; ic++) {
-            int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
-            float delta = dyPtr[idx];
-            float max_val = -FLT_MAX;
-            int max_idx = 0;
-            for (int32_t kh = 0; kh < win_h; kh++) {
-              int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= in_h)) {
-                continue;
-              }
-              for (int32_t kw = 0; kw < win_w; kw++) {
-                int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= in_w)) {
-                  continue;
-                }
-                if (inPtr[(ic * in_h * in_w) + (xh * in_w) + xw] > max_val) {
-                  max_val = inPtr[(ic * in_h * in_w) + (xh * in_w) + xw];
-                  max_idx = (ic * in_h * in_w) + (xh * in_w) + xw;
-                }
-              }
-            }
-            out[max_idx] += delta;
-          }
-        }
-      }
-    }
+    for (uint16_t yh = 0; yh < output_h; yh++) {
+      for (uint16_t yw = 0; yw < output_w; yw++) {
+        for (uint16_t ic = 0; ic < channel; ic++) {
+          int idx = (yw + yh * output_w) * channel + ic;
+
+          float delta = dyPtr[idx];
+          float max_val = -FLT_MAX;
+          int max_idx = 0;
+          for (int32_t kh = 0; kh < win_h; kh++) {
+            int xh = yh * stride_h + kh - pad_h;
+            if ((xh < 0) || (xh >= in_h)) {
+              continue;
+            }
+            for (int32_t kw = 0; kw < win_w; kw++) {
+              int xw = yw * stride_w + kw - pad_w;
+              if ((xw < 0) || (xw >= in_w)) {
+                continue;
+              }
+
+              if (inPtr[(xw + in_w * xh) * channel + ic] > max_val) {
+                max_val = inPtr[(xw + in_w * xh) * channel + ic];
+                max_idx = (xw + in_w * xh) * channel + ic;
+              }
+            }
+          }
+          out[max_idx] += delta;
+        }
+      }
+    }
   }

+0 -3  mindspore/lite/src/common/file_utils_ext.cc

@@ -42,11 +42,9 @@ int CompareRelativeOutput(float *output_data, std::string file_path) {
   size_t output_size;
   auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
   size_t output_num = output_size / sizeof(float);
-  // std::cout << "output num : " << output_num << "\n";
   int error = CompareOutputRelativeData(output_data, ground_truth, output_num);
   delete [] ground_truth;
   if (error > 1e-4) {
-    std::cout << "has accuracy error!\n" << error << "\n";
     return 1;
   }
   return 0;
@@ -56,7 +54,6 @@ float RelativeOutputError(float *output_data, std::string file_path) {
   size_t output_size;
   auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
   size_t output_num = output_size / sizeof(float);
-  std::cout << "output num : " << output_num << "\n";
   float error = CompareOutputRelativeData(output_data, ground_truth, output_num);
   delete [] ground_truth;
   return error;

+0 -2  mindspore/lite/src/executor.cc

@@ -51,8 +51,6 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_
       MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
     }
   }
-  // JBDEBUG
-  // std::cout << "executing kernel " << kernel->name() << "\n";
   auto ret = kernel->Run();
   if (0 != ret) {
     MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();

+4 -11  mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc

@@ -68,19 +68,12 @@ int ApplyMomentumCPUKernel::Init() {
   for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0;

   workspace = new float[elem_num];
+  if (workspace == nullptr) {
+    MS_LOG(ERROR) << "apply momentum workspace fail to malloc!";
+    return RET_ERROR;
+  }
   return 0;
 }
-#if 0
-OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive) {
-  OpParameter *param = new (std::nothrow) OpParameter();
-  if (param == nullptr) {
-    MS_LOG(ERROR) << "new Param for OptMomentum failed.";
-    return nullptr;
-  }
-  param->type_ = primitive->Type();
-  return param;
-}
-#endif

 kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                       const std::vector<lite::Tensor *> &outputs,

+5 -4  mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h

@@ -27,8 +27,11 @@ class ApplyMomentumCPUKernel : public LiteKernel {
   explicit ApplyMomentumCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                                   const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ApplyMomentumCPUKernel() override { delete[] workspace; }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
+  ~ApplyMomentumCPUKernel() override {
+    if (workspace)
+      delete[] workspace;
+  }

   int Init() override;
   int ReSize() override;
@@ -38,8 +41,6 @@ class ApplyMomentumCPUKernel : public LiteKernel {
   float *workspace;
 };

-// OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive);
-
 }  // namespace mindspore::kernel

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_APPLY_MOMENTUM_H_

+0 -3  mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc

@@ -58,7 +58,6 @@ int ArithmeticGradCPUKernel::Init() {
   tile_data1 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
   if (tile_data1 == nullptr) {
     MS_LOG(ERROR) << "new data1 fail!";
-    delete tile_data0;
     return RET_ERROR;
   }

@@ -66,8 +65,6 @@ int ArithmeticGradCPUKernel::Init() {
     tile_data2 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
     if (tile_data2 == nullptr) {
       MS_LOG(ERROR) << "new data2 fail!";
-      delete tile_data0;
-      delete tile_data1;
       return RET_ERROR;
     }
   }

+1 -24  mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc

@@ -29,30 +29,8 @@ using mindspore::lite::RET_OK;
 // using mindspore::lite::REG_OP;
 using mindspore::schema::PrimitiveType_BNGrad;

-/*
-   {dy}
-   {x }
-   {scale }
-   {save_mean }
-   {save_inv_variance }
-*/
-namespace mindspore::kernel {
-
-#if 0
-OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) {
-  BNGradParameter *param = new (std::nothrow) BNGradParameter();
-  if (param == nullptr) {
-    MS_LOG(ERROR) << "new Param for conv grad filter failed.";
-    return nullptr;
-  }
-  param->op_parameter_.type_ = primitive->Type();
-
-  auto bngrad_primitive = primitive->Value()->value_as_BNGrad();
-  param->epsilon_ = bngrad_primitive->eps();
-  param->momentum_ = bngrad_primitive->momentum();
-  return reinterpret_cast<OpParameter *>(param);
-}
-#endif
+namespace mindspore::kernel {
 int BNGradCPUKernel::Init() {
   auto *input_x = in_tensors_.at(1);
   int channels = input_x->shape().at(kNHWC_C);
@@ -68,7 +46,6 @@ int BNGradCPUKernel::Init() {
 int BNGradCPUKernel::ReSize() { return RET_OK; }

 int BNGradCPUKernel::Run() {
-  // std::cout << "run succ" << std::endl;
   auto prepare_ret = Prepare();
   if (prepare_ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;

+6 -2  mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h

@@ -28,8 +28,12 @@ class BNGradCPUKernel : public LiteKernel {
   explicit BNGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                            const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~BNGradCPUKernel() override { delete [] workspace; }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr),
+        workspace_size(0) {}
+  ~BNGradCPUKernel() override {
+    if (workspace)
+      delete [] workspace;
+  }

   int Init() override;
   int ReSize() override;

+4 -2  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc

@@ -47,6 +47,10 @@ int ConvolutionTrainCPUKernel::Init() {
                  conv_param_->input_channel_ / conv_param_->group_;

   workspace = new (std::nothrow) float[ws_size];
+  if (workspace == nullptr) {
+    MS_LOG(ERROR) << "new workspace fail!";
+    return RET_ERROR;
+  }
   return RET_OK;
 }

@@ -95,8 +99,6 @@ int ConvolutionTrainCPUKernel::Run() {
       gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch);
     }
   }
-
-  // std::cout << "run succ" << std::endl;
   return RET_OK;
 }

+5 -2  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h

@@ -27,8 +27,11 @@ class ConvolutionTrainCPUKernel : public LiteKernel {
   explicit ConvolutionTrainCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                      const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                                      const lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionTrainCPUKernel() override { delete[] workspace; }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
+  ~ConvolutionTrainCPUKernel() override {
+    if (workspace)
+      delete[] workspace;
+  }

   int Init() override;
   int ReSize() override;

+2 -48  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc

@@ -37,10 +37,7 @@ int ConvolutionGradFilterCPUKernel::Init() {
   MS_ASSERT(x_tensor != nullptr);
   auto *dy_tensor = in_tensors_.at(0);
   MS_ASSERT(dy_tensor != nullptr);
-#if 0
-  auto *weight_tensor = out_tensors_.at(0);
-  MS_ASSERT(weight_tensor != nullptr);
-#endif

   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
   conv_param->output_batch_ = dy_tensor->shape().at(kNHWC_N);
   conv_param->input_batch_ = x_tensor->shape().at(kNHWC_N);
@@ -49,7 +46,7 @@ int ConvolutionGradFilterCPUKernel::Init() {
   // assume OutCh|kh|kw|InCh
   conv_param->input_channel_ = x_tensor->shape().at(kNHWC_C);
   conv_param->output_channel_ = dy_tensor->shape().at(kNHWC_C);
-  // TBD
   conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
   conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];

@@ -113,52 +110,9 @@ int ConvolutionGradFilterCPUKernel::Run() {
       gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n);
     }
   }
-
-  // std::cout << "run succ" << std::endl;
   return RET_OK;
 }
-#if 0
-OpParameter *PopulateConvolutionGradFilterParameter(const lite::Primitive *primitive) {
-  ConvParameter *param = new (std::nothrow) ConvParameter();
-  if (param == nullptr) {
-    MS_LOG(ERROR) << "new Param for conv grad filter failed.";
-    return nullptr;
-  }
-  param->op_parameter_.type_ = primitive->Type();
-
-  auto convg_primitive = primitive->Value()->value_as_Conv2DGradFilter();
-  param->kernel_h_ = convg_primitive->kernelH();
-  param->kernel_w_ = convg_primitive->kernelW();
-  param->stride_h_ = convg_primitive->strideH();
-  param->stride_w_ = convg_primitive->strideW();
-  param->dilation_h_ = convg_primitive->dilateH();
-  param->dilation_w_ = convg_primitive->dilateW();
-  param->pad_h_ = convg_primitive->padUp();
-  param->pad_w_ = convg_primitive->padLeft();
-  param->pad_u_ = convg_primitive->padUp();
-  param->pad_d_ = convg_primitive->padDown();
-  param->pad_l_ = convg_primitive->padLeft();
-  param->pad_r_ = convg_primitive->padRight();
-  param->group_ = convg_primitive->group();
-  auto act_type = convg_primitive->activationType();
-  switch (act_type) {
-    case schema::ActivationType_RELU:
-      param->is_relu_ = true;
-      param->is_relu6_ = false;
-      break;
-    case schema::ActivationType_RELU6:
-      param->is_relu_ = false;
-      param->is_relu6_ = true;
-      break;
-    default:
-      param->is_relu_ = false;
-      param->is_relu6_ = false;
-      break;
-  }
-
-  return reinterpret_cast<OpParameter *>(param);
-}
-#endif

 kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                        const std::vector<lite::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,

+5 -2  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h

@@ -27,8 +27,11 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel {
   explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                           const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                                           const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionGradFilterCPUKernel() override { delete[] workspace; }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
+  ~ConvolutionGradFilterCPUKernel() override {
+    if (workspace)
+      delete[] workspace;
+  }

   int Init() override;
   int ReSize() override;

+2 -48  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc

@@ -47,7 +47,6 @@ int ConvolutionGradInputCPUKernel::Init() {
   conv_param->input_channel_ = dx_tensor->shape()[(kNHWC_C)];
   conv_param->output_channel_ = weight_tensor->shape()[(kNHWC_N)];

-  // TBD
   conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
   conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];

@@ -59,7 +58,7 @@ int ConvolutionGradInputCPUKernel::Init() {
     MS_LOG(ERROR) << "new workspace fail!";
     return RET_ERROR;
   }
-  return 0;
+  return RET_OK;
 }

 int ConvolutionGradInputCPUKernel::ReSize() { return 0; }
@@ -108,53 +107,8 @@ int ConvolutionGradInputCPUKernel::Run() {
       col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param);
     }
   }
-
-  // std::cout << "run succ" << std::endl;
-  return 0;
-}
-
-#if 0
-OpParameter *PopulateConvolutionGradInputParameter(const lite::Primitive *primitive) {
-  ConvParameter *param = new (std::nothrow) ConvParameter();
-  if (param == nullptr) {
-    MS_LOG(ERROR) << "new Param for conv grad input failed.";
-    return nullptr;
-  }
-  param->op_parameter_.type_ = primitive->Type();
-
-  auto convg_primitive = primitive->Value()->value_as_Conv2DGradInput();
-  param->kernel_h_ = convg_primitive->kernelH();
-  param->kernel_w_ = convg_primitive->kernelW();
-  param->stride_h_ = convg_primitive->strideH();
-  param->stride_w_ = convg_primitive->strideW();
-  param->dilation_h_ = convg_primitive->dilateH();
-  param->dilation_w_ = convg_primitive->dilateW();
-  param->pad_h_ = convg_primitive->padUp();
-  param->pad_w_ = convg_primitive->padLeft();
-  param->pad_u_ = convg_primitive->padUp();
-  param->pad_d_ = convg_primitive->padDown();
-  param->pad_l_ = convg_primitive->padLeft();
-  param->pad_r_ = convg_primitive->padRight();
-  param->group_ = convg_primitive->group();
-  auto act_type = convg_primitive->activationType();
-  switch (act_type) {
-    case schema::ActivationType_RELU:
-      param->is_relu_ = true;
-      param->is_relu6_ = false;
-      break;
-    case schema::ActivationType_RELU6:
-      param->is_relu_ = false;
-      param->is_relu6_ = true;
-      break;
-    default:
-      param->is_relu_ = false;
-      param->is_relu6_ = false;
-      break;
-  }
-
-  return reinterpret_cast<OpParameter *>(param);
+  return RET_OK;
 }
-#endif

 kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                       const std::vector<lite::Tensor *> &outputs,

+5 -2  mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h

@@ -27,8 +27,11 @@ class ConvolutionGradInputCPUKernel : public LiteKernel {
   explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                          const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                                          const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionGradInputCPUKernel() override { delete[] workspace; }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {}
+  ~ConvolutionGradInputCPUKernel() override {
+    if (workspace)
+      delete[] workspace;
+  }

   int Init() override;
   int ReSize() override;

+0 -6  mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc

@@ -20,7 +20,6 @@
 #include "nnacl/fp32/pooling.h"
 #include "nnacl/fp32_grad/pooling_grad.h"
 #include "include/errorcode.h"
-// #include "src/train/ops/train_ops.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
@@ -51,11 +50,6 @@ int PoolingGradCPUKernel::Init() {
   pool_param->input_w_ = in_shape[kNHWC_W];
   pool_param->input_batch_ = in_shape[kNHWC_N];
   pool_param->input_channel_ = in_shape[kNHWC_C];
-
-  // Emir -- here I assume we get the outputshape in the output tensor
-  // auto *out_tensor = out_tensors_.front();
-  // auto out_shape = in_tensors_.at(1)->shape();
-
   pool_param->output_h_ = out_shape[kNHWC_H];
   pool_param->output_w_ = out_shape[kNHWC_W];
   pool_param->output_batch_ = out_shape[kNHWC_N];

+9 -54  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc

@@ -55,53 +55,6 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
   }
   output2[0] = total_loss / param_->batch_size_;
 }

-#if 0
-void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses,
-                                                                float *output) const {
-  float total_loss = 0;
-  for (int i = 0; i < param_->batch_size_; ++i) {
-    if (labels[i] < 0) {
-      MS_LOG(EXCEPTION) << "label value must >= 0";
-    }
-    size_t label = labels[i];
-    if (label > param->number_of_classes_) {
-      MS_LOG(EXCEPTION) << "error label input!";
-    } else {
-      total_loss -= logf(losses[i * param->number_of_classes_ + label]);
-    }
-  }
-  output[0] = total_loss / param->batch_size_;
-}
-
-void SoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads,
-                                                             float *output) const {
-  size_t row_start = 0;
-  float total_loss = 0;
-  for (int i = 0; i < param->batch_size_; ++i) {
-    if (labels[i] < 0) {
-      MS_LOG(EXCEPTION) << "label value must >= 0";
-    }
-    size_t label = labels[i];
-    if (label > param->number_of_classes_) {
-      MS_LOG(EXCEPTION) << "error label input!";
-    } else {
-      total_loss -= logf(losses[i * param->number_of_classes_ + label]);
-      for (size_t j = 0; j < param->number_of_classes_; ++j) {
-        size_t index = row_start + j;
-        if (j == label) {
-          grads[index] = (losses[index] - 1) / param->batch_size_;
-        } else {
-          grads[index] = losses[index] / param->batch_size_;
-        }
-      }
-    }
-    row_start += param->number_of_classes_;
-  }
-  output[0] = total_loss / param->batch_size_;
-}
-#endif
-
 int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
@@ -117,11 +70,6 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
     grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
   }
   size_t data_size = in_tensors_.at(0)->ElementsNum();
-  float *losses = new (std::nothrow) float[data_size];
-  if (losses == nullptr) {
-    MS_LOG(ERROR) << "losses is null";
-    return RET_ERROR;
-  }

   MS_ASSERT(out != nullptr);
   MS_ASSERT(labels != nullptr);
@@ -151,9 +99,16 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() {

   size_t data_size = in_tensors_.at(0)->ElementsNum();
   losses_ = new (std::nothrow) float[data_size];
+  if (losses_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc losses!";
+    return RET_ERROR;
+  }
+
   sum_data_ = new (std::nothrow) float[dims[0]];
-  MS_ASSERT(losses_ != nullptr);
-  MS_ASSERT(sum_data_ != nullptr);
+  if (sum_data_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc sum_data_!";
+    return RET_ERROR;
+  }

   sm_params_.n_dim_ = 2;
   sm_params_.element_size_ = data_size;

+5 -3  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h

@@ -33,12 +33,14 @@ class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
                                                   const std::vector<lite::Tensor *> &outputs,
                                                   const lite::Context *ctx,
                                                   const mindspore::lite::PrimitiveC *primitive)
-      : LossKernel(parameter, inputs, outputs, ctx, primitive) {
+      : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) {
     param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
   }
   ~SoftmaxCrossEntropyWithLogitsCPUKernel() override {
-    delete[] losses_;
-    delete[] sum_data_;
+    if (losses_)
+      delete[] losses_;
+    if (sum_data_)
+      delete[] sum_data_;
   }

   void ForwardPostExecute(const float *labels, const float *logits,

+10 -5  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc

@@ -30,8 +30,6 @@ using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
 int SoftmaxGradCPUKernel::Init() {
-  // auto input_tensor =in_tensors_.at(0);
-
   param = reinterpret_cast<SoftmaxParameter *>(op_parameter_);
   auto in_shape = in_tensors_.at(0)->shape();
   auto in_dims = in_shape.size();
@@ -43,7 +41,6 @@ int SoftmaxGradCPUKernel::Init() {
   }
   param->element_size_ = ele_size;

-  // malloc tmp buffer
   auto axis = param->axis_;
   if ((axis < -1) || (axis > param->n_dim_)) {
     MS_LOG(ERROR) << "SoftmaxGrad axis is invalid!";
@@ -57,9 +54,17 @@ int SoftmaxGradCPUKernel::Init() {
   }

   sum_data_ = new (std::nothrow) float[inner_size];
-  MS_ASSERT(sum_data_ != nullptr);
+  if (sum_data_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc sum_data_!";
+    return RET_ERROR;
+  }
+
   sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]];
-  MS_ASSERT(sum_mul_ != nullptr);
+  if (sum_mul_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc sum_mul_!";
+    return RET_ERROR;
+  }
+
   return RET_OK;
 }

+7 -3  mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h

@@ -28,11 +28,15 @@ class SoftmaxGradCPUKernel : public LiteKernel {
   explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                                 const lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr), sum_mul_(nullptr) {
     param = reinterpret_cast<SoftmaxParameter *>(parameter);
   }
-  ~SoftmaxGradCPUKernel() override = default;
-
+  ~SoftmaxGradCPUKernel() override {
+    if (sum_data_)
+      delete[] sum_data_;
+    if (sum_mul_)
+      delete[] sum_mul_;
+  }
   int Init() override;
   int ReSize() override;
   int Run() override;

+9 -9  mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc

@@ -89,12 +89,6 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
     grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
   }
   size_t data_size = in_tensors_.at(0)->ElementsNum();
-  float *losses = new (std::nothrow) float[data_size];
-  if (losses == nullptr) {
-    MS_LOG(ERROR) << "losses is null";
-    return RET_ERROR;
-  }
-
   MS_ASSERT(out != nullptr);
   MS_ASSERT(labels != nullptr);
   MS_ASSERT(ins != nullptr);
@@ -128,12 +122,18 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
     MS_LOG(ERROR) << "softmax etropy loss in0 have no data";
     return RET_ERROR;
   }

   size_t data_size = in_tensors_.at(0)->ElementsNum();
   losses_ = new (std::nothrow) float[data_size];
+  if (losses_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc losses!";
+    return RET_ERROR;
+  }
+
   sum_data_ = new (std::nothrow) float[dims[0]];
-  MS_ASSERT(losses_ != nullptr);
-  MS_ASSERT(sum_data_ != nullptr);
+  if (sum_data_ == nullptr) {
+    MS_LOG(ERROR) << "failed to malloc sum_data_!";
+    return RET_ERROR;
+  }

   sm_params_.n_dim_ = 2;
   sm_params_.element_size_ = data_size;

+5 -3  mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h

@@ -33,12 +33,14 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         const lite::Context *ctx,
                                                         const mindspore::lite::PrimitiveC *primitive)
-      : LossKernel(parameter, inputs, outputs, ctx, primitive) {
+      : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) {
     param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
   }
   ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {
-    delete[] losses_;
-    delete[] sum_data_;
+    if (losses_)
+      delete[] losses_;
+    if (sum_data_)
+      delete[] sum_data_;
   }

   void ForwardPostExecute(const int *labels, const float *losses, float *output) const;

+36 -9  mindspore/lite/src/train/train_session.cc

@@ -47,9 +47,19 @@ int TrainSession::CompileGraph(lite::Model *model) {
   return LiteSession::CompileGraph(model);
 }

-void *TrainSession::ExportToBuf(void *buf, size_t *len) const {
-  // auto train_model_impl = (dynamic_cast<lite::train::TrainModelImpl*>(model_->model_impl()));
-  // return train_model_impl->ExportToBuf(buf, len);
+TrainSession::~TrainSession() {
+  for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
+    if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
+      // Delete if not from output_node_map_
+      auto tensor_ptr = it1->second.back();
+      delete tensor_ptr;
+      it1->second.pop_back();
+    }
+  }
+}
+
+void *TrainSession::ExportToBuf(lite::Model *model, void *buf, size_t *len) const {
+  // return model->ExportBuf(buf, len);
   return nullptr;
 }

@@ -61,7 +71,7 @@ int TrainSession::RunGraph(const session::KernelCallBack &before, const session:
   if (train_mode_) return LiteSession::RunGraph(before, after);

   // object is expected to run only inference part of graph
-  // prepare a lit of kernels till the loss function -- temporary solution
+  // prepare a list of kernels till the loss function -- temporary solution
   std::vector<kernel::LiteKernel *> infference_kernels;
   for (auto kernel : this->kernels_) {
     if (dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) break;
@@ -86,8 +96,16 @@ void TrainSession::train() {
     MS_ASSERT(nullptr != kernel);
     kernel->train();
   }
-  train_mode_ = true;
+  for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
+    if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
+      // Delete if not from output_node_map_
+      auto tensor_ptr = it1->second.back();
+      delete tensor_ptr;
+      it1->second.pop_back();
+    }
+  }
   ext_output_map_.clear();
+  train_mode_ = true;
   for (auto kernel : this->kernels_) {
     if (dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) {
       auto *ms_tensor = new lite::Tensor(*kernel->out_tensors().at(0));
@@ -101,14 +119,23 @@ void TrainSession::eval() {
     MS_ASSERT(nullptr != kernel);
     kernel->eval();
   }
-  train_mode_ = false;
   kernel::LiteKernel *last_kernel = nullptr;
-  // We should get in_kernels and then get all last kernels
+  for (auto it1 = ext_output_map_.begin(); it1 != ext_output_map_.end(); ++it1) {
+    if ((output_node_map_.find(it1->first) == output_node_map_.end()) || train_mode_) {
+      // Delete if not from output_node_map_
+      auto tensor_ptr = it1->second.back();
+      delete tensor_ptr;
+      it1->second.pop_back();
+    }
+  }
   ext_output_map_ = output_node_map_;
+  train_mode_ = false;
   for (auto kernel : this->kernels_) {
     if ((dynamic_cast<const kernel::LossKernel *>(kernel) != nullptr) && (last_kernel != nullptr)) {
-      auto *ms_tensor = new lite::Tensor(*last_kernel->out_tensors().at(0));
-      ext_output_map_[last_kernel->name()].emplace_back(ms_tensor);
+      if (ext_output_map_.find(last_kernel->name()) == ext_output_map_.end()) {
+        auto *ms_tensor = new lite::Tensor(*last_kernel->out_tensors().at(0));
+        ext_output_map_[last_kernel->name()].emplace_back(ms_tensor);
+      }
     }
     last_kernel = kernel;
   }

+14 -0  mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc

@@ -110,4 +110,18 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
   delete kernel_obj;
   MS_LOG(INFO) << "BNGradFp32 passed";
 }
+
+#if 0
+TEST_F(TestBNGradFp32, BNTtrainFp32) {
+  auto bn_param = static_cast<BNGradParameter*>(malloc(sizeof(BNGradParameter)));
+  bn_param->epsilon_ = 0.00001;
+  bn_param->momentum_ = 0.1;
+  const int batch = 2;
+  const int channels = 3;
+  const int height = 4;
+  const int width = 5;
+  auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels});
+  std::vector<lite::Tensor *> inputs = {x_tensor, x_tensor, scale_tensor, mean_tensor, var_tensor};
+}
+#endif
 }  // namespace mindspore

+50 -12  mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc

@@ -73,7 +73,6 @@ class NetworkTest : public mindspore::CommonTest {
 // +-------------+        |
 //      V       dw(9)     |
 // +-----------Update-----+
-#if 0
 TEST_F(NetworkTest, tuning_layer) {
   const int BATCH_SIZE = 32;
   const int NUM_CLASSES = 10;
@@ -248,12 +247,15 @@ TEST_F(NetworkTest, tuning_layer) {
     label->nodeType = schema::NodeType::NodeType_ValueNode;
     label->format = schema::Format_NHWC;
     label->dataType = TypeId::kNumberTypeInt32;
-    label->dims = {BATCH_SIZE};
+    label->dims = {BATCH_SIZE*NUM_CLASSES};
     label->offset = -1;
-    label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float));
-    int *data = reinterpret_cast<int *>(label->data.data());
-    for (int i = 0; i < BATCH_SIZE; i++)
-      for (int j = 0; j < NUM_CLASSES; j++) *(data + i * NUM_CLASSES + j) = j;
+    // label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float));
+    // int *data = reinterpret_cast<int *>(label->data.data());
+    // for (int i = 0; i < BATCH_SIZE; i++) {
+    //   for (int j = 0; j < NUM_CLASSES; j++) {
+    //     *(data + i * NUM_CLASSES + j) = j;
+    //   }
+    // }
     meta_graph->allTensors.emplace_back(std::move(label));
   }
   // tensor 7 - Softmaxentropy
@@ -378,6 +380,7 @@ TEST_F(NetworkTest, tuning_layer) {
   auto ret = session->CompileGraph(model);
   ASSERT_EQ(lite::RET_OK, ret);
   session->train();
+  session->train();  // Just double check that calling train twice does not cause a problem

   auto inputs = session->GetInputs();
   ASSERT_EQ(inputs.size(), 2);
@@ -397,7 +400,7 @@ TEST_F(NetworkTest, tuning_layer) {
   delete [] buf;
   auto labelTensor = inputs.at(1);
   ASSERT_NE(nullptr, labelTensor);
-  ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum());
+  ASSERT_EQ(BATCH_SIZE*NUM_CLASSES, labelTensor->ElementsNum());
   auto labels = reinterpret_cast<int *>(labelTensor->MutableData());
   for (int i = 0; i < BATCH_SIZE; i++) labels[i] = (i * 97) % NUM_CLASSES;
@@ -411,32 +414,67 @@ TEST_F(NetworkTest, tuning_layer) {
   auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
   ASSERT_NE(nullptr, outData);
   std::cout << "==============Initial=Scores===================" << std::endl;
-  for (int i = 0; i < 20; i++) {
+  for (int i = 0; i < 10; i++) {
     std::cout << outData[i] << ", ";
   }
   std::cout << std::endl;
+  session->eval();
+  session->eval();  // Just double check that calling eval twice does not cause a problem
   ret = session->RunGraph();
   outputs = session->GetOutputsByName("BiasAdd");
   ASSERT_EQ(outputs.size(), 1);
   outTensor = (outputs.at(0));
   ASSERT_NE(nullptr, outTensor);
-  // ASSERT_EQ(28 * 28 * 32, outTensor->ElementsNum());
   ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
   outData = reinterpret_cast<float *>(outTensor->MutableData());
   ASSERT_NE(nullptr, outData);
   std::cout << "==============Scores=after-single=train========" << std::endl;
-  for (int i = 0; i < 20; i++) {
+  for (int i = 0; i < 10; i++) {
     std::cout << outData[i] << ", ";
   }
   std::string output_path = "./test_data/train/train_output_32_10.bin";
   auto error = lite::RelativeOutputError(outData, output_path);
   EXPECT_LT(error, 2e-3);
-  MS_LOG(INFO) << "TuningLayer passed";
+
+  ret = session->RunGraph();
+  outputs = session->GetOutputsByName("BiasAdd");
+  ASSERT_EQ(outputs.size(), 1);
+  outTensor = (outputs.at(0));
+  ASSERT_NE(nullptr, outTensor);
+  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
+  outData = reinterpret_cast<float *>(outTensor->MutableData());
+  ASSERT_NE(nullptr, outData);
+  std::cout << "==============Scores=eval-second-time==========" << std::endl;
+  for (int i = 0; i < 10; i++) {
+    std::cout << outData[i] << ", ";
+  }
+  error = lite::RelativeOutputError(outData, output_path);
+  EXPECT_LT(error, 2e-3);
+
+  session->train();
+  session->eval();  // do some more zig-zags
+  ret = session->RunGraph();
+  outputs = session->GetOutputsByName("BiasAdd");
+  ASSERT_EQ(outputs.size(), 1);
+  outTensor = (outputs.at(0));
+  ASSERT_NE(nullptr, outTensor);
+  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
+  outData = reinterpret_cast<float *>(outTensor->MutableData());
+  ASSERT_NE(nullptr, outData);
+  std::cout << "==============Scores=Just Checking 3rd time====" << std::endl;
+  for (int i = 0; i < 10; i++) {
+    std::cout << outData[i] << ", ";
+  }
+  error = lite::RelativeOutputError(outData, output_path);
+  EXPECT_LT(error, 2e-3);

   delete model;
   delete session;
+  MS_LOG(INFO) << "TuningLayer passed";
 }
-#endif
 int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path,
                      std::function<int32_t(mindspore::session::TrainSession *session, const std::string &)> cb) {
   int32_t res = 0;

+11 -4  mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc

@@ -30,7 +30,7 @@ class TestSoftmaxCrossEntropyFp32 : public mindspore::CommonTest {

 TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
   // prepare stage
-  SoftmaxCrossEntropyParameter *sce_param = new SoftmaxCrossEntropyParameter();
+  auto sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(malloc(sizeof(SoftmaxCrossEntropyParameter)));
   size_t input_size;

   std::string input_path = "./test_data/operators/sce_fp32_1_y_6_4.bin";
@@ -83,9 +83,16 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
   std::string grad_path = "./test_data/operators/sce_fp32_1_dy_6_4.bin";
   lite::CompareOutput(grad, grad_path);

-  delete sce_param;
-  l_tensor.SetData(NULL);
-  y_tensor.SetData(NULL);
+  delete [] ll_labels;
+  delete [] labels;
+  delete [] input_data;
+  delete [] loss;
+  delete [] grad;
+  l_tensor.SetData(nullptr);
+  y_tensor.SetData(nullptr);
+  loss_tensor.SetData(nullptr);
+  grad_tensor.SetData(nullptr);
+  delete kernel_obj;
   MS_LOG(INFO) << "SoftmaxCrossEntropyFp32 passed";
 }
