
!8478 ToD networks test CI

From: @yonibaehr_admin
Reviewed-by:
Signed-off-by:
Tag: tags/v1.1.0
Committed by: mindspore-ci-bot
Commit: 522cefb878
100 changed files with 3640 additions and 706 deletions
  1. +3    -0    mindspore/lite/CMakeLists.txt
  2. +17   -13   mindspore/lite/include/train_model.h
  3. +45   -2    mindspore/lite/include/train_session.h
  4. +2    -0    mindspore/lite/minddata/CMakeLists.txt
  5. +18   -8    mindspore/lite/nnacl/fp32/batchnorm.c
  6. +1    -1    mindspore/lite/nnacl/fp32_grad/activation_grad.c
  7. +102  -0    mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c
  8. +6    -0    mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h
  9. +27   -38   mindspore/lite/nnacl/fp32_grad/batch_norm.c
  10. +6    -5    mindspore/lite/nnacl/fp32_grad/batch_norm.h
  11. +23   -0    mindspore/lite/nnacl/fp32_grad/dropout_grad.c
  12. +31   -0    mindspore/lite/nnacl/fp32_grad/dropout_grad.h
  13. +27   -0    mindspore/lite/nnacl/fp32_grad/dropout_parameter.h
  14. +504  -150  mindspore/lite/nnacl/fp32_grad/gemm.c
  15. +17   -2    mindspore/lite/nnacl/fp32_grad/gemm.h
  16. +123  -24   mindspore/lite/nnacl/fp32_grad/pack_ext.c
  17. +7    -3    mindspore/lite/nnacl/fp32_grad/pack_ext.h
  18. +3    -4    mindspore/lite/nnacl/fp32_grad/pooling_grad.c
  19. +2    -45   mindspore/lite/nnacl/fp32_grad/reduce_grad.c
  20. +8    -2    mindspore/lite/nnacl/fp32_grad/softmax_grad.c
  21. +72   -0    mindspore/lite/nnacl/fp32_grad/utils.h
  22. +3    -0    mindspore/lite/schema/model.fbs
  23. +15   -5    mindspore/lite/schema/ops.fbs
  24. +15   -4    mindspore/lite/src/lite_kernel.h
  25. +1    -1    mindspore/lite/src/ops/adam.cc
  26. +8    -2    mindspore/lite/src/ops/arithmetic_grad.cc
  27. +6    -1    mindspore/lite/src/ops/bias_grad.cc
  28. +13   -3    mindspore/lite/src/ops/bn_grad.cc
  29. +42   -25   mindspore/lite/src/ops/conv2d_grad_filter.cc
  30. +1    -0    mindspore/lite/src/ops/conv2d_grad_filter.h
  31. +40   -25   mindspore/lite/src/ops/conv2d_grad_input.cc
  32. +1    -0    mindspore/lite/src/ops/conv2d_grad_input.h
  33. +55   -0    mindspore/lite/src/ops/dropout.cc
  34. +6    -3    mindspore/lite/src/ops/dropout.h
  35. +100  -0    mindspore/lite/src/ops/dropout_grad.cc
  36. +47   -0    mindspore/lite/src/ops/dropout_grad.h
  37. +22   -26   mindspore/lite/src/ops/group_conv2d_grad_input.cc
  38. +1    -0    mindspore/lite/src/ops/group_conv2d_grad_input.h
  39. +3    -3    mindspore/lite/src/ops/maximum.h
  40. +124  -0    mindspore/lite/src/ops/maximum_grad.cc
  41. +46   -0    mindspore/lite/src/ops/maximum_grad.h
  42. +27   -0    mindspore/lite/src/ops/minimum.cc
  43. +4    -3    mindspore/lite/src/ops/minimum.h
  44. +76   -0    mindspore/lite/src/ops/minimum_grad.cc
  45. +45   -0    mindspore/lite/src/ops/minimum_grad.h
  46. +22   -5    mindspore/lite/src/ops/primitive_c.cc
  47. +1    -2    mindspore/lite/src/ops/squeeze.cc
  48. +3    -4    mindspore/lite/src/ops/sub.h
  49. +34   -42   mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
  50. +1    -3    mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h
  51. +48   -14   mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc
  52. +1    -1    mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h
  53. +1    -1    mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc
  54. +7    -12   mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc
  55. +24   -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc
  56. +10   -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h
  57. +17   -9    mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc
  58. +34   -34   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc
  59. +8    -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h
  60. +16   -12   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc
  61. +8    -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h
  62. +27   -10   mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc
  63. +8    -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h
  64. +20   -12   mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc
  65. +4    -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h
  66. +131  -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.cc
  67. +43   -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.h
  68. +118  -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.cc
  69. +43   -0    mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.h
  70. +2    -3    mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc
  71. +2    -2    mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc
  72. +65   -1    mindspore/lite/src/train/train_populate_parameter.cc
  73. +144  -21   mindspore/lite/src/train/train_session.cc
  74. +8    -4    mindspore/lite/src/train/train_session.h
  75. +8    -0    mindspore/lite/test/models_ms_train.cfg
  76. +394  -0    mindspore/lite/test/run_net_train.sh
  77. +2    -2    mindspore/lite/test/run_train_ut.sh
  78. +86   -0    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc
  79. +237  -5    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc
  80. +58   -2    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc
  81. +35   -24   mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc
  82. +50   -9    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc
  83. +54   -12   mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc
  84. +14   -47   mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc
  85. +56   -2    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc
  86. +15   -3    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc
  87. +31   -5    mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc
  88. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_out_50.bin
  89. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_x_50.bin
  90. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_yt_50.bin
  91. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin
  92. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin
  93. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin
  94. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin
  95. +2    -2    mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin
  96. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin
  97. +1    -1    mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin
  98. BIN          mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin
  99. +1    -1    mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin
  100. +1    -1    mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin

mindspore/lite/CMakeLists.txt  (+3 -0)

@@ -219,6 +219,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/internal)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
if (ENABLE_TOOLS)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
    if (SUPPORT_TRAIN)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/net_train)
    endif()
endif()
if (NOT WIN32)
    if (ENABLE_TOOLS)


mindspore/lite/include/train_model.h  (+17 -13)

@@ -18,32 +18,36 @@
#include <vector>
#include "include/model.h"

namespace mindspore::lite {
namespace mindspore {
namespace lite {

/// \brief TrainModel Defines a class that allows to import and export a mindsport trainable model
struct TrainModel : public lite::Model {
  /// \brief Static method to create a TrainModel pointer.
  ///
  /// \param[in] model_buf Define the buffer read from a model file.
  /// \param[in] size Define bytes number of model buffer.
  ///
  /// \return Pointer of MindSpore Lite TrainModel.
  /// \brief Static method to create a TrainModel object
  ///
  /// \param[in] model_buf A buffer that was read from a MS model file
  /// \param[in] size Length of the buffer
  //
  /// \return Pointer to MindSpore Lite TrainModel
  static TrainModel *Import(const char *model_buf, size_t size);

  /// \brief Free meta graph temporary buffer
  /// \brief Free meta graph related data
  void Free() override;

  /// \brief TrainModel destruct, free all memory
  /// \brief Class destructor, free all memory
  virtual ~TrainModel();

  /// \brief Export Model into buf.
  /// \brief Export Model into a buffer
  ///
  /// \param[in] buf Define the buffer to Export into. If nullptr, buf will be allocated
  /// \param[in] len size of the buffer.
  /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated
  /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer
  ///
  /// \return Pointer to buffer with exported model
  char* ExportBuf(char* buf, size_t* len) const;
  char *ExportBuf(char *buf, size_t *len) const;

  size_t buf_size_;
};
}  // namespace mindspore::lite
}  // namespace lite
}  // namespace mindspore

#endif  // MINDSPORE_LITE_INCLUDE_TRAIN_MODEL_H_
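In practice the import/export pair is used roughly as follows; this is an illustrative sketch against the header above (buffer loading and error handling are assumptions, not part of the patch):

// Sketch only: model_buf/size are assumed to hold the contents of a .ms file.
mindspore::lite::TrainModel *model = mindspore::lite::TrainModel::Import(model_buf, size);
if (model == nullptr) {
  return;  // import failed
}
size_t len = 0;
// Passing nullptr asks ExportBuf to allocate; len receives the exported size.
char *exported = model->ExportBuf(nullptr, &len);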

mindspore/lite/include/train_session.h  (+45 -2)

@@ -25,16 +25,59 @@
namespace mindspore {
namespace session {

/// \brief TrainSession Defines a class that allows training a MindSpore model
class TrainSession : public session::LiteSession {
 public:
  /// \brief Class destructor
  virtual ~TrainSession() = default;

  /// \brief Static method to create a TrainSession object
  ///
  /// \param[in] context Defines the context of the session to be created
  ///
  /// \return Pointer of MindSpore Lite TrainSession
  static TrainSession *CreateSession(lite::Context *context);

  /// \brief Compile MindSpore Lite train model
  ///
  /// \note CompileTrainGraph should be called before RunGraph
  ///
  /// \param[in] model Define the model to be compiled
  ///
  /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h
  virtual int CompileTrainGraph(lite::TrainModel *model) = 0;

  /// \brief Export the trained model into a buffer
  ///
  /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated
  /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer
  ///
  /// \return pointer to the export buffer
  virtual void *ExportToBuf(char *buf, size_t *len) const = 0;
  virtual void Train() = 0;

  /// \brief Save the trained model into a flatbuffer file
  ///
  /// \param[in] filename Filename to save flatbuffer to
  ///
  /// \return 0 on success or -1 in case of error
  virtual int SaveToFile(const std::string &filename) const = 0;

  /// \brief Set model to train mode
  /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h
  virtual int Train() = 0;

  /// \brief Check mode of model
  ///
  /// \return boolean indication if model is in train mode
  bool IsTrain() { return train_mode_ == true; }
  virtual void Eval() = 0;

  /// \brief Set model to eval mode
  /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h
  virtual int Eval() = 0;

  /// \brief Check mode of model
  ///
  /// \return boolean indication if model is in eval mode
  bool IsEval() { return train_mode_ == false; }

 protected:
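The call order these comments imply, as a hedged sketch (the data feeding helper and step count are placeholders, not part of this header):

auto *session = mindspore::session::TrainSession::CreateSession(&context);
if (session->CompileTrainGraph(model) != 0) {
  return;  // CompileTrainGraph must succeed before RunGraph
}
session->Train();                    // switch kernels to train mode
for (int step = 0; step < num_steps; ++step) {
  FillInputs(session->GetInputs());  // hypothetical data-feeding helper
  session->RunGraph();               // inherited from LiteSession
}
session->Eval();                     // eval mode, e.g. for validation
session->SaveToFile("trained.ms");   // 0 on success, -1 on error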


mindspore/lite/minddata/CMakeLists.txt  (+2 -0)

@@ -270,11 +270,13 @@ if (BUILD_MINDDATA STREQUAL "full")
    ${CORE_DIR}/utils/ms_utils.cc
    )

find_package(Threads REQUIRED)
target_link_libraries(minddata-lite
    securec
    jpeg-turbo
    jpeg
    mindspore::json
    Threads::Threads
    )

# ref: https://github.com/android/ndk/issues/1202


mindspore/lite/nnacl/fp32/batchnorm.c  (+18 -8)

@@ -55,20 +55,30 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset


void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
float *save_mean, float *save_var) { float *save_mean, float *save_var) {
float N = (float)param->unit_;
const float N = (float)param->unit_;
const float VN = N;
const float VNUB = (N > 1.0f) ? (N - 1.0f) : 1.0f;
const float momentum = (1.0f - param->momentum_);

for (int i = 0; i < param->unit_; i++) { for (int i = 0; i < param->unit_; i++) {
for (int c = 0; c < param->channel_; c++) { for (int c = 0; c < param->channel_; c++) {
int idx = i * param->channel_ + c; int idx = i * param->channel_ + c;
run_mean[c] += input[idx]; run_mean[c] += input[idx];
run_var[c] += input[idx] * input[idx];
} }
} }
const float VN = (N > 1.0f) ? (N - 1.0f) : 1.0f;
for (int c = 0; c < param->channel_; c++) { for (int c = 0; c < param->channel_; c++) {
run_mean[c] = run_mean[c] / N;
run_var[c] = run_var[c] / VN - run_mean[c] * run_mean[c];
save_mean[c] = param->momentum_ * save_mean[c] + (1 - param->momentum_) * run_mean[c];
const float var = run_var[c];
save_var[c] = param->momentum_ * save_var[c] + (1 - param->momentum_) * var;
run_mean[c] /= N;
}
for (int i = 0; i < param->unit_; i++) {
for (int c = 0; c < param->channel_; c++) {
int idx = i * param->channel_ + c;
run_var[c] += (input[idx] - run_mean[c]) * (input[idx] - run_mean[c]);
}
}
for (int c = 0; c < param->channel_; c++) {
float unbiased_var = (run_var[c] / VNUB);
run_var[c] = (run_var[c] / VN);
save_mean[c] = momentum * save_mean[c] + (1.0f - momentum) * run_mean[c];
save_var[c] = momentum * save_var[c] + (1.0f - momentum) * unbiased_var;
} }
} }
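Written out, the two-pass version computes per channel c over N = unit_ samples (a restatement of the code above, not text from the patch):

\[ \mu_c = \tfrac{1}{N}\sum_i x_{i,c}, \qquad \sigma_c^2 = \tfrac{1}{N}\sum_i (x_{i,c}-\mu_c)^2, \qquad \hat\sigma_c^2 = \tfrac{1}{N-1}\sum_i (x_{i,c}-\mu_c)^2 \]
\[ \mathrm{save\_mean}_c \leftarrow m\,\mathrm{save\_mean}_c + (1-m)\,\mu_c, \qquad \mathrm{save\_var}_c \leftarrow m\,\mathrm{save\_var}_c + (1-m)\,\hat\sigma_c^2, \qquad m = 1-\mathrm{momentum\_} \]

so the biased variance (VN = N) serves as the batch variance while the running estimate tracks the unbiased one (VNUB = N-1), and the old single-pass E[x^2] - E[x]^2 form, which is numerically less stable, is gone.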

mindspore/lite/nnacl/fp32_grad/activation_grad.c  (+1 -1)

@@ -72,7 +72,7 @@ int HSwishGrad(float *src0, float *src1, int length, float *dst) {


int HSigmoidGrad(float *src0, float *src1, int length, float *dst) {
  for (int i = 0; i < length; ++i) {
    float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f));
    float tmp = (src1[i] > 3.0f ? 0.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f));
    dst[i] = tmp * src0[i];
  }
  return NNACL_OK;
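The one-character fix follows from the hard-sigmoid definition, which saturates on both sides, so its derivative vanishes in both saturated regions (the old code returned slope 1 for x > 3):

\[ \mathrm{HSigmoid}(x) = \begin{cases} 0, & x \le -3 \\ x/6 + 1/2, & -3 < x < 3 \\ 1, & x \ge 3 \end{cases} \qquad \mathrm{HSigmoid}'(x) = \begin{cases} 1/6, & -3 < x < 3 \\ 0, & \text{otherwise} \end{cases} \]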


mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c  (+102 -0)

@@ -15,6 +15,8 @@
 */

#include "nnacl/fp32_grad/arithmetic_grad.h"
#include <string.h>
#include "nnacl/fp32_grad/utils.h"

void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size) {
  for (int i = 0; i < element_size; i++) {
@@ -27,3 +29,103 @@ void ElementMulAndDivNegSquare(const float *a, const float *b, const float *deno
    output[i] = -a[i] * b[i] / (denom[i] * denom[i]);
  }
}

void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims,
const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) {
int num_output0 = 1;
int num_output1 = 1;
int same_shape = 1;
for (int idx = 0; idx < num_dims; ++idx) {
num_output0 *= input0_dims[idx];
num_output1 *= input1_dims[idx];
if (input0_dims[idx] != input1_dims[idx]) {
same_shape = 0;
}
}

if (same_shape) {
int input_iter[8] = {0};

// Iterate through input_data.
do {
size_t offset = GetInputOffset(num_dims, input0_dims, input_iter);
output0[offset] = input0[offset] > input1[offset] ? dy[offset] : 0.;
output1[offset] = input1[offset] >= input0[offset] ? dy[offset] : 0.;
} while (NextIndex(num_dims, input0_dims, input_iter));
} else {
memset(output0, 0, num_output0 * sizeof(float)); // zero output
memset(output1, 0, num_output1 * sizeof(float)); // zero output

int input_iter[8] = {0};
int axes0[5] = {0};
int axes1[5] = {0};
int num_axes0 = 0;
int num_axes1 = 0;
for (int i = 0; i < num_dims; i++) {
if (input0_dims[i] == 1) {
axes0[num_axes0++] = i;
}
if (input1_dims[i] == 1) {
axes1[num_axes1++] = i;
}
}

do {
size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0);
size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1);
size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter);
output0[offset0] += input0[offset0] > input1[offset1] ? dy[yt_offset] : 0.;
output1[offset1] += input1[offset1] >= input0[offset0] ? dy[yt_offset] : 0.;
} while (NextIndex(num_dims, dy_dims, input_iter));
}
}

void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims,
const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) {
int num_output0 = 1;
int num_output1 = 1;
int same_shape = 1;
for (int idx = 0; idx < num_dims; ++idx) {
num_output0 *= input0_dims[idx];
num_output1 *= input1_dims[idx];
if (input0_dims[idx] != input1_dims[idx]) {
same_shape = 0;
}
}

if (same_shape) {
int input_iter[8] = {0};

// Iterate through input_data.
do {
size_t offset = GetInputOffset(num_dims, input0_dims, input_iter);
output0[offset] = input0[offset] < input1[offset] ? dy[offset] : 0.;
output1[offset] = input1[offset] <= input0[offset] ? dy[offset] : 0.;
} while (NextIndex(num_dims, input0_dims, input_iter));
} else {
memset(output0, 0, num_output0 * sizeof(float)); // zero output
memset(output1, 0, num_output1 * sizeof(float)); // zero output

int input_iter[8] = {0};
int axes0[5] = {0};
int axes1[5] = {0};
int num_axes0 = 0;
int num_axes1 = 0;
for (int i = 0; i < num_dims; i++) {
if (input0_dims[i] == 1) {
axes0[num_axes0++] = i;
}
if (input1_dims[i] == 1) {
axes1[num_axes1++] = i;
}
}

do {
size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0);
size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1);
size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter);
output0[offset0] += input0[offset0] < input1[offset1] ? dy[yt_offset] : 0.;
output1[offset1] += input1[offset1] <= input0[offset0] ? dy[yt_offset] : 0.;
} while (NextIndex(num_dims, dy_dims, input_iter));
}
}
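A minimal driver for the same-shape path, to make the tie-breaking visible (illustration only, not part of the patch; output0 uses a strict > while output1 uses >=, so each dy element is routed to exactly one input):

#include <stdio.h>
#include "nnacl/fp32_grad/arithmetic_grad.h"

int main(void) {
  const float a[4] = {1.0f, 5.0f, 2.0f, 3.0f};
  const float b[4] = {4.0f, 5.0f, 0.0f, 3.0f};
  const float dy[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  const int dims[1] = {4};
  float da[4], db[4];
  MaximumByAxes(a, b, dy, dims, dims, dims, da, db, 1);
  for (int i = 0; i < 4; i++) {
    printf("da[%d]=%g db[%d]=%g\n", i, da[i], i, db[i]);  // ties (a == b) credit b
  }
  return 0;
}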

mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h  (+6 -0)

@@ -16,11 +16,17 @@
#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif
void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size);
void ElementMulAndDivNegSquare(const float *a, const float *b, const float *denom, float *output, int element_size);
void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims,
                   const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims);
void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims,
                   const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims);
#ifdef __cplusplus
}
#endif


mindspore/lite/nnacl/fp32_grad/batch_norm.c  (+27 -38)

@@ -17,66 +17,55 @@
#include <string.h>
#include "nnacl/fp32_grad/batch_norm.h"

void sumSpatialBatch(const float *in, int size, int ch, float *out) {
void sumSpatialBatch(const float *in, size_t size, int ch, float *out) {
  memset(out, 0, ch * sizeof(float));
  for (int i = 0; i < size; i++) {
    const float *ptr = in + i * ch;
    for (int c = 0; c < ch; c++) {
  for (size_t i = 0; i < size; i++) {
    const float *ptr = in + (i * ch);
    for (size_t c = 0; c < ch; c++) {
      out[c] += ptr[c];
    }
  }
}


static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) {
float N = (float)(size);
sumSpatialBatch(in, N, ch, mean);
for (int f = 0; f < ch; ++f) {
mean[f] /= N;
}
for (int f = 0; f < ch; f++) {
float tvar = 0;
for (int i = 0; i < N; i++) {
float x = in[i * ch + f];
tvar += (x - mean[f]) * (x - mean[f]);
}
invar[f] = 1.0f / (sqrt(tvar / N + eps));
}
}

void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps,
               float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) {
  meanVar(in, size, channels, eps, mean, invar);
  for (int i = 0; i < size; i++) {
    for (int f = 0; f < channels; f++) {
      int ix = i * channels + f;
void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean,
               float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) {
  const float N = (size);
  for (size_t i = 0; i < size; i++) {
    for (size_t f = 0; f < channels; f++) {
      size_t ix = i * channels + f;
      float x_hat = (in[ix] - mean[f]) * invar[f];
      float dxhat = dout[ix] * scale[f];
      dxhat_sum[f] += dxhat;
      dxhathat_sum[f] += dxhat * x_hat;
      float dx_hat = dout[ix] * scale[f];
      dxhat_sum[f] += dx_hat;
      dxhathat_sum[f] += dx_hat * x_hat;
    }
  }
  for (int i = 0; i < size; i++) {
    for (int f = 0; f < channels; f++) {
      int ix = i * channels + f;
  for (size_t i = 0; i < size; i++) {
    for (size_t f = 0; f < channels; f++) {
      size_t ix = i * channels + f;
      float x_hat = (in[ix] - mean[f]) * invar[f];
      float dxhat = dout[ix] * scale[f];
      out[ix] = 1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]);
      float dx_hat = dout[ix] * scale[f];
      out[ix] = 1.0f / N * (invar[f]) * (N * dx_hat - dxhat_sum[f] - x_hat * dxhathat_sum[f]);
    }
  }
}


void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch,
                   int n, int size, float *scale_updates) {
  int i, b, f;
void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n,
                   int size, float *scale_updates) {
  size_t i, b, f;
  memset(scale_updates, 0, n * sizeof(float));
  for (b = 0; b < batch; ++b) {
    for (i = 0; i < size; ++i) {
      for (f = 0; f < n; ++f) {
        int index = (b * size + i) * n + f;
        float x_norm = (x[index] - mean[f]) * invar[f];
        scale_updates[f] += delta[index] * x_norm;
        scale_updates[f] += (delta[index] * x_norm);
      }
    }
  }
}


void var2Invar(float *save_var, size_t size, float eps) {
for (size_t i = 0; i < size; i++) {
save_var[i] = 1.0f / sqrt(save_var[i] + eps);
}
}
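backwardX is the standard batch-norm input gradient; with \(\hat x_i = (x_i-\mu)\,\mathrm{invar}\) and \(d\hat x_i = \mathrm{dout}_i\,\gamma\) it evaluates, per channel (restated from the code, not stated in the patch),

\[ dx_i = \frac{\mathrm{invar}}{N}\Big( N\,d\hat x_i - \sum_j d\hat x_j - \hat x_i \sum_j d\hat x_j\,\hat x_j \Big) \]

where \(\mathrm{invar} = 1/\sqrt{\sigma^2+\epsilon}\) is now produced once by var2Invar from the saved variance, replacing the deleted in-kernel meanVar recomputation.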

mindspore/lite/nnacl/fp32_grad/batch_norm.h  (+6 -5)

@@ -29,11 +29,12 @@ typedef struct BNGradParameter {
extern "C" { extern "C" {
#endif #endif


void sumSpatialBatch(const float *in, int size, int ch, float *out);
void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps,
float *mean, float *invar, float *xhat_sum, float *dxhat_sum, float *out);
void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch,
int n, int size, float *scale_updates);
void sumSpatialBatch(const float *in, size_t size, int ch, float *out);
void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean,
float *invar, float *xhat_sum, float *dxhat_sum, float *out);
void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n,
int size, float *scale_updates);
void var2Invar(float *save_var, size_t size, float eps);


#ifdef __cplusplus #ifdef __cplusplus
} }


mindspore/lite/nnacl/fp32_grad/dropout_grad.c  (+23 -0)

@@ -0,0 +1,23 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nnacl/fp32_grad/dropout_grad.h"

void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float scale) {
for (int i = 0; i < length; i++) {
output_ptr[i] = yt_ptr[i] * mask[i] * scale;
}
}
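A sketch of the expected calling convention (inferred from the code, not stated in the patch): for inverted dropout the same binary mask and the same scale must be used on both passes, with scale = 1/(1 - ratio):

#include "nnacl/fp32_grad/dropout_grad.h"

// Hypothetical call site: mask[i] is 0 or 1, saved by the forward pass.
float scale = 1.0f / (1.0f - ratio);  // ratio is the drop probability
// forward: y[i] = x[i] * mask[i] * scale
// backward: route gradients through the same mask with the same scale
DropoutGrad(dy, mask, dx, length, scale);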

mindspore/lite/nnacl/fp32_grad/dropout_grad.h  (+31 -0)

@@ -0,0 +1,31 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif

void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float ratio);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_

mindspore/lite/nnacl/fp32_grad/dropout_parameter.h  (+27 -0)

@@ -0,0 +1,27 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_

#include "nnacl/op_base.h"

typedef struct DropoutParameter {
OpParameter op_parameter_;
float ratio_;
} DropoutParameter;

#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_

mindspore/lite/nnacl/fp32_grad/gemm.c  (+504 -150)

@@ -16,182 +16,536 @@


#include "nnacl/fp32_grad/gemm.h" #include "nnacl/fp32_grad/gemm.h"
#include <string.h> #include <string.h>
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#include "nnacl/fp32/matmul.h"


static void gemm_not_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb,
float *mat_c, int ldc) {
const int block_size = 4;
int block_mod = N % block_size;
int block_c4 = N - block_mod;

int i, j, k;
for (i = 0; i < M; ++i) {
for (k = 0; k < K; ++k) {
float a = alpha * mat_a[i * lda + k];
for (j = 0; j < block_c4; j += block_size) {
float *b = &mat_b[k * ldb + j];
float *c = &mat_c[i * ldc + j];
c[0] += a * b[0];
c[1] += a * b[1];
c[2] += a * b[2];
c[3] += a * b[3];
}
for (; j < N; ++j) {
mat_c[i * ldc + j] += a * mat_b[k * ldb + j];
}
static void addv(const float *restrict v1, float *restrict v2, float beta, int row, int col, int stride) {
const float *src_ptr = v1;
float *dst_ptr = v2;
for (int r = 0; r < row; r++) {
for (int c = 0; c < col; c++) {
dst_ptr[c] += beta * src_ptr[c];
}
src_ptr += stride;
dst_ptr += stride;
}
}

int MatSize(int row, int col, int round) {
int res = UP_ROUND(row, round) * col;
return res;
}

int MatSizeTotal(int row, int col, int deep, int stride) {
#ifdef ENABLE_ARM32
const int num = C4NUM;
#else
const int num = C12NUM;
#endif
int res = MatSize(row, deep, num) + MatSize(col, deep, C8NUM);
if (stride > 0) res += row * stride;
return res;
}
#ifdef ENABLE_ARM32
static void RowMajor2Row4MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) {
for (int r = 0; r < row; r++) {
const float *src = src_ptr + r * lead;
for (int c = 0; c < col; c++) {
int cd8 = c / 4;
int cm8 = c % 4;
dst_ptr[cd8 * 4 * row + r * 4 + cm8] = src[c];
}
}
}
#endif

static void RowMajor2Row8MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) {
for (int r = 0; r < row; r++) {
const float *src = src_ptr + r * lead;
for (int c = 0; c < col; c++) {
int cd8 = c / 8;
int cm8 = c % 8;
dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c];
}
}
return;
}

#ifndef ENABLE_ARM32
static void RowMajor2Row12MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) {
for (int r = 0; r < row; r++) {
const float *src = src_ptr + r * lead;
for (int c = 0; c < col; c++) {
int cd8 = c / C12NUM;
int cm8 = c % C12NUM;
dst_ptr[cd8 * C12NUM * row + r * C12NUM + cm8] = src[c];
    }
  }
  return;
}


static void gemm_not_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb,
float *mat_c, int ldc) {
const int block_size = 4;
int block_mod = K % block_size;
int block_c4 = K - block_mod;

int i, j, k;
for (i = 0; i < M; ++i) {
for (j = 0; j < N; ++j) {
float sum = 0;
for (k = 0; k < block_c4; k += block_size) {
float *a = &mat_a[i * lda + k];
float *b = &mat_b[j * ldb + k];
sum += alpha * a[0] * b[0];
sum += alpha * a[1] * b[1];
sum += alpha * a[2] * b[2];
sum += alpha * a[3] * b[3];
      }
      for (; k < K; ++k) {
        sum += alpha * mat_a[i * lda + k] * mat_b[j * ldb + k];
      }
      mat_c[i * ldc + j] += sum;
    }
  }
}

static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) {
size_t row_up_12 = UP_ROUND(row, C12NUM);
size_t row12 = row / C12NUM * C12NUM;
size_t col4 = col / C4NUM * C4NUM;
const float *src_r = src_ptr;
float *dst_r = dst_ptr;

size_t ri = 0;
for (; ri < row12; ri += C12NUM) {
size_t ci = 0;
for (; ci < col4; ci += C4NUM) {
const float *src_c = src_r + ci;
float *dst_c = dst_r + ci * C12NUM;

/* 12x4 row-major to col-major */
#ifdef ENABLE_ARM64
size_t stride = lead * sizeof(float);
asm volatile(
"mov x10, %[src_c]\n"
"mov x11, %[dst_c]\n"

"ld1 {v0.4s}, [x10], %[stride]\n"
"ld1 {v1.4s}, [x10], %[stride]\n"
"ld1 {v2.4s}, [x10], %[stride]\n"
"ld1 {v3.4s}, [x10], %[stride]\n"

"ld1 {v4.4s}, [x10], %[stride]\n"
"ld1 {v5.4s}, [x10], %[stride]\n"
"ld1 {v6.4s}, [x10], %[stride]\n"
"ld1 {v7.4s}, [x10], %[stride]\n"

"zip1 v12.4s, v0.4s, v1.4s\n"
"zip2 v13.4s, v0.4s, v1.4s\n"
"zip1 v14.4s, v2.4s, v3.4s\n"
"zip2 v15.4s, v2.4s, v3.4s\n"

"ld1 {v8.4s}, [x10], %[stride]\n"
"ld1 {v9.4s}, [x10], %[stride]\n"
"ld1 {v10.4s}, [x10], %[stride]\n"
"ld1 {v11.4s}, [x10], %[stride]\n"

"zip1 v16.4s, v4.4s, v5.4s\n"
"zip2 v17.4s, v4.4s, v5.4s\n"
"zip1 v18.4s, v6.4s, v7.4s\n"
"zip2 v19.4s, v6.4s, v7.4s\n"

"trn1 v20.2d, v12.2d, v14.2d\n"
"trn2 v23.2d, v12.2d, v14.2d\n"
"trn1 v26.2d, v13.2d, v15.2d\n"
"trn2 v29.2d, v13.2d, v15.2d\n"

"trn1 v21.2d, v16.2d, v18.2d\n"
"trn2 v24.2d, v16.2d, v18.2d\n"
"trn1 v27.2d, v17.2d, v19.2d\n"
"trn2 v30.2d, v17.2d, v19.2d\n"

"zip1 v12.4s, v8.4s, v9.4s\n"
"zip2 v13.4s, v8.4s, v9.4s\n"
"zip1 v14.4s, v10.4s, v11.4s\n"
"zip2 v15.4s, v10.4s, v11.4s\n"

"trn1 v22.2d, v12.2d, v14.2d\n"
"trn2 v25.2d, v12.2d, v14.2d\n"
"trn1 v28.2d, v13.2d, v15.2d\n"
"trn2 v31.2d, v13.2d, v15.2d\n"

"st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x11], #64\n"
"st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x11], #64\n"
"st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x11], #64\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"v30", "v31");
#elif ENABLE_ARM32
size_t stride = lead * sizeof(float);
asm volatile(
"mov r10, %[src_c]\n"
"mov r12, %[dst_c]\n"

"vld1.32 {q0}, [r10], %[stride]\n"
"vld1.32 {q3}, [r10], %[stride]\n"
"vld1.32 {q10}, [r10], %[stride]\n"
"vld1.32 {q13}, [r10], %[stride]\n"

"vtrn.32 d0, d6\n"
"vtrn.32 d1, d7\n"
"vtrn.32 d20, d26\n"
"vtrn.32 d21, d27\n"

"vld1.32 {q1}, [r10], %[stride]\n"
"vld1.32 {q8}, [r10], %[stride]\n"
"vld1.32 {q11}, [r10], %[stride]\n"
"vld1.32 {q14}, [r10], %[stride]\n"

"vswp d1, d20\n"
"vswp d7, d26\n"

"vld1.32 {q2}, [r10], %[stride]\n"
"vld1.32 {q9}, [r10], %[stride]\n"
"vld1.32 {q12}, [r10], %[stride]\n"
"vld1.32 {q15}, [r10], %[stride]\n"

"vtrn.32 d2, d16\n"
"vtrn.32 d3, d17\n"
"vtrn.32 d22, d28\n"
"vtrn.32 d23, d29\n"

"vswp d3, d22\n"
"vswp d17, d28\n"

"vtrn.32 d4, d18\n"
"vtrn.32 d5, d19\n"
"vtrn.32 d24, d30\n"
"vtrn.32 d25, d31\n"

"vswp d5, d24\n"
"vswp d19, d30\n"

"vst1.32 {q0, q1}, [r12]!\n"
"vst1.32 {q2, q3}, [r12]!\n"
"vst1.32 {q8, q9}, [r12]!\n"
"vst1.32 {q10, q11}, [r12]!\n"
"vst1.32 {q12, q13}, [r12]!\n"
"vst1.32 {q14, q15}, [r12]!\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "r10", "r12", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
#else
for (int tr = 0; tr < C12NUM; tr++) {
for (int tc = 0; tc < C4NUM; tc++) {
dst_c[tc * C12NUM + tr] = src_c[tr * lead + tc];
}
      }
#endif
    }
    for (; ci < col; ci++) {
      const float *src_c = src_r + ci;
      float *dst_c = dst_r + ci * C12NUM;
      for (size_t i = 0; i < C12NUM; i++) {
        dst_c[i] = src_c[i * lead];
      }
    }
    src_r += C12NUM * lead;
    dst_r += C12NUM * col;
  }

  for (; ri < row; ri++) {
    for (size_t i = 0; i < col; i++) {
      dst_r[i * C12NUM] = src_r[i];
    }
    src_r += lead;
    dst_r += 1;
  }

  for (; ri < row_up_12; ri++) {
    for (size_t i = 0; i < col; i++) {
      dst_r[i * C12NUM] = 0;
    }
    dst_r += 1;
  }
  return;
}
#endif

static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) {
size_t row8 = row / C8NUM * C8NUM;
#ifdef ENABLE_ARM64
size_t col_skip = col / C8NUM * C8NUM;
int skip_size = C8NUM;
#else
size_t col_skip = col / C4NUM * C4NUM;
int skip_size = C4NUM;
#endif
const float *src_r = src_ptr;
float *dst_r = dst_ptr;

size_t ri = 0;
for (; ri < row8; ri += C8NUM) {
size_t ci = 0;
for (; ci < col_skip; ci += skip_size) {
const float *src_c = src_r + ci;
float *dst_c = dst_r + ci * C8NUM;

#ifdef ENABLE_ARM64
/* 8x8 row-major to col-major */
size_t stride = lead * sizeof(float);
asm volatile(
"mov x10, %[src_c]\n"
"mov x11, %[dst_c]\n"

"ld1 {v0.4s, v1.4s}, [x10], %[stride]\n"
"ld1 {v2.4s, v3.4s}, [x10], %[stride]\n"
"ld1 {v4.4s, v5.4s}, [x10], %[stride]\n"
"ld1 {v6.4s, v7.4s}, [x10], %[stride]\n"

"zip1 v8.4s, v0.4s, v2.4s\n"
"zip2 v9.4s, v0.4s, v2.4s\n"
"zip1 v10.4s, v4.4s, v6.4s\n"
"zip2 v11.4s, v4.4s, v6.4s\n"

"ld1 {v16.4s, v17.4s}, [x10], %[stride]\n"
"ld1 {v18.4s, v19.4s}, [x10], %[stride]\n"
"ld1 {v20.4s, v21.4s}, [x10], %[stride]\n"
"ld1 {v22.4s, v23.4s}, [x10], %[stride]\n"

"zip1 v12.4s, v1.4s, v3.4s\n"
"zip2 v13.4s, v1.4s, v3.4s\n"
"zip1 v14.4s, v5.4s, v7.4s\n"
"zip2 v15.4s, v5.4s, v7.4s\n"

"trn1 v0.2d, v8.2d, v10.2d\n"
"trn2 v1.2d, v8.2d, v10.2d\n"
"trn1 v2.2d, v9.2d, v11.2d\n"
"trn2 v3.2d, v9.2d, v11.2d\n"

"zip1 v24.4s, v16.4s, v18.4s\n"
"zip2 v25.4s, v16.4s, v18.4s\n"
"zip1 v26.4s, v20.4s, v22.4s\n"
"zip2 v27.4s, v20.4s, v22.4s\n"

"trn1 v4.2d, v12.2d, v14.2d\n"
"trn2 v5.2d, v12.2d, v14.2d\n"
"trn1 v6.2d, v13.2d, v15.2d\n"
"trn2 v7.2d, v13.2d, v15.2d\n"

"zip1 v28.4s, v17.4s, v19.4s\n"
"zip2 v29.4s, v17.4s, v19.4s\n"
"zip1 v30.4s, v21.4s, v23.4s\n"
"zip2 v31.4s, v21.4s, v23.4s\n"

"trn1 v16.2d, v24.2d, v26.2d\n"
"trn2 v17.2d, v24.2d, v26.2d\n"
"trn1 v18.2d, v25.2d, v27.2d\n"
"trn2 v19.2d, v25.2d, v27.2d\n"

"trn1 v20.2d, v28.2d, v30.2d\n"
"trn2 v21.2d, v28.2d, v30.2d\n"
"trn1 v22.2d, v29.2d, v31.2d\n"
"trn2 v23.2d, v29.2d, v31.2d\n"

"st1 {v0.4s}, [x11], #16\n"
"st1 {v16.4s}, [x11], #16\n"
"st1 {v1.4s}, [x11], #16\n"
"st1 {v17.4s}, [x11], #16\n"
"st1 {v2.4s}, [x11], #16\n"
"st1 {v18.4s}, [x11], #16\n"
"st1 {v3.4s}, [x11], #16\n"
"st1 {v19.4s}, [x11], #16\n"
"st1 {v4.4s}, [x11], #16\n"
"st1 {v20.4s}, [x11], #16\n"
"st1 {v5.4s}, [x11], #16\n"
"st1 {v21.4s}, [x11], #16\n"
"st1 {v6.4s}, [x11], #16\n"
"st1 {v22.4s}, [x11], #16\n"
"st1 {v7.4s}, [x11], #16\n"
"st1 {v23.4s}, [x11], #16\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"v30", "v31");
#elif ENABLE_ARM32
/* 8x4 row-major to col-major */
size_t stride = col * sizeof(float);
asm volatile(
"mov r10, %[src_c]\n"
"mov r11, %[dst_c]\n"

"vld1.32 {q0}, [r10], %[stride]\n"
"vld1.32 {q2}, [r10], %[stride]\n"
"vld1.32 {q4}, [r10], %[stride]\n"
"vld1.32 {q6}, [r10], %[stride]\n"

"vtrn.32 d0, d4\n"
"vtrn.32 d1, d5\n"
"vtrn.32 d8, d12\n"
"vtrn.32 d9, d13\n"

"vld1.32 {q1}, [r10], %[stride]\n"
"vld1.32 {q3}, [r10], %[stride]\n"
"vld1.32 {q5}, [r10], %[stride]\n"
"vld1.32 {q7}, [r10], %[stride]\n"


"vswp d1, d8\n"
"vswp d5, d12\n"

"vtrn.32 d2, d6\n"
"vtrn.32 d3, d7\n"
"vtrn.32 d10, d14\n"
"vtrn.32 d11, d15\n"

"vswp d3, d10\n"
"vswp d7, d14\n"

"vst1.32 {q0, q1}, [r11]!\n"
"vst1.32 {q2, q3}, [r11]!\n"
"vst1.32 {q4, q5}, [r11]!\n"
"vst1.32 {q6, q7}, [r11]!\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
#else
      for (int tr = 0; tr < 8; tr++) {
        for (int tc = 0; tc < 4; tc++) {
          dst_c[tc * 8 + tr] = src_c[tr * lead + tc];
        }
      }
#endif
    }
    for (; ci < col; ci++) {
      const float *src_c = src_r + ci;
      float *dst_c = dst_r + ci * C8NUM;
      for (size_t i = 0; i < C8NUM; i++) {
        dst_c[i] = src_c[i * lead];
      }
    }
    src_r += C8NUM * lead;
    dst_r += C8NUM * col;
  }
  for (; ri < row; ri++) {
    for (size_t i = 0; i < col; i++) {
      dst_r[i * C8NUM] = src_r[i];
    }
    src_r += lead;
    dst_r += 1;
  }
  return;
}
#ifdef ENABLE_ARM32
static void RowMajor2Col4MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) {
size_t row8 = row / C4NUM * C4NUM;
size_t col4 = col / C4NUM * C4NUM;
const float *src_r = src_ptr;
float *dst_r = dst_ptr;

size_t ri = 0;
for (; ri < row8; ri += C4NUM) {
size_t ci = 0;
for (; ci < col4; ci += C4NUM) {
const float *src_c = src_r + ci;
float *dst_c = dst_r + ci * C4NUM;

/* 4x4 row-major to col-major */
#ifdef ENABLE_ARM32
size_t stride = col * 4;
asm volatile(
"mov r10, %[src_c]\n"
"mov r12, %[dst_c]\n"

"vld1.32 {q0}, [r10], %[stride]\n"
"vld1.32 {q1}, [r10], %[stride]\n"
"vld1.32 {q2}, [r10], %[stride]\n"
"vld1.32 {q3}, [r10], %[stride]\n"

"vtrn.32 d0, d2\n"
"vtrn.32 d1, d3\n"
"vtrn.32 d4, d6\n"
"vtrn.32 d5, d7\n"


"vswp d1, d4\n"
"vswp d3, d6\n"

"vst1.32 {q0}, [r12]!\n"
"vst1.32 {q1}, [r12]!\n"
"vst1.32 {q2}, [r12]!\n"
"vst1.32 {q3}, [r12]!\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "r10", "r12", "q0", "q1", "q2", "q3");
#else
for (int tr = 0; tr < C4NUM; tr++) {
for (int tc = 0; tc < C4NUM; tc++) {
dst_c[tc * C4NUM + tr] = src_c[tr * lead + tc];
}
      }
#endif
    }
    for (; ci < col; ci++) {
      const float *src_c = src_r + ci;
      float *dst_c = dst_r + ci * C4NUM;
      for (size_t i = 0; i < C4NUM; i++) {
        dst_c[i] = src_c[i * lead];
      }
    }
    src_r += C4NUM * col;
    dst_r += C4NUM * col;
  }
  for (; ri < row; ri++) {
    for (size_t i = 0; i < col; i++) {
      dst_r[i * C4NUM] = src_r[i];
    }
    src_r += lead;
    dst_r += 1;
  }
  return;
}
#endif

void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b,
                int ldb, float beta, float *mat_c, int ldc, float *workspace) {
  GemmCb gcb;
  gcb.atype = ActType_No;
  gcb.ca = 0;
  gcb.cb = 0;
  gcb.bias = NULL;
  GemmMatmulPlus(ta, tb, M, N, K, alpha, mat_a, lda, mat_b, ldb, beta, mat_c, ldc, workspace, &gcb);
}

static void gemm_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb,
                                 float *mat_c, int ldc) {
  const int block_size = 4;
  int block_mod = N % block_size;
  int block_c4 = N - block_mod;

  int i, j, k;
  for (i = 0; i < M; ++i) {
    for (k = 0; k < K; ++k) {
      float a = alpha * mat_a[k * lda + i];
      for (j = 0; j < block_c4; j += block_size) {
        float *b = &mat_b[k * ldb + j];
        float *c = &mat_c[i * ldc + j];
        c[0] += a * b[0];
        c[1] += a * b[1];
        c[2] += a * b[2];
        c[3] += a * b[3];
      }
      for (; j < N; ++j) {
        mat_c[i * ldc + j] += a * mat_b[k * ldb + j];
      }
    }
  }
}

static void gemm_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb,
                             float *mat_c, int ldc) {
  int i, j, k;
  const int block_size = 4;
  int k_block_mod = K % block_size;
  int k_block_c4 = K - k_block_mod;

  int m_block_mod = M % block_size;
  int m_block_c4 = M - m_block_mod;

  for (i = 0; i < m_block_c4; i += block_size) {
    for (j = 0; j < N; ++j) {
      float sum0 = 0;
      float sum1 = 0;
      float sum2 = 0;
      float sum3 = 0;

      for (k = 0; k < k_block_c4; k += block_size) {
        float *b = &mat_b[j * ldb + k];
        sum0 += alpha * mat_a[i + k * lda] * b[0];
        sum0 += alpha * mat_a[i + (k + 1) * lda] * b[1];
        sum0 += alpha * mat_a[i + (k + 2) * lda] * b[2];
        sum0 += alpha * mat_a[i + (k + 3) * lda] * b[3];

        sum1 += alpha * mat_a[i + 1 + k * lda] * b[0];
        sum1 += alpha * mat_a[i + 1 + (k + 1) * lda] * b[1];
        sum1 += alpha * mat_a[i + 1 + (k + 2) * lda] * b[2];
        sum1 += alpha * mat_a[i + 1 + (k + 3) * lda] * b[3];

        sum2 += alpha * mat_a[i + 2 + k * lda] * b[0];
        sum2 += alpha * mat_a[i + 2 + (k + 1) * lda] * b[1];
        sum2 += alpha * mat_a[i + 2 + (k + 2) * lda] * b[2];
        sum2 += alpha * mat_a[i + 2 + (k + 3) * lda] * b[3];

        sum3 += alpha * mat_a[i + 3 + k * lda] * b[0];
        sum3 += alpha * mat_a[i + 3 + (k + 1) * lda] * b[1];
        sum3 += alpha * mat_a[i + 3 + (k + 2) * lda] * b[2];
        sum3 += alpha * mat_a[i + 3 + (k + 3) * lda] * b[3];
      }
      for (; k < K; ++k) {
        float *b = &mat_b[j * ldb + k];
        sum0 += alpha * mat_a[i + (k * lda)] * b[0];
        sum1 += alpha * mat_a[i + 1 + (k * lda)] * b[0];
        sum2 += alpha * mat_a[i + 2 + (k * lda)] * b[0];
        sum3 += alpha * mat_a[i + 3 + (k * lda)] * b[0];
      }
      mat_c[i * ldc + j] += sum0;
      mat_c[(i + 1) * ldc + j] += sum1;
      mat_c[(i + 2) * ldc + j] += sum2;
      mat_c[(i + 3) * ldc + j] += sum3;
    }
  }
  // no more block of 4x4
  for (; i < M; ++i) {
    for (j = 0; j < N; ++j) {
      float sum = 0;
      for (k = 0; k < K; ++k) {
        sum += alpha * mat_a[i + k * lda] * mat_b[k + j * ldb];
      }
      mat_c[i * ldc + j] += sum;
    }
  }
}

// mat_c = alpha*op( mat_a )*op( mat_b ) + beta*C
// M - number of rows of matrix a
// N - number of cols of matrix b
// K - number of cols of matrix a
// lda - fast dim of matrix a
// ldb - fast dim of matrix b
// ldc - fast dim of matrix c
void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b,
          int ldb, float beta, float *mat_c, int ldc) {
  if (beta >= 0.f && beta <= 0.f) {
    memset(mat_c, 0, M * N * sizeof(float));
  } else if (beta < 1.f || beta > 1.f) {
    const int block_size = 4;
    const int size = M * N;
    int block_mod = size % block_size;
    int block_c4 = size - block_mod;
    int i;
    for (i = 0; i < block_c4; i += block_size) {
      float *c = &mat_c[i];
      c[0] *= beta;
      c[1] *= beta;
      c[2] *= beta;
      c[3] *= beta;
    }
    for (; i < size; ++i) {
      mat_c[i] *= beta;
    }
  }
  if (transpose_a && transpose_b) {
    gemm_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc);
  } else if (!transpose_a && !transpose_b) {
    gemm_not_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc);
  } else if (!transpose_a && transpose_b) {
    gemm_not_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc);
  } else {
    gemm_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc);
  }
}

void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b,
                    int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *gcb) {
#ifdef ENABLE_ARM32
  const int num = C4NUM;
#else
  const int num = C12NUM;
#endif
  float *output = mat_c;
  float *fworkspace = workspace;
  int incremental = (beta < 0.f) || (beta > 0.f);
  float *mat_a_input = (float *)mat_a;
  float *mat_b_input = (float *)mat_b;

#ifdef ENABLE_ARM32
  if (!gcb->ca) {
    mat_a_input = fworkspace;
    fworkspace += MatSize(M, K, num);
    if (ta) {
      RowMajor2Row4MajorStride(mat_a, mat_a_input, K, M, lda);
    } else {
      RowMajor2Col4MajorStride(mat_a, mat_a_input, M, K, lda);
    }
  }
#else
  if (!gcb->ca) {
    mat_a_input = fworkspace;
    fworkspace += MatSize(M, K, num);
    if (ta) {
      RowMajor2Row12MajorStride(mat_a, mat_a_input, K, M, lda);
    } else {
      RowMajor2Col12MajorStride(mat_a, mat_a_input, M, K, lda);
    }
  }
#endif
  if (!gcb->cb) {
    mat_b_input = fworkspace;
    fworkspace += MatSize(N, K, C8NUM);
    if (tb) {
      RowMajor2Col8MajorStride(mat_b, mat_b_input, N, K, ldb);
    } else {
      RowMajor2Row8MajorStride(mat_b, mat_b_input, K, N, ldb);
    }
  }
  if (incremental) output = fworkspace;
  MatMulOpt(mat_a_input, mat_b_input, output, gcb->bias, gcb->atype, K, M, N, ldc, OutType_Nhwc);
  if (incremental) addv(output, mat_c, beta, M, N, ldc);
  gcb->mat_a = mat_a_input;
  gcb->mat_b = mat_b_input;
}

mindspore/lite/nnacl/fp32_grad/gemm.h  (+17 -2)

@@ -17,11 +17,26 @@
#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_

#include <stdlib.h>
#include "nnacl/op_base.h"
#ifdef __cplusplus
extern "C" {
#endif
void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b,
          int ldb, float beta, float *mat_c, int ldc);
typedef struct {
  int ca;
  int cb;
  ActType atype;
  float *bias;
  float *mat_a;
  float *mat_b;
} GemmCb;

void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b,
                    int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *cb);
void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b,
                int ldb, float beta, float *mat_c, int ldc, float *workspace);
int MatSize(int row, int col, int round);
int MatSizeTotal(int row, int col, int deep, int inc);
#ifdef __cplusplus
}
#endif
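A call-site sketch for the new API; the workspace sizing is the contract worth noting (this example and its buffer handling are illustrative assumptions, not from the patch):

#include <stdlib.h>
#include "nnacl/fp32_grad/gemm.h"

// C(M x N) accumulates op(A) * op(B); here A is M x K and B is N x K with tb = 1.
// MatSizeTotal sizes the packing workspace; pass ldc as the last argument
// when beta != 0 so the unscaled product can be staged there as well.
float *workspace = (float *)malloc(MatSizeTotal(M, N, K, 0) * sizeof(float));
GemmMatmul(0, 1, M, N, K, 1.0f, A, K, B, K, 0.0f, C, N, workspace);
free(workspace);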


mindspore/lite/nnacl/fp32_grad/pack_ext.c  (+123 -24)

@@ -16,10 +16,11 @@


#include <string.h>
#include "nnacl/fp32_grad/pack_ext.h"
#include "nnacl/pack.h"

static int is_a_ge_zero_and_a_lt_b(int a, int b) { return (unsigned)(a) < (unsigned)(b); }

void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param) {
void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start) {
  const int pad_left = conv_param->pad_l_;
  const int pad_up = conv_param->pad_u_;


@@ -35,42 +36,42 @@ void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param
  const int in_height = conv_param->input_h_;
  const int in_width = conv_param->input_w_;

  const int output_h = conv_param->output_h_;
  const int output_w = conv_param->output_w_;

  const int channels = conv_param->input_channel_ / conv_param->group_;
  const int tot_channels = conv_param->input_channel_;

  int kernel_row, kernel_col, output_rows, output_col;

  int row_stride_offset = 0;
  for (output_rows = output_h; output_rows; output_rows--) {
    int col_stride_offset = 0;
    for (output_col = output_w; output_col; output_col--) {
      for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
        int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset;
        for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
          int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset;
          if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) {
            const int offset = (input_row * in_width + input_col) * tot_channels;
            memcpy(data_col, in_data + offset, sizeof(float) * channels);
            data_col += channels;
          } else {
            memset(data_col, 0, sizeof(float) * channels);
            data_col += channels;
          }
        }
      }
      col_stride_offset += stride_w;
    }
    row_stride_offset += stride_h;
  }

  int kernel_row, kernel_col;

  for (int i = 0; i < rows; i++) {
    int block_start = start + i;
    int input_h = block_start / output_w * stride_h;
    int input_w = block_start % output_w * stride_w;
    for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
      int input_row = -pad_up + kernel_row * dilation_h + input_h;
      for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
        int input_col = -pad_left + kernel_col * dilation_w + input_w;
        if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) {
          const int offset = (input_row * in_width + input_col) * tot_channels;
          memcpy(data_col, in_data + offset, sizeof(float) * channels);
          data_col += channels;
        } else {
          memset(data_col, 0, sizeof(float) * channels);
          data_col += channels;
        }
      }
    }
  }
}


void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input,
int real_cal_num, int block_index) {
rolling_im2col_hwc(input_data, packed_input, conv_param, real_cal_num, block_index);
}

// output matrix is (kernel_h*kernel_w*channels)X(output_h*output_w)
void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose) {
void im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, bool transpose) {
  const int pad_left = conv_param->pad_l_;
  const int pad_up = conv_param->pad_u_;


@@ -150,7 +151,56 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param
  }
}

void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) {
void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start) {
const int pad_left = conv_param->pad_l_;
const int pad_up = conv_param->pad_u_;

const int stride_h = conv_param->stride_h_;
const int stride_w = conv_param->stride_w_;

const int dilation_h = conv_param->dilation_h_;
const int dilation_w = conv_param->dilation_w_;

const int kernel_h = conv_param->kernel_h_;
const int kernel_w = conv_param->kernel_w_;

const int in_height = conv_param->output_h_;
const int in_width = conv_param->output_w_;

const int output_w = conv_param->input_w_;

const int tot_channels = conv_param->output_channel_;
const int channels = tot_channels / conv_param->group_;
int channel, kernel_row, kernel_col, output_rows, output_col;
for (channel = 0; channel < channels; channel++) {
for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
for (output_rows = start; output_rows < start + rows; output_rows++) {
int input_row = -pad_up + kernel_row * dilation_h + output_rows * stride_h;
if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) {
for (output_col = output_w; output_col; output_col--) {
*(data_row++) = 0;
}
} else {
int input_col = -pad_left + kernel_col * dilation_w;
for (output_col = output_w; output_col; output_col--) {
if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) {
const int offset = (input_row * in_width + input_col) * tot_channels + channel;
*(data_row++) = in_data[offset];
} else {
*(data_row++) = 0;
}
input_col += stride_w;
}
}
// input_row += stride_h;
}
}
}
}
}

void col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param) {
  const int pad_left = conv_param->pad_l_;
  const int pad_up = conv_param->pad_u_;


@@ -198,3 +248,52 @@ void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param
    row_stride_offset += stride_h;
  }
}

void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start) {
const int pad_left = conv_param->pad_l_;
const int pad_up = conv_param->pad_u_;

const int stride_h = conv_param->stride_h_;
const int stride_w = conv_param->stride_w_;

const int dilation_h = conv_param->dilation_h_;
const int dilation_w = conv_param->dilation_w_;

const int kernel_h = conv_param->kernel_h_;
const int kernel_w = conv_param->kernel_w_;

const int in_height = conv_param->input_h_;
const int in_width = conv_param->input_w_;

const int output_w = conv_param->output_w_;
const int channels = conv_param->input_channel_ / conv_param->group_;
const int tot_channels = conv_param->input_channel_;

int kernel_row, kernel_col;

for (int r = 0; r < rows; r++) {
int output_col = (start + r) % output_w;
int output_row = (start + r) / output_w;
int row_stride_offset = output_row * stride_h;
int col_stride_offset = output_col * stride_w;

// for (output_col = 0; output_col < output_w; output_col++)
{
for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset;
for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset;

if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) {
int offset = (input_row * in_width + input_col) * tot_channels;
float *data_im_ptr = &data_im[offset];
for (int i = 0; i < channels; i++) {
data_im_ptr[i] += data_col[i];
}
}
data_col += channels;
}
}
}
}
}
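The "rolling" variants process a window of rows output positions starting at start instead of the whole output plane, so callers can tile the im2col buffer; a hypothetical driver loop (the chunk size is a tuning choice, not from the patch):

const int total = conv_param->output_h_ * conv_param->output_w_;
const int chunk = 4;  // output positions packed per iteration
for (int start = 0; start < total; start += chunk) {
  int rows = (start + chunk <= total) ? chunk : total - start;
  RollingIm2ColPackUnitFp32(in_data, conv_param, packed_input, rows, start);
  // ... consume the rows x (kernel_h * kernel_w * channels) tile here ...
}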

mindspore/lite/nnacl/fp32_grad/pack_ext.h  (+7 -3)

@@ -17,14 +17,18 @@
#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_

#include <stddef.h>
#include "nnacl/conv_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param);
void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose);
void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param);

void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input,
                               int real_cal_num, int block_index);
void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start);
void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start);
void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start);
#ifdef __cplusplus
}
#endif


+ 3
- 4
mindspore/lite/nnacl/fp32_grad/pooling_grad.c

@@ -14,6 +14,7 @@
* limitations under the License.
*/
#include <stdint.h>
#include <string.h>
#include <float.h>
#include "nnacl/fp32_grad/pooling_grad.h"

@@ -31,8 +32,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;

for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;
memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float));
float kk = (float)(win_h * win_w);
for (uint16_t ib = 0; ib < output_batch; ib++) {
float *out = &output_ptr[(ib * in_h * in_w * channel)];
@@ -77,8 +77,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;

for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;
memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float));
for (uint16_t ib = 0; ib < output_batch; ib++) {
float *out = &output_ptr[(ib * in_h * in_w * channel)];
const float *inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
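
Both memset rewrites rely on IEEE-754 guaranteeing that 0.0f is the all-zero bit pattern, so zero-filling bytes matches the removed element-wise loops. A quick standalone check of that assumption:

#include <cassert>
#include <cstring>
int main() {
  float buf[16];
  std::memset(buf, 0, sizeof(buf));       // byte-level zero fill
  for (float v : buf) assert(v == 0.0f);  // reads back as float zero
  return 0;
}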


+ 2
- 45
mindspore/lite/nnacl/fp32_grad/reduce_grad.c

@@ -15,50 +15,7 @@
*/
#include <string.h>
#include "nnacl/fp32_grad/reduce_grad.h"

static inline int NextIndex(const int num_dims, const int *dims, int *current) {
int carry = 1;
for (int idx = num_dims - 1; idx >= 0; --idx) {
int current_val = current[idx] + carry;
if (dims[idx] == current_val) {
current[idx] = 0;
} else {
current[idx] = current_val;
carry = 0;
break;
}
}
return (carry == 0);
}

static inline size_t GetInputOffset(const int num_dims, const int *dims, const int *iter) {
size_t offset = 0;
for (int idx = 0; idx < num_dims; ++idx) {
offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
}

return offset;
}

static inline size_t GetOutputOffset(const int num_dims, const int *dims, const int *iter, const int num_axis,
const int *axes) {
size_t offset = 0;
for (int idx = 0; idx < num_dims; ++idx) {
// if we need to skip this axis
int is_axis = 0;
for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
if (idx == axes[axis_idx]) {
is_axis = 1;
break;
}
}

if (!is_axis) {
offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
}
}
return offset;
}
#include "nnacl/fp32_grad/utils.h"


void ReduceMeanByAxes(const float *input_data, int *input_iter, const int *input_dims, int input_num_dims,
const int *axes, int num_axes, float *output_data, const int *output_dims, int output_num_dims) {
@@ -111,7 +68,7 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c
return;
}


for (int idx = 0; idx < num_outputs; ++idx) output[idx] = 0; // zero output
memset(output, 0, num_outputs * sizeof(float)); // zero output


int input_iter[8] = {0};
int axes[5] = {0};


+ 8
- 2
mindspore/lite/nnacl/fp32_grad/softmax_grad.c

@@ -41,7 +41,6 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr,


const int M = input_shape[axis];
const int N = inner_size;
const int K = 1;
for (int i = 0; i < outter_size; i++) {
int outter_offset = i * dim;
memset(sum_data, 0.0f, inner_size * sizeof(float));
@@ -52,7 +51,14 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr,
sum_data[k] += output_ptr[offset] * input_ptr[offset];
}
}
gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N);
for (int k = 0; k < M; ++k) {
float a = -sum_mul[k];
for (int j = 0; j < N; ++j) {
*(output_ptr + outter_offset + k * N + j) += a * sum_data[j];
}
}

// gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N);
}


for (int i = 0; i < ele_size; i++) {
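
The new double loop performs the same rank-1 update the removed gemm call did: with K = 1, alpha = -1 and beta = 1, gemm computes C -= sum_mul * sum_data^T. A standalone sketch of that equivalence (example values are mine):

#include <cassert>
int main() {
  const int M = 2, N = 3;
  float sum_mul[M] = {1.0f, 2.0f};
  float sum_data[N] = {0.5f, 1.0f, 1.5f};
  float out[M * N] = {0.0f};  // stands in for output_ptr[outter_offset...]
  for (int k = 0; k < M; ++k) {
    float a = -sum_mul[k];
    for (int j = 0; j < N; ++j) out[k * N + j] += a * sum_data[j];
  }
  assert(out[0] == -0.5f && out[1 * N + 2] == -3.0f);  // C -= a * b^T
  return 0;
}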


+ 72
- 0
mindspore/lite/nnacl/fp32_grad/utils.h

@@ -0,0 +1,72 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_
#define MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif

static inline size_t GetInputOffset(int num_dims, const int *dims, const int *iter) {
size_t offset = 0;
for (int idx = 0; idx < num_dims; ++idx) {
offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
}

return offset;
}

static inline size_t GetOutputOffset(int num_dims, const int *dims, const int *iter, int num_axis, const int *axes) {
size_t offset = 0;
for (int idx = 0; idx < num_dims; ++idx) {
// if we need to skip this axis
int is_axis = 0;
for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
if (idx == axes[axis_idx]) {
is_axis = 1;
break;
}
}

if (is_axis == 0) {
offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
}
}
return offset;
}

static inline int NextIndex(int num_dims, const int *dims, int *current) {
int carry = 1;
for (int idx = num_dims - 1; idx >= 0; --idx) {
int current_val = current[idx] + carry;
if (dims[idx] == current_val) {
current[idx] = 0;
} else {
current[idx] = current_val;
carry = 0;
break;
}
}
return (carry == 0);
}

#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_
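
NextIndex advances a multi-dimensional index like an odometer and returns 0 once the space is exhausted; a minimal usage sketch (assuming this header is on the include path):

#include <cstdio>
#include "nnacl/fp32_grad/utils.h"
int main() {
  int dims[2] = {2, 3};
  int iter[2] = {0, 0};                      // starts at (0, 0)
  int count = 1;
  while (NextIndex(2, dims, iter)) ++count;  // (0,1) (0,2) (1,0) (1,1) (1,2)
  printf("%d\n", count);                     // prints 6 = 2 * 3
  return 0;
}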

+ 3
- 0
mindspore/lite/schema/model.fbs

@@ -234,6 +234,9 @@ union PrimitiveType {
BinaryCrossEntropyGrad,
BinaryCrossEntropy,
LpNormalization,
DropoutGrad,
MaximumGrad,
MinimumGrad
}

enum QuantType: int {


+ 15
- 5
mindspore/lite/schema/ops.fbs

@@ -224,6 +224,7 @@ table Conv2DGradFilter {
dilateW: int;
dilateH: int;
hasBias: bool = false;
filter_shape: [int];
activationType: ActivationType = 0;
}

@@ -244,6 +245,7 @@ table Conv2DGradInput {
dilateW: int;
dilateH: int;
hasBias: bool = false;
input_shape: [int];
activationType: ActivationType = 0;
}

@@ -264,6 +266,7 @@ table GroupConv2DGradInput {
dilateW: int;
dilateH: int;
hasBias: bool = false;
input_shape: [int];
activationType: ActivationType = 0;
}

@@ -478,13 +481,10 @@ table DeConv2DGradFilter {
}

table BNGrad {
eps : float;
momentum: float;
}
table BNGradInput {
eps : float;
eps: float;
momentum: float;
}

table Scale {
axis: int;
activationType: ActivationType = 0;
@@ -1087,6 +1087,16 @@ table FftReal {
table FftImag {
}

table DropoutGrad {
ratio : float = 0.5;
}

table MaximumGrad {
}

table MinimumGrad {
}

table NonMaxSuppression {
centerPointBox : int = 0;
}


+ 15
- 4
mindspore/lite/src/lite_kernel.h

@@ -95,13 +95,23 @@ class LiteKernel {


std::string name() const { return this->name_; }


virtual void train() { train_mode_ = true; }
virtual int Train() {
this->train_mode_ = true;
return mindspore::lite::RET_OK;
}

virtual bool IsTrain() const { return this->train_mode_; }

virtual int Eval() {
this->train_mode_ = false;
return mindspore::lite::RET_OK;
}


virtual bool is_train() { return train_mode_; }
virtual bool IsEval() const { return !this->train_mode_; }


virtual void eval() { train_mode_ = false; }
virtual void SetTrainable(bool trainable = true) { this->trainable_ = trainable; }


virtual bool is_eval() { return !train_mode_; }
virtual bool IsTrainable() const { return this->trainable_; }


void set_name(const std::string &name) { this->name_ = name; }


@@ -179,6 +189,7 @@ class LiteKernel {
std::vector<LiteKernel *> in_kernels_;
std::vector<LiteKernel *> out_kernels_;
bool train_mode_ = false;
bool trainable_ = false;  // parameters of this kernel are trained in Train Session
bool is_model_output_ = false;
size_t workspace_size_ = 0;
static void *workspace_;
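
A hypothetical caller of the renamed API (a sketch only; the include path and namespaces are assumptions based on this diff), showing that Train()/Eval() now report a status instead of returning void, while trainability is flagged separately from train mode:

#include "src/lite_kernel.h"  // assumed include path
// `kernel` is assumed to come from a built session's kernel list.
void EnterTrainMode(mindspore::kernel::LiteKernel *kernel) {
  if (kernel->Train() != mindspore::lite::RET_OK) {
    return;                    // Train() now returns a status code
  }
  kernel->SetTrainable(true);  // parameters updated by the train session
}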


+ 1
- 1
mindspore/lite/src/ops/adam.cc

@@ -73,7 +73,7 @@ Registry AdamRegistry(schema::PrimitiveType_Adam, AdamCreator);


int Adam::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
if (10 != inputs.size()) {
MS_LOG(ERROR) << "Adam should have at 10 input tensors";
MS_LOG(ERROR) << "Adam should have 10 input tensors";
return RET_ERROR;
}




+ 8
- 2
mindspore/lite/src/ops/arithmetic_grad.cc

@@ -42,11 +42,18 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<
MS_ASSERT(dx1 != nullptr);
MS_ASSERT(dx2 != nullptr);


if ((Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) {
x1 = inputs_[0];
x2 = inputs_[1];
dy = inputs_[2];
}

auto inShape0 = x1->shape();
auto inShape1 = x2->shape();
auto outShape = dy->shape();


if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) {
if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad) ||
(Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) {
ndim_ = outShape.size();
x1_shape_.resize(ndim_);
x2_shape_.resize(ndim_);
@@ -61,7 +68,6 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<
dy_shape_[i] = outShape[i];
}
} else {
// if (inShape0.size() < inShape1.size())
if (dx1->ElementsNum() < dx2->ElementsNum()) {
ndim_ = inShape1.size();
x1_shape_.resize(ndim_);


+ 6
- 1
mindspore/lite/src/ops/bias_grad.cc

@@ -45,7 +45,12 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i
MS_LOG(ERROR) << "new primitiveT value failed"; MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR; return RET_ERROR;
} }
attr->axis = {0}; // GetValue<std::vector<int>>(prim.GetAttr("axis"));
if (prim.GetAttr("axis") == nullptr) {
MS_LOG(WARNING) << "get axis failed";
attr->axis = {0};
} else {
attr->axis = GetValue<std::vector<int>>(prim.GetAttr("axis"));
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";


+ 13
- 3
mindspore/lite/src/ops/bn_grad.cc

@@ -42,13 +42,16 @@ int BNGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::BNGradInputT();
auto attr = new (std::nothrow) schema::BNGradT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
attr->momentum = GetValue<float>(prim.GetAttr("momentum"));
// FusedBatchNormGrad does not get this attribute
attr->momentum = 0.1f;
if (prim.GetAttr("momentum") != nullptr) {
attr->momentum = GetValue<float>(prim.GetAttr("momentum"));
}
attr->eps = 1e-5;
if (prim.GetAttr("epsilon") != nullptr) {
attr->eps = GetValue<float>(prim.GetAttr("epsilon"));
}
@@ -75,6 +78,9 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers:
return RET_OK;
}


PrimitiveC *BNGradCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC<BNGrad>(primitive); }
Registry BNGradRegistry(schema::PrimitiveType_BNGrad, BNGradCreator);

float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); }
float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); }
#endif
@@ -90,6 +96,10 @@ int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Ten
auto in = inputs[1];
auto scale = inputs[2];

if (in->shape().size() != 4) {
MS_LOG(ERROR) << "Grad Fused batchnorm only supports nhwc input!";
}

outputs[0]->set_shape(in->shape());
outputs[1]->set_shape(scale->shape());
outputs[2]->set_shape(scale->shape());


+ 42
- 25
mindspore/lite/src/ops/conv2d_grad_filter.cc

@@ -38,6 +38,7 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value.AsCon
int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value.AsConv2DGradFilter()->dilateW; }
int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value.AsConv2DGradFilter()->dilateH; }
bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value.AsConv2DGradFilter()->hasBias; }

int Conv2DGradFilter::GetActivationType() const { return this->primitive_->value.AsConv2DGradFilter()->activationType; }

void Conv2DGradFilter::SetFormat(int format) {
@@ -66,6 +67,9 @@ void Conv2DGradFilter::SetPadRight(int pad_right) {
void Conv2DGradFilter::SetDilateW(int dilate_w) { this->primitive_->value.AsConv2DGradFilter()->dilateW = dilate_w; }
void Conv2DGradFilter::SetDilateH(int dilate_h) { this->primitive_->value.AsConv2DGradFilter()->dilateH = dilate_h; }
void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsConv2DGradFilter()->hasBias = has_bias; }
std::vector<int> Conv2DGradFilter::GetFilterShape() const {
return this->primitive_->value.AsConv2DGradFilter()->filter_shape;
}
void Conv2DGradFilter::SetActivationType(int activation_type) {
this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type;
}
@@ -134,6 +138,28 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNod
attr->activationType = schema::ActivationType_NO_ACTIVATION;
}


if (inputs.size() >= kAnfPopulaterThree) {
auto filter_shape = inputs[kAnfPopulaterTwo];
MS_ASSERT(filter_shape != nullptr);
if (filter_shape->isa<ValueNode>()) {
auto valueNode = filter_shape->cast<ValueNodePtr>();
MS_ASSERT(valueNode != nullptr);
auto value = valueNode->value();
MS_ASSERT(value != nullptr);
if (value->isa<ValueTuple>()) {
auto valTuplPtr = dyn_cast<ValueTuple>(value);
MS_ASSERT(valTuplPtr != nullptr);
const int nchw2nhwc[] = {0, 3, 1, 2};
attr->filter_shape.resize(valTuplPtr->size());
for (size_t i = 0; i < valTuplPtr->size(); i++) {
auto elem = dyn_cast<Int32Imm>((*valTuplPtr)[i]);
MS_ASSERT(elem != nullptr);
attr->filter_shape[nchw2nhwc[i]] = elem->value();
}
}
}
}

this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
@@ -151,10 +177,16 @@ int Conv2DGradFilter::UnPackToFlatBuilder(const schema::Primitive *primitive, fl
MS_LOG(ERROR) << "value_as_Conv2DGradFilter return nullptr";
return RET_ERROR;
}
auto val_offset = schema::CreateConv2DGradFilter(
std::vector<int32_t> filter_shape;
if (attr->filter_shape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->filter_shape()->size()); i++) {
filter_shape.push_back(attr->filter_shape()->data()[i]);
}
}
auto val_offset = schema::CreateConv2DGradFilterDirect(
*fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(),
attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(),
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType());
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &filter_shape, attr->activationType());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradFilter, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
@@ -175,6 +207,10 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value_as_Co
int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateW(); }
int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateH(); }
bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value_as_Conv2DGradFilter()->hasBias(); }
std::vector<int> Conv2DGradFilter::GetFilterShape() const {
auto fb_vector = this->primitive_->value_as_Conv2DGradFilter()->filter_shape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}
int Conv2DGradFilter::GetActivationType() const {
return this->primitive_->value_as_Conv2DGradFilter()->activationType();
}
@@ -186,41 +222,22 @@ Registry conv2DGradFilterRegistry(schema::PrimitiveType_Conv2DGradFilter, Conv2D
#endif


int Conv2DGradFilter::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
if (3 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Filter should have 3 inputs";
if (2 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Filter should have 2 inputs, but it got " << inputs.size();
return RET_ERROR;
}
if (1 != outputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Filter should have one output";
MS_LOG(ERROR) << "Conv2d Grad Filter should have one output but it got " << outputs.size();
return RET_ERROR;
}

auto *in0 = inputs.at(0);
auto *in = inputs.at(2);
MS_ASSERT(in0 != nullptr);
MS_ASSERT(in != nullptr);

std::vector<int> output_shape;
int *out_shape = reinterpret_cast<int *>(in->MutableData());
int new_size = in->ElementsNum();
if (in0->GetFormat() == in->GetFormat()) {
for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]);
} else {
if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) {
output_shape.push_back(out_shape[0]);
output_shape.push_back(out_shape[2]);
output_shape.push_back(out_shape[3]);
output_shape.push_back(out_shape[1]);
} else {
MS_LOG(ERROR) << "Shape convert is not supported";
return RET_ERROR;
}
}

auto *out = outputs.at(0);
MS_ASSERT(out != nullptr);

out->set_shape(output_shape);
out->set_shape(GetFilterShape());
out->set_data_type(in0->data_type());
out->SetFormat(in0->GetFormat());
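
The nchw2nhwc table in Conv2DGradFilter::UnPackAttr above scatters a shape tuple given in NCHW order into NHWC order (dest[{0,3,1,2}[i]] = src[i]); a standalone sketch with example values:

#include <cassert>
#include <vector>
int main() {
  const int nchw2nhwc[] = {0, 3, 1, 2};
  std::vector<int> nchw = {8, 16, 3, 3};  // {N, C, H, W}
  std::vector<int> nhwc(4);
  for (size_t i = 0; i < nchw.size(); ++i) nhwc[nchw2nhwc[i]] = nchw[i];
  assert((nhwc == std::vector<int>{8, 3, 3, 16}));  // {N, H, W, C}
  return 0;
}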




+ 1
- 0
mindspore/lite/src/ops/conv2d_grad_filter.h

@@ -72,6 +72,7 @@ class Conv2DGradFilter : public PrimitiveC {
int GetDilateH() const;
bool GetHasBias() const;
int GetActivationType() const;
std::vector<int> GetFilterShape() const;
};
} // namespace lite
} // namespace mindspore


+ 40
- 25
mindspore/lite/src/ops/conv2d_grad_input.cc

@@ -39,6 +39,9 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value.AsConv
int Conv2DGradInput::GetDilateW() const { return this->primitive_->value.AsConv2DGradInput()->dilateW; }
int Conv2DGradInput::GetDilateH() const { return this->primitive_->value.AsConv2DGradInput()->dilateH; }
bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value.AsConv2DGradInput()->hasBias; }
std::vector<int> Conv2DGradInput::GetInputShape() const {
return this->primitive_->value.AsConv2DGradInput()->input_shape;
}
int Conv2DGradInput::GetActivationType() const { return this->primitive_->value.AsConv2DGradInput()->activationType; }

void Conv2DGradInput::SetFormat(int format) {
@@ -137,6 +140,27 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNode
attr->activationType = schema::ActivationType_NO_ACTIVATION;
}


if (inputs.size() >= kAnfPopulaterThree) {
auto input_shape = inputs[kAnfPopulaterTwo];
MS_ASSERT(input_shape != nullptr);
if (input_shape->isa<ValueNode>()) {
auto valueNode = input_shape->cast<ValueNodePtr>();
MS_ASSERT(valueNode != nullptr);
auto value = valueNode->value();
MS_ASSERT(value != nullptr);
if (value->isa<ValueTuple>()) {
auto valTuplPtr = dyn_cast<ValueTuple>(value);
MS_ASSERT(valTuplPtr != nullptr);
const int nchw2nhwc[] = {0, 3, 1, 2};
attr->input_shape.resize(valTuplPtr->size());
for (size_t i = 0; i < valTuplPtr->size(); i++) {
auto elem = dyn_cast<Int32Imm>((*valTuplPtr)[i]);
MS_ASSERT(elem != nullptr);
attr->input_shape[nchw2nhwc[i]] = elem->value();
}
}
}
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
@@ -154,10 +178,16 @@ int Conv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, fla
MS_LOG(ERROR) << "value_as_Conv2DGradInput return nullptr";
return RET_ERROR;
}
auto val_offset = schema::CreateConv2DGradInput(
std::vector<int32_t> input_shape;
if (attr->input_shape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->input_shape()->size()); i++) {
input_shape.push_back(attr->input_shape()->data()[i]);
}
}
auto val_offset = schema::CreateConv2DGradInputDirect(
*fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(),
attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(),
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType());
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradInput, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
@@ -178,6 +208,10 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value_as_Con
int Conv2DGradInput::GetDilateW() const { return this->primitive_->value_as_Conv2DGradInput()->dilateW(); }
int Conv2DGradInput::GetDilateH() const { return this->primitive_->value_as_Conv2DGradInput()->dilateH(); }
bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value_as_Conv2DGradInput()->hasBias(); }
std::vector<int> Conv2DGradInput::GetInputShape() const {
auto fb_vector = this->primitive_->value_as_Conv2DGradInput()->input_shape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}
int Conv2DGradInput::GetActivationType() const {
return this->primitive_->value_as_Conv2DGradInput()->activationType();
}
@@ -189,40 +223,21 @@ Registry Conv2DGradInputRegistry(schema::PrimitiveType_Conv2DGradInput, Conv2DGr
#endif


int Conv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
if (3 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs";
if (2 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Input should have 2 inputs";
return RET_ERROR;
}
if (1 != outputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad input should have one output";
MS_LOG(ERROR) << "Conv2d Grad output should have one output";
return RET_ERROR;
}

auto *in0 = inputs.at(0);
auto *in = inputs.at(2);
MS_ASSERT(in0 != nullptr);
MS_ASSERT(in != nullptr);

std::vector<int> output_shape;
int *out_shape = reinterpret_cast<int *>(in->MutableData());
int new_size = in->ElementsNum();
if (in0->GetFormat() == in->GetFormat()) {
for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]);
} else {
if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) {
output_shape.push_back(out_shape[0]);
output_shape.push_back(out_shape[2]);
output_shape.push_back(out_shape[3]);
output_shape.push_back(out_shape[1]);
} else {
MS_LOG(ERROR) << "Shape convert is not supported";
return RET_ERROR;
}
}

auto *out = outputs.at(0);
MS_ASSERT(out != nullptr);
out->set_shape(output_shape);
out->set_shape(GetInputShape());
out->set_data_type(in0->data_type());
out->SetFormat(in0->GetFormat());




+ 1
- 0
mindspore/lite/src/ops/conv2d_grad_input.h

@@ -72,6 +72,7 @@ class Conv2DGradInput : public PrimitiveC {
int GetDilateH() const;
bool GetHasBias() const;
int GetActivationType() const;
std::vector<int> GetInputShape() const;
};
} // namespace lite
} // namespace mindspore


+ 55
- 0
mindspore/lite/src/ops/dropout.cc

@@ -27,6 +27,37 @@ float Dropout::GetRatio() const { return this->primitive_->value.AsDropout()->ra


void Dropout::SetRatio(float ratio) { this->primitive_->value.AsDropout()->ratio = ratio; }


int Dropout::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_Dropout;
}
if (this->primitive_->value.type != schema::PrimitiveType_Dropout) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::DropoutT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
if (prim.GetAttr("keep_prob") != nullptr) {
attr->ratio = GetValue<float>(prim.GetAttr("keep_prob"));
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}

#else
int Dropout::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
@@ -46,5 +77,29 @@ float Dropout::GetRatio() const { return this->primitive_->value_as_Dropout()->r
PrimitiveC *DropoutCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC<Dropout>(primitive); }
Registry DropoutRegistry(schema::PrimitiveType_Dropout, DropoutCreator);
#endif
int Dropout::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) {
MS_ASSERT(this->primitive_ != nullptr);
auto input = inputs_.front();
MS_ASSERT(input != nullptr);
auto output0 = outputs_.front();
MS_ASSERT(output0 != nullptr);
if (!GetInferFlag()) {
return RET_OK;
}
output0->set_shape(input->shape());
output0->set_data_type(input->data_type());
output0->SetFormat(input->GetFormat());

if (outputs_.size() > 1) {
auto output1 = outputs_[1];
MS_ASSERT(output1 != nullptr);
output1->set_shape(input->shape());
output1->set_data_type(input->data_type());
output1->SetFormat(input->GetFormat());
}

return RET_OK;
}

} // namespace lite
} // namespace mindspore

+ 6
- 3
mindspore/lite/src/ops/dropout.h

@@ -14,8 +14,8 @@
* limitations under the License.
*/


#ifndef LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_
#define LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_
#ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_H_
#define MINDSPORE_LITE_SRC_OPS_DROPOUT_H_


#include <vector>
#include <set>
@@ -32,13 +32,16 @@ class Dropout : public PrimitiveC {
MS_DECLARE_PARENT(Dropout, PrimitiveC);
explicit Dropout(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetRatio(float ratio);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;

#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
float GetRatio() const;
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
};

} // namespace lite
} // namespace mindspore


#endif // LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_
#endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_H_

+ 100
- 0
mindspore/lite/src/ops/dropout_grad.cc

@@ -0,0 +1,100 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "src/ops/dropout_grad.h"

#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif

namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
float DropoutGrad::GetRatio() const { return this->primitive_->value.AsDropoutGrad()->ratio; }

void DropoutGrad::SetRatio(float ratio) { this->primitive_->value.AsDropoutGrad()->ratio = ratio; }

int DropoutGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_DropoutGrad;
}
if (this->primitive_->value.type != schema::PrimitiveType_DropoutGrad) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::DropoutGradT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
if (prim.GetAttr("keep_prob") != nullptr) {
attr->ratio = GetValue<float>(prim.GetAttr("keep_prob"));
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}
#else
int DropoutGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);
auto attr = primitive->value_as_DropoutGrad();
if (attr == nullptr) {
MS_LOG(ERROR) << "value_as_DropoutGrad return nullptr";
return RET_ERROR;
}
auto val_offset = schema::CreateDropoutGrad(*fbb, attr->ratio());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_DropoutGrad, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
float DropoutGrad::GetRatio() const { return this->primitive_->value_as_DropoutGrad()->ratio(); }

PrimitiveC *DropoutGradCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<DropoutGrad>(primitive);
}
Registry DropoutGradRegistry(schema::PrimitiveType_DropoutGrad, DropoutGradCreator);

#endif
int DropoutGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) {
MS_ASSERT(this->primitive_ != nullptr);
MS_ASSERT(inputs_.size() == 2);
auto input = inputs_.front();
MS_ASSERT(input != nullptr);
auto output = outputs_.front();
MS_ASSERT(output != nullptr);
if (!GetInferFlag()) {
return RET_OK;
}
output->set_shape(input->shape());
output->set_data_type(input->data_type());
output->SetFormat(input->GetFormat());

return RET_OK;
}

} // namespace lite
} // namespace mindspore

+ 47
- 0
mindspore/lite/src/ops/dropout_grad.h

@@ -0,0 +1,47 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_
#define MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_

#include <vector>
#include <set>
#include <cmath>
#include "src/ops/primitive_c.h"

namespace mindspore {
namespace lite {
class DropoutGrad : public PrimitiveC {
public:
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(DropoutGrad, PrimitiveC);
DropoutGrad() = default;
explicit DropoutGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetRatio(float ratio);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;

#else
DropoutGrad() = default;

int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
float GetRatio() const;
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
};
} // namespace lite
} // namespace mindspore

#endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_

+ 22
- 26
mindspore/lite/src/ops/group_conv2d_grad_input.cc

@@ -39,6 +39,9 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value.A
int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateW; }
int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateH; }
bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value.AsGroupConv2DGradInput()->hasBias; }
std::vector<int> GroupConv2DGradInput::GetInputShape() const {
return this->primitive_->value.AsGroupConv2DGradInput()->input_shape;
}
int GroupConv2DGradInput::GetActivationType() const {
return this->primitive_->value.AsGroupConv2DGradInput()->activationType;
}
@@ -99,10 +102,16 @@ int GroupConv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive
MS_LOG(ERROR) << "value_as_GroupConv2DGradInput return nullptr"; MS_LOG(ERROR) << "value_as_GroupConv2DGradInput return nullptr";
return RET_ERROR; return RET_ERROR;
} }
auto val_offset = schema::CreateGroupConv2DGradInput(
std::vector<int32_t> input_shape;
if (attr->input_shape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->input_shape()->size()); i++) {
input_shape.push_back(attr->input_shape()->data()[i]);
}
}
auto val_offset = schema::CreateGroupConv2DGradInputDirect(
*fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(),
attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(),
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType());
attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_GroupConv2DGradInput, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
@@ -127,51 +136,38 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value_a
int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateW(); }
int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateH(); }
bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value_as_GroupConv2DGradInput()->hasBias(); }
std::vector<int> GroupConv2DGradInput::GetInputShape() const {
auto fb_vector = this->primitive_->value_as_GroupConv2DGradInput()->input_shape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}
int GroupConv2DGradInput::GetActivationType() const {
return this->primitive_->value_as_GroupConv2DGradInput()->activationType();
}

PrimitiveC *GroupConv2DGradInputCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<GroupConv2DGradInput>(primitive);
}
Registry GroupConv2DGradInputRegistry(schema::PrimitiveType_GroupConv2DGradInput, GroupConv2DGradInputCreator);

#endif


int GroupConv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
if (3 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs";
if (2 != inputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad input should have 2 inputs";
return RET_ERROR;
}
if (1 != outputs.size()) {
MS_LOG(ERROR) << "Conv2d Grad input should have one output";
MS_LOG(ERROR) << "Conv2d Grad output should have one output";
return RET_ERROR;
}

auto *in0 = inputs.at(0);
auto *in = inputs.at(2);
MS_ASSERT(in0 != nullptr);
MS_ASSERT(in != nullptr);

std::vector<int> output_shape;
int *out_shape = reinterpret_cast<int *>(in->MutableData());
int new_size = in->ElementsNum();
if (in0->GetFormat() == in->GetFormat()) {
for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]);
} else {
if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) {
output_shape.push_back(out_shape[0]);
output_shape.push_back(out_shape[2]);
output_shape.push_back(out_shape[3]);
output_shape.push_back(out_shape[1]);
} else {
MS_LOG(ERROR) << "Shape convert is not supported";
return RET_ERROR;
}
}
MS_ASSERT(in0 != nullptr);

auto *out = outputs.at(0);
MS_ASSERT(out != nullptr);
out->set_shape(output_shape);
out->set_shape(GetInputShape());

out->set_data_type(in0->data_type());
out->SetFormat(in0->GetFormat());




+ 1
- 0
mindspore/lite/src/ops/group_conv2d_grad_input.h

@@ -70,6 +70,7 @@ class GroupConv2DGradInput : public PrimitiveC {
int GetDilateW() const;
int GetDilateH() const;
bool GetHasBias() const;
std::vector<int> GetInputShape() const;
int GetActivationType() const;
};
} // namespace lite } // namespace lite


+ 3
- 3
mindspore/lite/src/ops/maximum.h

@@ -14,8 +14,8 @@
* limitations under the License.
*/


#ifndef LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_
#define LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_
#ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_
#define MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_


#include <vector>
#include <set>
@@ -41,4 +41,4 @@ class Maximum : public Arithmetic {
} // namespace lite
} // namespace mindspore


#endif // LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_
#endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_

+ 124
- 0
mindspore/lite/src/ops/maximum_grad.cc

@@ -0,0 +1,124 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "include/errorcode.h"
#include "src/ops/maximum_grad.h"
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#endif

#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif

namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int MaximumGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_MaximumGrad;
}
if (this->primitive_->value.type != schema::PrimitiveType_MaximumGrad) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::MaximumGradT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}
#else
int MaximumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);
auto val_offset = schema::CreateMaximumGrad(*fbb);
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MaximumGrad, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
PrimitiveC *MaximumGradCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<MaximumGrad>(primitive);
}
Registry MaximumGradRegistry(schema::PrimitiveType_MaximumGrad, MaximumGradCreator);

#endif
int MaximumGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) {
if (inputs_.size() != 3) {
MS_LOG(ERROR) << "The number of input must be 3";
return RET_ERROR;
}
if (outputs_.size() != 2) {
MS_LOG(ERROR) << "The number of output must be 2";
return RET_ERROR;
}

auto x1 = inputs_[0];
auto x2 = inputs_[1];
auto dy = inputs_[2];
auto dx1 = outputs_[0];
auto dx2 = outputs_[1];

MS_ASSERT(dy != nullptr);
MS_ASSERT(x1 != nullptr);
MS_ASSERT(x2 != nullptr);
MS_ASSERT(dx1 != nullptr);
MS_ASSERT(dx2 != nullptr);
if (!GetInferFlag()) {
return RET_OK;
}

auto inShape0 = x1->shape();
auto inShape1 = x2->shape();
auto outShape = dy->shape();

ndim_ = outShape.size();
x1_shape_.resize(ndim_);
x2_shape_.resize(ndim_);
dy_shape_.resize(ndim_);
auto fillDimNum0 = outShape.size() - inShape0.size();
auto fillDimNum1 = outShape.size() - inShape1.size();
int j0 = 0;
int j1 = 0;
for (unsigned int i = 0; i < outShape.size(); i++) {
x1_shape_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++];
x2_shape_[i] = (i < fillDimNum1) ? 1 : inShape1[j1++];
dy_shape_[i] = outShape[i];
}

dx1->set_shape(x1->shape());
dx2->set_shape(x2->shape());
dx1->set_data_type(dy->data_type());
dx2->set_data_type(dy->data_type());
return RET_OK;
}
} // namespace lite
} // namespace mindspore
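
MaximumGrad::InferShape left-pads the shorter input shape with 1s up to the output rank, the usual broadcasting alignment; a standalone sketch of that fill loop with example shapes:

#include <cassert>
#include <vector>
int main() {
  std::vector<int> outShape = {2, 3, 4};  // dy
  std::vector<int> inShape0 = {3, 4};     // x1
  std::vector<int> x1_shape(outShape.size());
  size_t fill = outShape.size() - inShape0.size();
  size_t j = 0;
  for (size_t i = 0; i < outShape.size(); ++i) {
    x1_shape[i] = (i < fill) ? 1 : inShape0[j++];  // pad leading dims with 1
  }
  assert((x1_shape == std::vector<int>{1, 3, 4}));
  return 0;
}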

+ 46
- 0
mindspore/lite/src/ops/maximum_grad.h

@@ -0,0 +1,46 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_
#define MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_

#include <vector>
#include <set>
#include <cmath>

#include "src/ops/arithmetic_grad.h"
#include "src/ops/primitive_c.h"

namespace mindspore {
namespace lite {
class MaximumGrad : public ArithmeticGrad {
public:
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(MaximumGrad, ArithmeticGrad);
MaximumGrad() = default;
explicit MaximumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {}
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
MaximumGrad() = default;

int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
};
} // namespace lite
} // namespace mindspore

#endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_

+ 27
- 0
mindspore/lite/src/ops/minimum.cc

@@ -23,6 +23,33 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int Minimum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_Minimum;
}
if (this->primitive_->value.type != schema::PrimitiveType_Minimum) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::MinimumT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}
#else
int Minimum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);


+ 4
- 3
mindspore/lite/src/ops/minimum.h

@@ -14,8 +14,8 @@
* limitations under the License.
*/


#ifndef LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_
#define LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_
#ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_H_
#define MINDSPORE_LITE_SRC_OPS_MINIMUM_H_


#include <vector>
#include <set>
@@ -32,6 +32,7 @@ class Minimum : public Arithmetic {
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(Arithmetic, Arithmetic);
explicit Minimum(schema::PrimitiveT *primitive) : Arithmetic(primitive) {}
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
@@ -39,4 +40,4 @@ class Minimum : public Arithmetic {
} // namespace lite
} // namespace mindspore


#endif // LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_
#endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_H_

+ 76
- 0
mindspore/lite/src/ops/minimum_grad.cc

@@ -0,0 +1,76 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "include/errorcode.h"
#include "src/ops/minimum_grad.h"
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#endif

#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif

namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int MinimumGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_MinimumGrad;
}
if (this->primitive_->value.type != schema::PrimitiveType_MinimumGrad) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::MinimumGradT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}

#else
PrimitiveC *MinimumGradCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<MinimumGrad>(primitive);
}
Registry MinimumGradRegistry(schema::PrimitiveType_MinimumGrad, MinimumGradCreator);

int MinimumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);
auto val_offset = schema::CreateMinimumGrad(*fbb);
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MinimumGrad, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
#endif
} // namespace lite
} // namespace mindspore
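The Creator/Registry pair in the non-writeable branch above is the static-registration idiom these ops use instead of a hand-written switch: a file-scope Registry object whose constructor runs at library load time and installs the creator into a type-indexed table that the runtime can consult when unpacking a model. A stripped-down sketch of just the mechanism (Registry, the table, and the type id below are illustrative stand-ins, not the real classes):

#include <cstdio>
#include <functional>
#include <map>

using Creator = std::function<int()>;  // the real table stores PrimitiveC factories

std::map<int, Creator> &CreatorTable() {  // function-local static: built on first use
  static std::map<int, Creator> table;
  return table;
}

struct Registry {  // constructor side effect == registration
  Registry(int type, Creator creator) { CreatorTable()[type] = creator; }
};

constexpr int kTypeMinimumGrad = 42;  // stand-in for schema::PrimitiveType_MinimumGrad
static Registry g_minimum_grad_registry(kTypeMinimumGrad, [] { return 7; });

int main() { std::printf("%d\n", CreatorTable()[kTypeMinimumGrad]()); }  // prints 7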

+ 45
- 0
mindspore/lite/src/ops/minimum_grad.h

@@ -0,0 +1,45 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_
#define MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_

#include <vector>
#include <set>
#include <cmath>

#include "src/ops/arithmetic_grad.h"
#include "src/ops/primitive_c.h"

namespace mindspore {
namespace lite {
class MinimumGrad : public ArithmeticGrad {
public:
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(MinimumGrad, ArithmeticGrad);
MinimumGrad() = default;
explicit MinimumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {}
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
MinimumGrad() = default;

int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
};
} // namespace lite
} // namespace mindspore

#endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_

+ 22
- 5
mindspore/lite/src/ops/primitive_c.cc

@@ -18,6 +18,7 @@
#ifdef PRIMITIVE_WRITEABLE
#include <memory>
#include <map>

#include "tools/converter/quantizer/quantize_util.h"
#include "src/ops/space_to_batch.h"
#include "src/ops/space_to_batch_nd.h"
@@ -167,12 +168,14 @@
#include "src/ops/sgd.h"
#include "src/ops/adam.h"
#include "src/ops/assign.h"
#include "src/ops/dropout_grad.h"
#include "src/ops/maximum_grad.h"
#include "src/ops/minimum_grad.h"
#include "src/ops/control_depend.h"
#include "src/ops/assign_add.h"
#include "src/ops/binary_cross_entropy.h"
#include "src/ops/binary_cross_entropy_grad.h"
#endif

#endif
namespace mindspore {
namespace lite {
@@ -506,10 +509,12 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
return NewPrimitiveC<Maximum>(prim, inputs, quantType);
} else if (op_type == "Split") {
return NewPrimitiveC<Split>(prim, inputs, quantType);
} else if (op_type == "While") {
return NewPrimitiveC<While>(prim, inputs, quantType);
} else if (op_type == "OneHot") {
return NewPrimitiveC<OneHot>(prim, inputs, quantType);
} else if (op_type == "Dropout") {
return NewPrimitiveC<Dropout>(prim, inputs, quantType);
} else if (op_type == "While") {
return NewPrimitiveC<While>(prim, inputs, quantType);
} else if (op_type == "GatherV2") {
return NewPrimitiveC<Gather>(prim, inputs, quantType);
} else if (op_type == "OnesLike") {
@@ -537,7 +542,7 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
} else if ((op_type == "ReluGrad" || op_type == "ReLU6Grad" || op_type == "SigmoidGrad" ||
op_type == "HSigmoidGrad" || op_type == "HSwishGrad")) {
return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType);
} else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) {
} else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad") || (op_type == "AvgPoolGradGpu")) {
return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType);
} else if (op_type == "Conv2DBackpropFilter") {
return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType);
@@ -559,6 +564,12 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
return NewPrimitiveC<Adam>(prim, inputs, quantType);
} else if (op_type == "Assign") {
return NewPrimitiveC<Assign>(prim, inputs, quantType);
} else if (op_type == "DropoutGrad") {
return NewPrimitiveC<DropoutGrad>(prim, inputs, quantType);
} else if (op_type == "MaximumGrad") {
return NewPrimitiveC<MaximumGrad>(prim, inputs, quantType);
} else if (op_type == "MinimumGrad") {
return NewPrimitiveC<MinimumGrad>(prim, inputs, quantType);
} else if (op_type == "AssignAdd") {
return NewPrimitiveC<AssignAdd>(prim, inputs, quantType);
} else if (op_type == "BinaryCrossEntropy") {
@@ -884,7 +895,12 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
return new BinaryCrossEntropyGrad(primitive);
case schema::PrimitiveType_BinaryCrossEntropy:
return new BinaryCrossEntropy(primitive);

case schema::PrimitiveType_DropoutGrad:
return new DropoutGrad(primitive);
case schema::PrimitiveType_MaximumGrad:
return new MaximumGrad(primitive);
case schema::PrimitiveType_MinimumGrad:
return new MinimumGrad(primitive);
#endif
default:
MS_LOG(ERROR) << "Unsupported primitive type in Create : " << schema::EnumNamePrimitiveType(op_type);
@@ -892,6 +908,7 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
}
return nullptr;
}

#else
void PrimitiveC::SetQuantType(schema::QuantType quant_type) { this->quant_type_ = quant_type; }
schema::QuantType PrimitiveC::GetQuantType() const { return quant_type_; }


+ 1
- 2
mindspore/lite/src/ops/squeeze.cc

@@ -50,8 +50,7 @@ int Squeeze::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &in
MS_LOG(INFO) << "Squeeze's attr xis is set to default"; MS_LOG(INFO) << "Squeeze's attr xis is set to default";
attr->axis = {0}; attr->axis = {0};
} else { } else {
int axis = GetValue<int>(prim.GetAttr("axis"));
attr->axis = {axis};
attr->axis = GetValue<std::vector<int>>(prim.GetAttr("axis"));
} }
this->primitive_->value.value = attr; this->primitive_->value.value = attr;
} }


+ 3
- 4
mindspore/lite/src/ops/sub.h

@@ -14,8 +14,8 @@
* limitations under the License.
*/


#ifndef LITE_MINDSPORE_LITE_C_OPS_SUB_H_
#define LITE_MINDSPORE_LITE_C_OPS_SUB_H_
#ifndef MINDSPORE_LITE_SRC_OPS_SUB_H_
#define MINDSPORE_LITE_SRC_OPS_SUB_H_


#include <vector>
#include <set>
@@ -34,7 +34,6 @@ class Sub : public Arithmetic {
explicit Sub(schema::PrimitiveT *primitive) : Arithmetic(primitive) {}
void SetActivationType(int activation_type);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;

#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
@@ -43,4 +42,4 @@ class Sub : public Arithmetic {
} // namespace lite
} // namespace mindspore


#endif // LITE_MINDSPORE_LITE_C_OPS_SUB_H_
#endif // MINDSPORE_LITE_SRC_OPS_SUB_H_

+ 34
- 42
mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc

@@ -39,14 +39,6 @@ void FusedBatchnormCPUKernel::FreeScaleAndOffset() {
free(offset_);
offset_ = nullptr;
}
if (save_mean_ != nullptr) {
free(save_mean_);
save_mean_ = nullptr;
}
if (save_variance_ != nullptr) {
free(save_variance_);
save_variance_ = nullptr;
}
}


int FusedBatchnormCPUKernel::InitConstTensor() {
@@ -59,11 +51,8 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
offset_ = malloc(offset->Size());
mean_ = malloc(mean->Size());
variance_ = malloc(variance->Size());
save_mean_ = malloc(mean->Size());
save_variance_ = malloc(variance->Size());


if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr || save_mean_ == nullptr ||
save_variance_ == nullptr) {
if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) {
FreeMeanAndVariance();
FreeScaleAndOffset();
MS_LOG(ERROR) << "Memory allocation failed";
@@ -73,61 +62,64 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
memcpy(offset_, offset->MutableData(), offset->Size());
memcpy(mean_, mean->MutableData(), mean->Size());
memcpy(variance_, variance->MutableData(), variance->Size());
memset(save_mean_, 0, mean->Size());
memset(save_variance_, 0, variance->Size());
if (out_tensors_.size() > 4) {
for (size_t i = 1; i < out_tensors_.size(); i++) {
auto *data = static_cast<float *>(out_tensors_[i]->MutableData());
std::fill(data, data + out_tensors_[i]->ElementsNum(), 0.f);
}
}


return RET_OK;
}


int FusedBatchnormCPUKernel::Run() {
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
if (is_train() && in_tensors_.size() >= 5) {
if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) {
float *in = static_cast<float *>(in_tensors_[0]->MutableData());
float *scale = static_cast<float *>(in_tensors_[1]->MutableData());
float *bias = static_cast<float *>(in_tensors_[2]->MutableData());
float *mean = static_cast<float *>(in_tensors_[3]->MutableData());
float *var = static_cast<float *>(in_tensors_[4]->MutableData());
std::fill(mean, mean + in_tensors_[3]->ElementsNum(), 0.f);
std::fill(var, var + in_tensors_[4]->ElementsNum(), 0.f);
FusedBatchNormFp32MeanVar(in, mean, var, param, static_cast<float *>(save_mean_),
static_cast<float *>(save_variance_));
memcpy(out_tensors_[3]->MutableData(), save_mean_, out_tensors_[3]->Size());
memcpy(out_tensors_[4]->MutableData(), save_variance_, out_tensors_[3]->Size());
memcpy(mean_, mean, in_tensors_[3]->Size());
memcpy(variance_, var, in_tensors_[4]->Size());
float *offset = static_cast<float *>(in_tensors_[2]->MutableData());
float *current_mean = static_cast<float *>(mean_);
float *current_var = static_cast<float *>(variance_);
float *save_mean = static_cast<float *>(in_tensors_[3]->MutableData());
float *save_variance = static_cast<float *>(in_tensors_[4]->MutableData());

std::fill(current_mean, current_mean + in_tensors_[3]->ElementsNum(), 0.f);
std::fill(current_var, current_var + in_tensors_[4]->ElementsNum(), 0.f);
FusedBatchNormFp32MeanVar(in, current_mean, current_var, param, static_cast<float *>(save_mean),
static_cast<float *>(save_variance));

memcpy(out_tensors_[1]->MutableData(), scale, out_tensors_[1]->Size());
memcpy(out_tensors_[2]->MutableData(), offset, out_tensors_[2]->Size());
memcpy(out_tensors_[3]->MutableData(), current_mean, out_tensors_[3]->Size());
memcpy(out_tensors_[4]->MutableData(), current_var, out_tensors_[4]->Size());

// Copy to local variables
memcpy(scale_, scale, in_tensors_[1]->Size());
memcpy(offset_, bias, in_tensors_[2]->Size());
memcpy(offset_, offset, in_tensors_[2]->Size());

// save for next iteration
memcpy(in_tensors_[3]->MutableData(), save_mean, in_tensors_[3]->Size());
memcpy(in_tensors_[4]->MutableData(), save_variance, in_tensors_[4]->Size());

trained_ = true;  // trained at least once
}
auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}

return ret;
}


void FusedBatchnormCPUKernel::eval() {
LiteKernel::eval();
int FusedBatchnormCPUKernel::Eval() {
LiteKernel::Eval();
if (trained_) {
float *run_mean = static_cast<float *>(in_tensors_[3]->MutableData());
float *run_var = static_cast<float *>(in_tensors_[4]->MutableData());
float *save_mean = static_cast<float *>(in_tensors_[3]->MutableData());
float *save_var = static_cast<float *>(in_tensors_[4]->MutableData());
float *scale = static_cast<float *>(in_tensors_[1]->MutableData());
float *bias = static_cast<float *>(in_tensors_[2]->MutableData());
// Copy to input tensors for Model export
memcpy(run_mean, save_mean_, in_tensors_[3]->Size());
memcpy(run_var, save_variance_, in_tensors_[4]->Size());

// Copy to local variables
memcpy(mean_, run_mean, in_tensors_[3]->Size());
memcpy(variance_, run_var, in_tensors_[4]->Size());
memcpy(scale_, scale, in_tensors_[1]->Size());
memcpy(offset_, bias, in_tensors_[2]->Size());
memcpy(mean_, save_mean, in_tensors_[3]->Size());
memcpy(variance_, save_var, in_tensors_[4]->Size());
}
return RET_OK;
}


int FusedBatchnormCPUKernel::DoExecute(int task_id) {
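The reworked Run/Eval pair removes the kernel-owned save_mean_/save_variance_ buffers: in training mode the batch statistics are accumulated into mean_/variance_, scale/offset and the fresh statistics are mirrored to outputs 1 through 4, and the moving averages produced by FusedBatchNormFp32MeanVar stay in input tensors 3 and 4 so an exported model carries them; Eval() then promotes those saved averages into mean_/variance_ for inference. A single-channel sketch of that handoff; the momentum-style moving-average update here is an assumption, since the real formula lives inside FusedBatchNormFp32MeanVar:

#include <vector>

struct BnStats {
  float mean = 0.f, var = 0.f;            // mean_/variance_: stats used to normalize
  float save_mean = 0.f, save_var = 0.f;  // in_tensors_[3]/[4]: exported averages
};

void TrainStep(const std::vector<float> &x, float momentum, BnStats *s) {
  float m = 0.f, v = 0.f;
  for (float e : x) m += e;
  m /= x.size();
  for (float e : x) v += (e - m) * (e - m);
  v /= x.size();
  s->mean = m;  // this batch is normalized with its own statistics
  s->var = v;
  s->save_mean = momentum * s->save_mean + (1.f - momentum) * m;  // assumed update
  s->save_var = momentum * s->save_var + (1.f - momentum) * v;
}

void EvalStep(BnStats *s) {  // what the memcpy calls in Eval() above do
  s->mean = s->save_mean;
  s->var = s->save_var;
}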


+ 1
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h

@@ -29,7 +29,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
: BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); }


void eval() override;
int Eval() override;
int ReSize() override;
int Run() override;
int InitConstTensor() override;
@@ -39,8 +39,6 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
void FreeScaleAndOffset();
void *scale_ = nullptr;
void *offset_ = nullptr;
void *save_mean_ = nullptr;
void *save_variance_ = nullptr;
bool trained_ = false;
};
} // namespace mindspore::kernel


+ 48
- 14
mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc

@@ -140,10 +140,12 @@ int MatmulCPUKernel::InitBias() {
: (c_shape[c_shape.size() - 1]);
params_->col_8_ = UP_ROUND(params_->col_, 8);
auto col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_;
bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float)));
if (bias_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float)));
if (bias_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
}
memset(bias_ptr_, 0, col_tmp * sizeof(float));
if (in_tensors_.size() == 3) {
@@ -154,6 +156,8 @@ int MatmulCPUKernel::InitBias() {


int MatmulCPUKernel::ReSize() {
if (!params_->b_const_) {
free(bias_ptr_);
bias_ptr_ = nullptr;
auto ret = InitBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 init bias failed";
@@ -277,7 +281,7 @@ int MatmulCPUKernel::Run() {
auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c());
auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c());


if (!params_->a_const_ || is_train()) {
if (!params_->a_const_ || IsTrain()) {
if (a_pack_ptr_ != nullptr) {
params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_);
a_pack_ptr_ = nullptr;
@@ -294,7 +298,7 @@ int MatmulCPUKernel::Run() {
a_ptr_ = a_pack_ptr_;
}
}
if (!params_->b_const_ || is_train()) {
if (!params_->b_const_ || IsTrain()) {
if (b_pack_ptr_ != nullptr) {
params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_);
b_pack_ptr_ = nullptr;
@@ -311,7 +315,9 @@ int MatmulCPUKernel::Run() {
b_ptr_ = b_pack_ptr_;
}
}

if (IsTrain()) {
InitBias();
}
for (int i = 0; i < params_->batch; ++i) {
if (is_vector_a_) {
cur_a_ptr_ = a_ptr_ + i * params_->deep_;
@@ -329,26 +335,54 @@ int MatmulCPUKernel::Run() {
return RET_ERROR;
}
}
if (!params_->a_const_ || is_train()) {
context_->allocator->Free(a_pack_ptr_);
if (!params_->a_const_ || IsTrain()) {
params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_);
a_pack_ptr_ = nullptr;
}
if (!params_->b_const_ || is_train()) {
context_->allocator->Free(b_pack_ptr_);
if (!params_->b_const_ || IsTrain()) {
params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_);
b_pack_ptr_ = nullptr;
}
return RET_OK;
}


void MatmulCPUKernel::eval() {
int MatmulCPUKernel::Eval() {
// Copy weights after training
LiteKernel::eval();
auto a_src = reinterpret_cast<float *>(in_tensors_[0]->data_c());
auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c());
LiteKernel::Eval();
if (params_->a_const_) {
InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_pack_ptr_);
if (a_pack_ptr_ == nullptr) {
auto ret = MallocMatrixABuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed";
return RET_ERROR;
}
}
if (is_vector_a_) {
a_ptr_ = a_src;
} else {
InitMatrixA(a_src, a_pack_ptr_);
a_ptr_ = a_pack_ptr_;
}
}
if (params_->b_const_) {
InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_pack_ptr_);
if (b_pack_ptr_ == nullptr) {
auto ret = MallocMatrixBBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed";
return RET_ERROR;
}
}
if (is_vector_a_ && params_->b_transpose_) {
b_ptr_ = b_src;
} else {
InitMatrixB(b_src, b_pack_ptr_);
b_ptr_ = b_pack_ptr_;
}
}
InitBias();
return RET_OK;
}


kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h

@@ -34,7 +34,7 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
int ReSize() override;
int Run() override;
int RunImpl(int task_id);
void eval() override;
int Eval() override;


private:
int MallocMatrixABuffer();


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc

@@ -214,5 +214,5 @@ kernel::LiteKernel *CpuOneHotFp32KernelCreator(const std::vector<lite::Tensor *>
return kernel;
}


REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator)
} // namespace mindspore::kernel

+ 7
- 12
mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc

@@ -45,24 +45,19 @@ int AdamCPUKernel::Execute(int task_id) {
auto eps = reinterpret_cast<float *>(in_tensors_[8]->MutableData())[0];
auto gradient = reinterpret_cast<float *>(in_tensors_[9]->MutableData());
size_t elem_num = in_tensors_[0]->ElementsNum();
auto update_lr = learning_rate * std::sqrt(1 - beta2_power) / (1 - beta1_power);


if (adam_param_->use_nesterov_) { // Nadam
for (size_t i = 0; i < elem_num; ++i) {
m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1));
v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2));
auto g_hat = gradient[i] / (1 - beta1_power);
auto m_hat = m[i] / (1 - beta1_power);
auto v_hat = v[i] / (1 - beta2_power);
auto m_tag = (1.f - beta1) * g_hat + beta1 * m_hat;
weight[i] -= learning_rate * m_tag / (sqrtf(v_hat) + eps);
m[i] += (gradient[i] - m[i]) * (1 - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2);
weight[i] -= update_lr * (m[i] * beta1 + (1 - beta1) * gradient[i]) / (std::sqrt(v[i]) + eps);
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1));
v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2));
auto m_hat = m[i] / (1 - beta1_power);
auto v_hat = v[i] / (1 - beta2_power);
weight[i] -= learning_rate * m_hat / (sqrtf(v_hat) + eps);
m[i] += (gradient[i] - m[i]) * (1 - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2);
weight[i] -= update_lr * m[i] / (std::sqrt(v[i]) + eps);
}
}
return RET_OK;
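The rewritten loops are the "efficient" Adam update from Kingma & Ba: rather than materializing the bias-corrected m_hat = m / (1 - beta1^t) and v_hat = v / (1 - beta2^t), the corrections are folded into a per-step learning rate, update_lr = lr * sqrt(1 - beta2^t) / (1 - beta1^t), and eps is applied to the uncorrected sqrt(v) (the paper's eps_hat, so the result differs from the old code only in how eps is scaled). A one-parameter sketch of exactly what the two branches above compute:

#include <cmath>

void AdamStep(float *w, float *m, float *v, float g, float lr, float beta1,
              float beta2, float beta1_power, float beta2_power, float eps,
              bool use_nesterov) {
  const float update_lr = lr * std::sqrt(1.f - beta2_power) / (1.f - beta1_power);
  *m += (g - *m) * (1.f - beta1);      // m = beta1 * m + (1 - beta1) * g
  *v += (g * g - *v) * (1.f - beta2);  // v = beta2 * v + (1 - beta2) * g^2
  const float m_eff = use_nesterov ? (*m * beta1 + (1.f - beta1) * g) : *m;  // Nadam blend
  *w -= update_lr * m_eff / (std::sqrt(*v) + eps);
}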


+ 24
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc

@@ -177,6 +177,28 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float
ElementDivNegSquare(tile_data2, x2_data, dx2, dy_size);
}


void ArithmeticGradCPUKernel::ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
int dx2_size) {
// For some reason, input order is x0, x1, dy
auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData());

MaximumByAxes(x1, x2, dy, arithmeticParameter_->in_shape0_, arithmeticParameter_->in_shape1_,
arithmeticParameter_->out_shape_, dx1, dx2, arithmeticParameter_->ndim_);
}

void ArithmeticGradCPUKernel::ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
int dx2_size) {
// For some reason, input order is x0, x1, dy
auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData());

MinimumByAxes(x1, x2, dy, arithmeticParameter_->out_shape_, arithmeticParameter_->in_shape0_,
arithmeticParameter_->in_shape1_, dx1, dx2, arithmeticParameter_->ndim_);
}

int ArithmeticGradCPUKernel::ReSize() { return RET_OK; }


int ArithmeticGradCPUKernel::Execute(int task_id) {
@@ -240,4 +262,6 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulGrad, CpuArithmeticGradFp3
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_AddGrad, CpuArithmeticGradFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SubGrad, CpuArithmeticGradFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DivGrad, CpuArithmeticGradFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MaximumGrad, CpuArithmeticGradFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MinimumGrad, CpuArithmeticGradFp32KernelCreator)
} // namespace mindspore::kernel
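For both new gradient ops the backward rule is pure routing: the operand that won the element-wise comparison receives dy, the other receives zero, and MaximumByAxes/MinimumByAxes additionally sum the routed gradient over any broadcast axes. A same-shape sketch of the routing, with the broadcast reduction omitted and ties assumed to go to the first operand:

void MaximumGradElem(const float *x1, const float *x2, const float *dy,
                     float *dx1, float *dx2, int n) {
  for (int i = 0; i < n; ++i) {
    const bool first_wins = x1[i] >= x2[i];  // assumed tie-break toward x1
    dx1[i] = first_wins ? dy[i] : 0.f;
    dx2[i] = first_wins ? 0.f : dy[i];       // for MinimumGrad, flip the comparison
  }
}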

+ 10
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h

@@ -24,6 +24,8 @@


using mindspore::schema::PrimitiveType_AddGrad;
using mindspore::schema::PrimitiveType_DivGrad;
using mindspore::schema::PrimitiveType_MaximumGrad;
using mindspore::schema::PrimitiveType_MinimumGrad;
using mindspore::schema::PrimitiveType_MulGrad;
using mindspore::schema::PrimitiveType_SubGrad;


@@ -52,6 +54,12 @@ class ArithmeticGradCPUKernel : public LiteKernel {
case PrimitiveType_DivGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv;  // this will be adjusted in InferShape
break;
case PrimitiveType_MaximumGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMaximum;
break;
case PrimitiveType_MinimumGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMinimum;
break;
default:
MS_LOG(ERROR) << "Error Operator type " << parameter->type_;
break;
@@ -79,6 +87,8 @@ class ArithmeticGradCPUKernel : public LiteKernel {
void ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size);
void ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size);
void ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size);
void ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size);
void ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size);
ArithmeticParameter *arithmeticParameter_;
ArithmeticGradOperation arithmetic_grad_;
float *tile_data0;


+ 17
- 9
mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc

@@ -15,6 +15,7 @@
*/


#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
#include <math.h>
#include <algorithm>
#include <vector>
#include "schema/model_generated.h"
@@ -34,7 +35,7 @@ namespace mindspore::kernel {
int BNGradCPUKernel::Init() {
auto *input_x = in_tensors_.at(1);
int channels = input_x->shape().at(kNHWC_C);
SetWorkspaceSize(4 * channels * sizeof(float));
SetWorkspaceSize(2 * channels * sizeof(float));
return RET_OK;
}


@@ -45,19 +46,23 @@ int BNGradCPUKernel::Execute(int task_id) {
auto *input_yt = in_tensors_.at(0);
auto *input_x = in_tensors_.at(1);
auto *input_scale = in_tensors_.at(2);
auto *input_mean = in_tensors_.at(3);
auto *input_var = in_tensors_.at(4);

float *save_mean = reinterpret_cast<float *>(input_mean->MutableData());
float *save_var = reinterpret_cast<float *>(input_var->MutableData());

auto *output_dx = out_tensors_.at(0);
auto *output_scale = out_tensors_.at(1);
auto *output_bias = out_tensors_.at(2);
int batch = input_x->Batch();
int channels = input_x->Channel();
int spatial = input_x->Height() * input_x->Width();
size_t batch = input_x->Batch();
size_t channels = input_x->Channel();
size_t spatial = input_x->Height() * input_x->Width();
float eps = bn_param->epsilon_;


float *workspace = static_cast<float *>(GetWorkspace());
std::fill(workspace, workspace + GetWorkspaceSize() / sizeof(*workspace), 0.f);
float *mean = workspace;
float *invar = mean + channels;
float *dxhat_sum = invar + channels;
float *dxhat_sum = workspace;
float *dxhathat_sum = dxhat_sum + channels;


float *x = reinterpret_cast<float *>(input_x->MutableData());
@@ -67,11 +72,14 @@ int BNGradCPUKernel::Execute(int task_id) {
float *dscale = reinterpret_cast<float *>(output_scale->MutableData());
float *dbias = reinterpret_cast<float *>(output_bias->MutableData());


backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx);
var2Invar(save_var, input_var->ElementsNum(), eps);
// dx
backwardX(x, yt, scale, batch * spatial, channels, save_mean, save_var, dxhat_sum, dxhathat_sum, dx);
// dbias
sumSpatialBatch(yt, batch * spatial, channels, dbias);
// dscale
backwardScale(x, mean, invar, yt, batch, channels, spatial, dscale);
backwardScale(x, save_mean, save_var, yt, batch, channels, spatial, dscale);

return RET_OK;
}
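With the forward pass now handing in the saved mean and variance as inputs 3 and 4, the workspace only needs the two reduction buffers, and var2Invar converts the saved variance to 1/sqrt(var + eps) in place before backwardX consumes it. What backwardX computes is the standard batchnorm input gradient; a per-channel sketch with N = batch * spatial and invar already inverted (the real kernel interleaves channels, this is the scalar form):

#include <vector>

// dx_i = invar / N * (N * dxhat_i - sum(dxhat) - xhat_i * sum(dxhat * xhat)),
// where xhat_i = (x_i - mean) * invar and dxhat_i = dy_i * scale.
void BackwardXOneChannel(const float *x, const float *dy, float scale, float mean,
                         float invar, int N, float *dx) {
  std::vector<float> xhat(N), dxhat(N);
  float dxhat_sum = 0.f, dxhathat_sum = 0.f;
  for (int i = 0; i < N; ++i) {
    xhat[i] = (x[i] - mean) * invar;
    dxhat[i] = dy[i] * scale;
    dxhat_sum += dxhat[i];
    dxhathat_sum += dxhat[i] * xhat[i];
  }
  for (int i = 0; i < N; ++i) {
    dx[i] = invar / N * (N * dxhat[i] - dxhat_sum - xhat[i] * dxhathat_sum);
  }
}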




+ 34
- 34
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc

@@ -19,6 +19,7 @@
#include "nnacl/fp32_grad/gemm.h" #include "nnacl/fp32_grad/gemm.h"
#include "include/errorcode.h" #include "include/errorcode.h"
#include "src/runtime/runtime_api.h" #include "src/runtime/runtime_api.h"
#include "nnacl/pack.h"


using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::RET_ERROR; using mindspore::lite::RET_ERROR;
@@ -26,8 +27,8 @@ using mindspore::lite::RET_OK;


namespace mindspore::kernel {
int ConvolutionTrainCPUKernel::Init() {
if (2 != in_tensors_.size()) {
MS_LOG(ERROR) << "Convolution should have two inputs";
if (2 > in_tensors_.size()) {
MS_LOG(ERROR) << "Convolution should have at least two inputs";
return RET_ERROR;
}
if (1 != out_tensors_.size()) {
@@ -51,11 +52,11 @@ int ConvolutionTrainCPUKernel::Init() {
conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);


conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_;
int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ *
conv_param_->input_channel_ / conv_param_->group_;
SetWorkspaceSize(ws_size * sizeof(float));
const int n = conv_param_->output_channel_ * conv_param_->group_;
const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
ws_size = chunk * k;
int mat_alloc = MatSizeTotal(chunk, n, k, 0);
SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float));
return RET_OK;
}


@@ -71,36 +72,35 @@ int ConvolutionTrainCPUKernel::Execute(int task_id) {
auto y_addr = reinterpret_cast<float *>(out_y->MutableData());
auto w_addr = reinterpret_cast<float *>(input_w->MutableData());


int i, j;
int nweights = input_w->ElementsNum();
int in_ch = conv_param_->input_channel_;
int in_h = conv_param_->input_h_;
int in_w = conv_param_->input_w_;
int k_h = conv_param_->kernel_h_;
int k_w = conv_param_->kernel_w_;
int batch = conv_param_->output_batch_;
int out_ch = conv_param_->output_channel_; // out_y->shape()[3];
int groups = conv_param_->group_;
int out_h = conv_param_->output_h_;
int out_w = conv_param_->output_w_;
int m = out_h * out_w;
int n = out_ch / groups;
int k = k_h * k_w * in_ch / groups;
const int nweights = input_w->ElementsNum();
const int in_ch = conv_param_->input_channel_;
const int in_h = conv_param_->input_h_;
const int in_w = conv_param_->input_w_;
const int k_h = conv_param_->kernel_h_;
const int k_w = conv_param_->kernel_w_;
const int batch = conv_param_->output_batch_;
const int out_ch = conv_param_->output_channel_; // out_y->shape()[3];
const int groups = conv_param_->group_;
const int out_h = conv_param_->output_h_;
const int out_w = conv_param_->output_w_;
const int m = out_h * out_w;
const int n = out_ch / groups;
const int k = k_h * k_w * in_ch / groups;
float *workspace = static_cast<float *>(GetWorkspace());

memset(y_addr, 0, out_y->Size());

for (i = 0; i < batch; ++i) {
for (j = 0; j < groups; ++j) {
float *mat_a = workspace;
float *mat_b = w_addr + j * nweights / groups;
float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups);
float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups);
im2col_hwc(im, mat_a, conv_param_);
gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch);
float *mat_workspace = workspace + ws_size;
for (int i = 0; i < batch; ++i) {
for (int j = 0; j < groups; ++j) {
for (int ci = 0; ci < m; ci += chunk) {
int real_chunk = MSMIN(m - ci, chunk);
float *mat_a = workspace;
const float *mat_b = w_addr + j * nweights / groups;
float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups);
RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
}
}
}

return RET_OK;
}
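The restructured loop replaces the old whole-image im2col with a rolling one: only `chunk` output rows (C12NUM, or C4NUM on ARM32) of the im2col matrix are packed per iteration, which is why Init now sizes the workspace as chunk * k plus GEMM packing scratch instead of output_h * output_w * k. A runnable toy version of the same chunking, with a plain row copy standing in for RollingIm2ColPackUnitFp32 and a naive triple loop standing in for GemmMatmul:

#include <algorithm>
#include <vector>

// C(m x n) = A(m x k) * B(k x n), never materializing more than `chunk` rows of A.
void ChunkedMatmul(const float *a, const float *b, float *c, int m, int n, int k,
                   int chunk) {
  std::vector<float> ws(chunk * k);            // workspace: chunk rows of A
  for (int ci = 0; ci < m; ci += chunk) {
    const int rows = std::min(m - ci, chunk);  // last chunk may be short
    std::copy(a + ci * k, a + (ci + rows) * k, ws.begin());  // "rolling im2col"
    for (int i = 0; i < rows; ++i) {
      for (int j = 0; j < n; ++j) {
        float acc = 0.f;
        for (int p = 0; p < k; ++p) acc += ws[i * k + p] * b[p * n + j];
        c[(ci + i) * n + j] = acc;
      }
    }
  }
}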




+ 8
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h

@@ -33,6 +33,14 @@ class ConvolutionTrainCPUKernel : public LiteKernel {
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
int ws_size = 0;
#ifdef ENABLE_ARM32
const int chunk = C4NUM;
#else
const int chunk = C12NUM;
#endif
};


kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 16
- 12
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc

@@ -51,10 +51,12 @@ int ConvolutionGradFilterCPUKernel::Init() {
conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];


size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
conv_param->input_channel_ / conv_param->group_;
ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_;


SetWorkspaceSize(ws_size * sizeof(float));
int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_;
int k = conv_param->output_channel_ / conv_param->group_;
size_t mat_alloc = MatSizeTotal(k, n, chunk, n);
SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float));
return RET_OK;
}


@@ -88,19 +90,21 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
int k = out_ch / groups;


float *workspace = reinterpret_cast<float *>(GetWorkspace());
float *mat_workspace = workspace + ws_size;
// zero out pointer
memset(dw_addr, 0, out_dw->Size());

for (i = 0; i < batch; ++i) {
for (j = 0; j < groups; ++j) {
float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups);
float *mat_b = workspace;
float *mat_c = dw_addr + j * nweights / groups;
float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups);

im2row_hwc(im, mat_b, conv_param, false);
gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n);
for (int ci = 0; ci < m; ci += chunk) {
int real_chunk = MSMIN(m - ci, chunk);
float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
float *mat_b = workspace;
float *mat_c = dw_addr + j * nweights / groups;
float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups);
memset(mat_b, 0, n * real_chunk * sizeof(float));
RollingIm2ColPackUnitFp32(im, conv_param, mat_b, real_chunk, ci);
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b, n, 1, mat_c, n, mat_workspace);
}
}
}
return RET_OK;


+ 8
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h

@@ -34,6 +34,14 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel {
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
size_t ws_size = 0;
#ifdef ENABLE_ARM32
const int chunk = C4NUM;
#else
const int chunk = C12NUM;
#endif
};


} // namespace mindspore::kernel


+ 27
- 10
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc

@@ -51,11 +51,14 @@ int ConvolutionGradInputCPUKernel::Init() {


conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_;


size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
conv_param->input_channel_ / conv_param->group_;
int n = conv_param->kernel_w_ * conv_param->kernel_h_ * conv_param->input_channel_ / conv_param->group_;
int k = conv_param->output_channel_ / conv_param->group_;


SetWorkspaceSize(ws_size * sizeof(float));
size_t mat_alloc = MatSizeTotal(chunk, n, k, 0);

SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float));
return RET_OK;
}


@@ -88,16 +91,30 @@ int ConvolutionGradInputCPUKernel::Execute(int task_id) {
int n = k_w * k_h * in_ch / groups;
int k = out_ch / groups;
float *workspace = reinterpret_cast<float *>(GetWorkspace());
float *mat_workspace = workspace + ws_size;
memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w);

for (i = 0; i < batch; ++i) {
for (j = 0; j < groups; ++j) {
float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups);
float *mat_b = w_addr + j * nweights / groups;
float *mat_c = workspace;
gemm(0, 0, m, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n);
col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param);
GemmCb gcb;
for (int ci = 0; ci < m; ci += chunk) {
float *mat_b;
if (ci == 0) {
mat_b = w_addr + j * nweights / groups;
gcb.ca = 0;
gcb.cb = 0;
gcb.bias = nullptr;
gcb.atype = ActType_No;
} else {
mat_b = gcb.mat_b;
gcb.cb = 1;
}
int real_chunk = MSMIN(m - ci, chunk);
float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
float *mat_c = workspace;
GemmMatmulPlus(0, 0, real_chunk, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n, mat_workspace, &gcb);
rolling_col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups),
conv_param, real_chunk, ci);
}
}
}
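GemmCb here reads as a pack cache threaded through GemmMatmulPlus: on the first chunk (ci == 0) the fields request a fresh pack of the weight matrix and GemmMatmulPlus records the packed pointer in gcb.mat_b; every later chunk sets cb = 1 and feeds gcb.mat_b back in, so the weights are packed once per (batch, group) pair instead of once per chunk. That reading is inferred from the call sites above, not from the GemmCb definition; a stand-alone sketch of the idea:

#include <cstddef>
#include <vector>

struct PackCache {
  std::vector<float> packed;  // plays the role of gcb.mat_b
  bool valid = false;         // plays the role of gcb.cb
};

// Returns a packed view of B, packing only on the first call per cache.
const float *PackedB(const float *b_raw, std::size_t len, PackCache *cache) {
  if (!cache->valid) {
    cache->packed.assign(b_raw, b_raw + len);  // stand-in for the real repacking
    cache->valid = true;
  }
  return cache->packed.data();
}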




+ 8
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h

@@ -33,6 +33,14 @@ class ConvolutionGradInputCPUKernel : public LiteKernel {
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
size_t ws_size = 0;
#ifdef ENABLE_ARM32
const int chunk = C4NUM;
#else
const int chunk = C12NUM;
#endif
};
} // namespace mindspore::kernel




+ 20
- 12
mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc

@@ -50,10 +50,14 @@ int DeConvolutionGradFilterCPUKernel::Init() {
conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];


int ws_size = conv_param->input_h_ * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
conv_param->output_channel_ / conv_param->group_;
ws_size = chunk * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ /
conv_param->group_;


SetWorkspaceSize(ws_size * sizeof(float));
int m = conv_param->input_channel_ / conv_param->group_;
int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ / conv_param->group_;
size_t mat_alloc = MatSizeTotal(n, m, chunk * conv_param->input_w_, conv_param->input_channel_);

SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float));


return RET_OK;
}
@@ -82,21 +86,25 @@ int DeConvolutionGradFilterCPUKernel::Execute(int task_id) {
int out_h = conv_param->output_h_;
int out_w = conv_param->output_w_;


int m = in_ch / groups;
int n = k_h * k_w * out_ch / groups;
int k = in_h * in_w;
const int m = in_ch / groups;
const int n = k_h * k_w * out_ch / groups;


float *workspace = reinterpret_cast<float *>(GetWorkspace());
float *mat_workspace = workspace + ws_size;
// zero out pointer
memset(dw_addr, 0, out_dw->Size());
for (i = 0; i < batch; ++i) {
for (j = 0; j < groups; ++j) {
float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups));
float *mat_b = workspace;
float *mat_c = dw_addr + j * m;
float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups));
im2row_hwc(im, mat_b, conv_param, true);
gemm(0, 0, n, m, k, 1, mat_b, k, mat_a, in_ch, 1, mat_c, in_ch);
for (int ci = 0; ci < in_h; ci += chunk) {
int real_chunk = MSMIN(in_h - ci, chunk);
float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)) + ci * in_w * in_ch;
float *mat_b = workspace;
float *mat_c = dw_addr + j * m;
float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups));
rolling_im2row_hwc(im, mat_b, conv_param, real_chunk, ci);
GemmMatmul(0, 0, n, m, real_chunk * in_w, 1, mat_b, real_chunk * in_w, mat_a, in_ch, 1, mat_c, in_ch,
mat_workspace);
}
}
}
return RET_OK;


+ 4
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h

@@ -33,6 +33,10 @@ class DeConvolutionGradFilterCPUKernel : public LiteKernel {
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
size_t ws_size = 0;
const int chunk = 1;
}; };


} // namespace mindspore::kernel


+ 131
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.cc

@@ -0,0 +1,131 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <random>
#include <algorithm>
#include "src/runtime/kernel/arm/fp32_grad/dropout.h"
#include "schema/model_generated.h"
#include "src/runtime/runtime_api.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32_grad/dropout_parameter.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Dropout;

namespace mindspore::kernel {

int DropoutCPUKernel::Init() {
auto param = reinterpret_cast<DropoutParameter *>(op_parameter_);
if (param == nullptr) {
MS_LOG(ERROR) << "Dropout op_parameter_ nullptr";
return RET_NULL_PTR;
}

if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) {
MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one";
return RET_ERROR;
}

if (param->ratio_ >= 1.0f) {
scale_ = 1.0f;
} else {
scale_ = 1. / (1. - param->ratio_);
}
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

int DropoutCPUKernel::ReSize() { return RET_OK; }

int DropoutCPUKernel::Execute(int task_id) {
auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
auto mask = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
auto length = in_tensors_.at(kInputIndex)->ElementsNum();
auto param = reinterpret_cast<DropoutParameter *>(op_parameter_);
if (param == nullptr) {
MS_LOG(ERROR) << "Dropout op_parameter_ nullptr";
return RET_NULL_PTR;
}
if (IsEval()) {
std::copy(input_ptr, input_ptr + length, output_ptr);
} else {
std::default_random_engine generator;
std::bernoulli_distribution distribution(1.0f - param->ratio_);  // ratio_ is the drop probability, so keep with 1 - ratio_

for (int i = 0; i < length; i++) {
mask[i] = distribution(generator);
output_ptr[i] = input_ptr[i] * mask[i] * scale_;
}
}
return RET_OK;
}

int RunDropout(void *cdata, int task_id) {
auto dropout = reinterpret_cast<DropoutCPUKernel *>(cdata);
auto error_code = dropout->Execute(task_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Dropout Run error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

int DropoutCPUKernel::Run() {
int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropout, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Dropout function error error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

kernel::LiteKernel *CpuDropoutFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Dropout opParameter nullptr.";
return nullptr;
}
if (desc.type != schema::PrimitiveType_Dropout) {
MS_LOG(ERROR) << "Dropout desc type should be " << schema::PrimitiveType_Dropout << " got " << desc.type;
return nullptr;
}
auto *kernel = new (std::nothrow) DropoutCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Dropout new kernel failed.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
return nullptr;
}
return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Dropout, CpuDropoutFp32KernelCreator)
} // namespace mindspore::kernel
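The kernel implements inverted dropout: elements are dropped with probability ratio, survivors are pre-scaled by 1 / (1 - ratio) so the expected activation is unchanged, and the mask goes out as a second output for DropoutGrad; that is what lets the IsEval() branch degenerate to a plain copy. A compact sketch of the same forward pass:

#include <cstddef>
#include <random>
#include <vector>

void InvertedDropout(const std::vector<float> &in, std::vector<float> *out,
                     std::vector<float> *mask, float ratio, unsigned seed) {
  std::default_random_engine gen(seed);
  std::bernoulli_distribution keep(1.0f - ratio);  // P(keep) = 1 - ratio
  const float scale = (ratio >= 1.0f) ? 1.0f : 1.0f / (1.0f - ratio);
  for (std::size_t i = 0; i < in.size(); ++i) {
    (*mask)[i] = keep(gen) ? 1.0f : 0.0f;          // saved for the backward pass
    (*out)[i] = in[i] * (*mask)[i] * scale;        // E[out_i] == in_i
  }
}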

+ 43
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.h

@@ -0,0 +1,43 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_

#include <vector>
#include "src/lite_kernel.h"

namespace mindspore::kernel {
class DropoutCPUKernel : public LiteKernel {
public:
DropoutCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}

~DropoutCPUKernel() override = default;

int Init() override;
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
float scale_;
};

} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_

+ 118
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.cc

@@ -0,0 +1,118 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <random>
#include "src/runtime/kernel/arm/fp32_grad/dropout_grad.h"
#include "nnacl/fp32_grad/dropout_grad.h"
#include "schema/model_generated.h"
#include "src/runtime/runtime_api.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32_grad/dropout_parameter.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DropoutGrad;

namespace mindspore::kernel {

int DropoutGradCPUKernel::Init() {
auto param = reinterpret_cast<DropoutParameter *>(op_parameter_);
if (param == nullptr) {
MS_LOG(ERROR) << "Dropout op_parameter_ nullptr";
return RET_NULL_PTR;
}

if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) {
MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one";
return RET_ERROR;
}

if (param->ratio_ >= 1.0f) {
scale_ = 1.0f;
} else {
scale_ = 1. / (1. - param->ratio_);
}
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

int DropoutGradCPUKernel::ReSize() { return RET_OK; }

int DropoutGradCPUKernel::Execute(int task_id) {
auto yt_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
auto mask_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
auto length = in_tensors_.at(kInputIndex)->ElementsNum();
DropoutGrad(yt_ptr, mask_ptr, output_ptr, length, scale_);

return RET_OK;
}

int RunDropoutGrad(void *cdata, int task_id) {
auto dropout = reinterpret_cast<DropoutGradCPUKernel *>(cdata);
auto error_code = dropout->Execute(task_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Dropout Grad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

int DropoutGradCPUKernel::Run() {
int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropoutGrad, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Dropout Grad function error error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

kernel::LiteKernel *CpuDropoutGradFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter, const lite::InnerContext *ctx,
const kernel::KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "DropoutGrad opParameter nullptr.";
return nullptr;
}
if (desc.type != schema::PrimitiveType_DropoutGrad) {
MS_LOG(ERROR) << "DropoutGrad desc type should be " << schema::PrimitiveType_DropoutGrad << " got " << desc.type;
return nullptr;
}
auto *kernel = new (std::nothrow) DropoutGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "DropoutGrad new kernel failed.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
return nullptr;
}
return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DropoutGrad, CpuDropoutGradFp32KernelCreator)
} // namespace mindspore::kernel
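Given the call above, DropoutGrad(yt_ptr, mask_ptr, output_ptr, length, scale_) is expected to apply the saved mask and the same inverted-dropout scale to the incoming gradient. A minimal sketch of that contract (illustrative only; the actual kernel is in nnacl/fp32_grad/dropout_grad.c):

// Sketch only: elements dropped in the forward pass (mask == 0) receive zero
// gradient; surviving elements are scaled by the forward scale factor.
void DropoutGradSketch(const float *yt, const float *mask, float *output, int length, float scale) {
  for (int i = 0; i < length; i++) {
    output[i] = yt[i] * mask[i] * scale;
  }
}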

+ 43
- 0
mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.h View File

@@ -0,0 +1,43 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_

#include <vector>
#include "src/lite_kernel.h"

namespace mindspore::kernel {
class DropoutGradCPUKernel : public LiteKernel {
public:
DropoutGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}

~DropoutGradCPUKernel() override = default;

int Init() override;
int ReSize() override;
int Run() override;
int Execute(int task_id);

private:
float scale_;
};

} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_

+ 2
- 3
mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc View File

@@ -41,8 +41,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
      float logit =
        -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]);
      grads[i * param_->number_of_classes_ + j] =
-        (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]) /
-        param_->batch_size_;
+        (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]);
      total_loss += labels[i * param_->number_of_classes_ + j] * logit;
    }
  }
@@ -63,7 +62,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
  auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
  float *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
  float *grads = NULL;
-  if (is_train() && out_tensors_.size() > 1) {
+  if (IsTrain() && out_tensors_.size() > 1) {
    grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
  }
  size_t data_size = in_tensors_.at(0)->ElementsNum();
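The first hunk above removes the per-batch averaging from the gradient while leaving the per-sample loss accumulation untouched. Assuming the logits buffer holds post-softmax probabilities p, one-hot labels y, and batch size B (the usual convention for this fused kernel), the change is

\[ \frac{\partial \mathcal{L}}{\partial z_{ij}} = \frac{p_{ij} - y_{ij}}{B} \quad\longrightarrow\quad \frac{\partial \mathcal{L}}{\partial z_{ij}} = p_{ij} - y_{ij} \]

so any 1/B normalization is now expected to happen elsewhere, for example in the loss reduction or folded into the learning rate.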


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc View File

@@ -86,7 +86,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
  auto labels = reinterpret_cast<int *>(in_tensors_.at(1)->data_c());
  float *out = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
  float *grads = NULL;
-  if (is_train() && out_tensors_.size() > 1) {
+  if (IsTrain() && out_tensors_.size() > 1) {
    grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
  }
  size_t data_size = in_tensors_.at(0)->ElementsNum();
@@ -99,7 +99,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
  std::fill(losses_, losses_ + data_size, 0.f);
  std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f);
  Softmax(ins, losses_, sum_data_, &sm_params_);
-  if (is_train()) {
+  if (IsTrain()) {
    GradPostExecute(labels, losses_, grads, out);
  } else {
    ForwardPostExecute(labels, losses_, out);


+ 65
- 1
mindspore/lite/src/train/train_populate_parameter.cc View File

@@ -36,6 +36,10 @@
#include "src/ops/bn_grad.h" #include "src/ops/bn_grad.h"
#include "nnacl/fp32_grad/batch_norm.h" #include "nnacl/fp32_grad/batch_norm.h"
#include "src/ops/adam.h" #include "src/ops/adam.h"
#include "nnacl/fp32_grad/dropout_parameter.h"
#include "src/ops/dropout.h"
#include "src/ops/dropout_grad.h"
#include "src/ops/arithmetic.h"
#include "src/ops/oneslike.h" #include "src/ops/oneslike.h"
#include "src/ops/binary_cross_entropy.h" #include "src/ops/binary_cross_entropy.h"
#include "src/ops/binary_cross_entropy_grad.h" #include "src/ops/binary_cross_entropy_grad.h"
@@ -399,10 +403,66 @@ OpParameter *PopulateBNGradParameter(const mindspore::lite::PrimitiveC *primitiv
  bnGrad_param->op_parameter_.type_ = primitive->Type();
  auto bngrad = reinterpret_cast<mindspore::lite::BNGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
  bnGrad_param->epsilon_ = bngrad->GetEps();
-  bnGrad_param->momentum_ = 0.1;
+  bnGrad_param->momentum_ = bngrad->GetMomentum();
  return reinterpret_cast<OpParameter *>(bnGrad_param);
}


OpParameter *PopulateDropoutParameter(const mindspore::lite::PrimitiveC *primitive) {
DropoutParameter *dropout_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter)));
if (dropout_parameter == nullptr) {
MS_LOG(ERROR) << "malloc Dropout Parameter failed.";
return nullptr;
}
memset(dropout_parameter, 0, sizeof(DropoutParameter));
dropout_parameter->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::Dropout *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
dropout_parameter->ratio_ = param->GetRatio();
if (dropout_parameter->ratio_ < 0.f || dropout_parameter->ratio_ > 1.f) {
MS_LOG(ERROR) << "Dropout ratio must be between 0 to 1, got " << dropout_parameter->ratio_;
free(dropout_parameter);
return nullptr;
}
return reinterpret_cast<OpParameter *>(dropout_parameter);
}

OpParameter *PopulateDropoutGradParameter(const mindspore::lite::PrimitiveC *primitive) {
DropoutParameter *dropoutGrad_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter)));
if (dropoutGrad_parameter == nullptr) {
MS_LOG(ERROR) << "malloc Dropout Grad Parameter failed.";
return nullptr;
}
memset(dropoutGrad_parameter, 0, sizeof(DropoutParameter));
dropoutGrad_parameter->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::DropoutGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
dropoutGrad_parameter->ratio_ = param->GetRatio();
if (dropoutGrad_parameter->ratio_ < 0.f || dropoutGrad_parameter->ratio_ > 1.f) {
MS_LOG(ERROR) << "Dropout Grad ratio must be between 0 to 1, got " << dropoutGrad_parameter->ratio_;
free(dropoutGrad_parameter);
return nullptr;
}
return reinterpret_cast<OpParameter *>(dropoutGrad_parameter);
}

OpParameter *PopulateArithmeticGradParameter(const mindspore::lite::PrimitiveC *primitive) {
ArithmeticParameter *arithmetic_param = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
if (arithmetic_param == nullptr) {
MS_LOG(ERROR) << "malloc ArithmeticParameter failed.";
return nullptr;
}
memset(arithmetic_param, 0, sizeof(ArithmeticParameter));
arithmetic_param->op_parameter_.type_ = primitive->Type();
arithmetic_param->broadcasting_ = ((lite::Arithmetic *)primitive)->Broadcasting();
arithmetic_param->ndim_ = ((lite::Arithmetic *)primitive)->NDims();

auto tmp_shape = ((lite::Arithmetic *)primitive)->InShape0();
memcpy(arithmetic_param->in_shape0_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int));
tmp_shape = ((lite::Arithmetic *)primitive)->InShape1();
memcpy(arithmetic_param->in_shape1_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int));
tmp_shape = ((lite::Arithmetic *)primitive)->OutputShape();
memcpy(arithmetic_param->out_shape_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int));
return reinterpret_cast<OpParameter *>(arithmetic_param);
}

void PopulateTrainParameters() {
  lite::Registry ApplyMomentumParameterRegistry(schema::PrimitiveType_ApplyMomentum, PopulateApplyMomentumParameter);
  lite::Registry BiasGradParameterRegistry(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter);
@@ -430,6 +490,10 @@ void PopulateTrainParameters() {
  lite::Registry OnesLikeParameterRegistry(schema::PrimitiveType_OnesLike, DefaultPopulateParameter);
  lite::Registry UnsortedSegmentSumParameterRegistry(schema::PrimitiveType_UnsortedSegmentSum,
                                                     DefaultPopulateParameter);
+  lite::Registry DropoutParameterRegistry(schema::PrimitiveType_Dropout, PopulateDropoutParameter);
+  lite::Registry DropGradParameterRegistry(schema::PrimitiveType_DropoutGrad, PopulateDropoutGradParameter);
+  lite::Registry MaximumGradParameterRegistry(schema::PrimitiveType_MaximumGrad, PopulateArithmeticGradParameter);
+  lite::Registry MinimumGradParameterRegistry(schema::PrimitiveType_MinimumGrad, PopulateArithmeticGradParameter);
}

} // namespace mindspore::kernel
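The registration pattern above is worth spelling out: each populate function is a factory from a PrimitiveC to a freshly malloc'd OpParameter, and constructing a lite::Registry object binds it to a schema primitive type. A minimal sketch of that shape (assumed for illustration, not the actual lite::Registry implementation):

#include <map>

// Hypothetical registry sketch: maps a schema primitive type id to its
// populate function so the runtime can build an OpParameter by type.
using PopulateFnSketch = OpParameter *(*)(const mindspore::lite::PrimitiveC *);

std::map<int, PopulateFnSketch> &PopulateTableSketch() {
  static std::map<int, PopulateFnSketch> table;
  return table;
}

struct RegistrySketch {
  RegistrySketch(int type, PopulateFnSketch fn) { PopulateTableSketch()[type] = fn; }
};

// Later, at kernel-build time, roughly:
//   OpParameter *param = PopulateTableSketch()[primitive->Type()](primitive);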

+ 144
- 21
mindspore/lite/src/train/train_session.cc View File

@@ -15,9 +15,12 @@
 */

#include "src/train/train_session.h"
+#include <sys/stat.h>
#include <algorithm>
#include <utility>
#include <vector>
+#include <iostream>
+#include <fstream>
#include "include/errorcode.h"
#include "include/train_model.h"
#include "src/common/utils.h"
@@ -98,6 +101,21 @@ int TrainSession::CompileTrainGraph(mindspore::lite::TrainModel *model) {
  for (auto inTensor : inputs_) inTensor->MutableData();
  RestoreOps(restore);
  AllocWorkSpace();
+  MarkOptimizedKernels();
+  CompileTrainKernels();
+  if (train_mode_) {
+    auto ret1 = Train();
+    if (ret1 != RET_OK) {
+      MS_LOG(ERROR) << "failed to initialize network in train mode";
+      return RET_ERROR;
+    }
+  } else {
+    auto ret1 = Eval();
+    if (ret1 != RET_OK) {
+      MS_LOG(ERROR) << "failed to initialize network in eval mode";
+      return RET_ERROR;
+    }
+  }
  return ret;
}


@@ -110,34 +128,67 @@ void *TrainSession::ExportToBuf(char *buf, size_t *len) const { return model_->E


int TrainSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
  this->outputs_.clear();
-  for (auto ms_tensors : output_node_map_)
-    for (auto ms_tensor : ms_tensors.second) this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor)));
-  if (train_mode_) return lite::LiteSession::RunGraph(before, after);
+  // build out tensor
+  for (auto ms_tensors : output_node_map_) {
+    for (auto ms_tensor : ms_tensors.second) {
+      this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor)));
+    }
+  }

  if (this->context_ == nullptr) {
    MS_LOG(ERROR) << "context is null";
    return lite::RET_NULL_PTR;
  }
+  auto run_kernel = (train_mode_) ? train_kernels_ : inference_kernels_;
  lite::Executor executor;
  if (before == nullptr && after == nullptr) {
-    return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get());
+    return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get());
  } else {
-    return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get(), before,
-                        after);
+    return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get(), before, after);
  }
}


-void TrainSession::Train() {
+int TrainSession::SaveToFile(const std::string &filename) const {
+  size_t fb_size = 0;
+  auto *buf = reinterpret_cast<char *>(ExportToBuf(nullptr, &fb_size));
+  if (buf == NULL) {
+    MS_LOG(ERROR) << "Could not Export Trained model";
+    return lite::RET_NULL_PTR;
+  }
+  std::ofstream ofs(filename);
+  if ((true != ofs.good()) || (true != ofs.is_open())) {
+    MS_LOG(ERROR) << "Could not open file \"" << filename << "\" for writing";
+    free(buf);
+    return RET_ERROR;
+  }
+
+  ofs.seekp(0, std::ios::beg);
+  ofs.write(buf, fb_size);
+  ofs.close();
+  free(buf);
+  return chmod(filename.c_str(), S_IRUSR);
+}
+
+int TrainSession::Train() {
  for (auto ori_kernel : kernels_) {
    MS_ASSERT(nullptr != ori_kernel);
    if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) {
-      ori_kernel->train();
+      auto ret = ori_kernel->Train();
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << ori_kernel->name() << " failed to set train mode";
+        return RET_ERROR;
+      }
    } else {
      auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel);
      MS_ASSERT(nullptr != sub_graph);
      for (auto kernel : sub_graph->nodes()) {
        MS_ASSERT(nullptr != kernel);
-        kernel->train();
+        auto ret = kernel->Train();
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << kernel->name() << " failed to set train mode";
+          return RET_ERROR;
+        }
      }
    }
  }
@@ -157,6 +208,7 @@ void TrainSession::Train() {
      }
    }
  }
+  return RET_OK;
}


void TrainSession::UpdateOutputMapByLossKernel(const kernel::LiteKernel *kernel) {
@@ -190,17 +242,25 @@ void TrainSession::UpdateOutputMapByInKernel(const kernel::LiteKernel *kernel) {
  }
}


-void TrainSession::Eval() {
+int TrainSession::Eval() {
  for (auto ori_kernel : kernels_) {
    MS_ASSERT(nullptr != ori_kernel);
    if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) {
-      ori_kernel->eval();
+      auto ret = ori_kernel->Eval();
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << ori_kernel->name() << " failed to set eval mode";
+        return RET_ERROR;
+      }
    } else {
      auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel);
      MS_ASSERT(nullptr != sub_graph);
      for (auto kernel : sub_graph->nodes()) {
        MS_ASSERT(nullptr != kernel);
-        kernel->eval();
+        auto ret = kernel->Eval();
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << kernel->name() << " failed to set eval mode";
+          return RET_ERROR;
+        }
      }
    }
  }
@@ -221,6 +281,7 @@ void TrainSession::Eval() {
  if (inference_kernels_.size() == 0) {
    BuildInferenceKernelsMap();
  }
+  return RET_OK;
}


void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *v) {
@@ -234,24 +295,25 @@ void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, st


void TrainSession::BuildInferenceKernelsMap() {
  std::vector<kernel::LiteKernel *> req_kernels;
-  for (auto ori_kernel : kernels_) {
-    if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) {
-      if (IsLossKernel(ori_kernel)) {  // For each loss in the system add backward tree
-        for (auto in_node : ori_kernel->in_kernels()) {
+  for (auto kernel : this->kernels_) {
+    if (kernel->subgraph_type() == kernel::kNotSubGraph) {
+      if (IsLossKernel(kernel)) {  // For each loss in the system add backward tree
+        for (auto in_node : kernel->in_kernels()) {
          BuildInferenceKernelsRecursive(in_node, &req_kernels);
        }
      }
    } else {
-      auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel);
-      for (auto kernel : sub_graph->nodes()) {
-        if (IsLossKernel(kernel)) {  // For each loss in the system add backward tree
-          for (auto in_node : kernel->in_kernels()) {
+      auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
+      for (auto sb_kernel : sub_graph->nodes()) {
+        if (IsLossKernel(sb_kernel)) {  // For each loss in the system add backward tree
+          for (auto in_node : sb_kernel->in_kernels()) {
            BuildInferenceKernelsRecursive(in_node, &req_kernels);
          }
        }
      }
    }
  }

  inference_kernels_.clear();
  for (auto ori_kernel : kernels_) {
    if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) {
@@ -272,10 +334,71 @@ void TrainSession::BuildInferenceKernelsMap() {
  }
}


-bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) {
+void TrainSession::CompileTrainKernels() {
train_kernels_.clear();
for (auto ori_kernel : kernels_) {
if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) {
train_kernels_.push_back(ori_kernel);
} else {
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel);
for (auto kernel : sub_graph->nodes()) {
train_kernels_.push_back(kernel);
}
}
}
}

void TrainSession::MarkOptimizedKernels() {
std::vector<lite::Tensor *> ot;
for (auto kernel : this->kernels_) {
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
if (IsOptimizer(kernel)) {
std::copy(kernel->in_tensors().begin(), kernel->in_tensors().end(), std::back_inserter(ot));
}
} else {
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
for (auto sb_kernel : sub_graph->nodes()) {
if (IsOptimizer(sb_kernel)) {
std::copy(sb_kernel->in_tensors().begin(), sb_kernel->in_tensors().end(), std::back_inserter(ot));
}
}
}
}
for (auto kernel : this->kernels_) {
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
if (!IsOptimizer(kernel)) {
for (auto it : kernel->in_tensors()) {
if (std::find(ot.begin(), ot.end(), it) != ot.end()) {
kernel->SetTrainable(true);
break;
}
}
}
} else {
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
for (auto sb_kernel : sub_graph->nodes()) {
if (!IsOptimizer(sb_kernel)) {
for (auto it : sb_kernel->in_tensors()) {
if (std::find(ot.begin(), ot.end(), it) != ot.end()) {
sb_kernel->SetTrainable(true);
break;
}
}
}
}
}
}
}

bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) const {
  return (kernel->Type() == schema::PrimitiveType_SoftmaxCrossEntropy);
}


bool TrainSession::IsOptimizer(kernel::LiteKernel *kernel) const {
return ((kernel->Type() == schema::PrimitiveType_Adam) || (kernel->Type() == schema::PrimitiveType_Sgd) ||
(kernel->Type() == schema::PrimitiveType_ApplyMomentum));
}

} // namespace lite


session::TrainSession *session::TrainSession::CreateSession(lite::Context *context) {
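Taken together with MarkOptimizedKernels(), which flags as trainable any kernel that shares an input tensor with an Adam, Sgd, or ApplyMomentum optimizer, the new surface gives a straightforward caller-side flow. A usage sketch, assuming a configured lite::Context and a loaded TrainModel, with error handling elided:

// Sketch only: Train()/Eval() now return status codes, RunGraph() picks
// train_kernels_ or inference_kernels_ based on the current mode, and
// SaveToFile() exports the trained flatbuffer ("model_trained.ms" is a
// placeholder name).
auto *session = mindspore::session::TrainSession::CreateSession(&context);
session->CompileTrainGraph(model);

session->Train();   // run the full graph including gradient/optimizer kernels
session->RunGraph();

session->Eval();    // run only the inference subgraph below the loss
session->RunGraph();

session->SaveToFile("model_trained.ms");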


+ 8
- 4
mindspore/lite/src/train/train_session.h View File

@@ -55,9 +55,10 @@ class TrainSession : virtual public session::TrainSession, virtual public lite::
  int CompileTrainGraph(lite::TrainModel *model) override;

  void *ExportToBuf(char *buf, size_t *len) const override;
+  int SaveToFile(const std::string &filename) const override;

-  void Train() override;
-  void Eval() override;
+  int Train() override;
+  int Eval() override;

  void BindThread(bool if_bind) override { return lite::LiteSession::BindThread(if_bind); }
  std::vector<tensor::MSTensor *> GetInputs() const override { return lite::LiteSession::GetInputs(); }
@@ -84,16 +85,19 @@ class TrainSession : virtual public session::TrainSession, virtual public lite::


 protected:
  void AllocWorkSpace();
-  bool IsLossKernel(const kernel::LiteKernel *kernel);
+  bool IsLossKernel(const kernel::LiteKernel *kernel) const;
+  bool IsOptimizer(kernel::LiteKernel *kernel) const;
+  virtual void MarkOptimizedKernels();
  virtual std::vector<CreatorOp> ReplaceOps();
  virtual void RestoreOps(const std::vector<CreatorOp> &restore);
  virtual void BuildInferenceKernelsMap();
  virtual void BuildInferenceKernelsRecursive(kernel::LiteKernel *ker, std::vector<kernel::LiteKernel *> *req_kernels);
+  virtual void CompileTrainKernels();
  TrainModel *model_ = nullptr;
  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> orig_output_map_;
  std::unordered_map<std::string, mindspore::tensor::MSTensor *> orig_output_tensor_map_;
  std::vector<kernel::LiteKernel *> inference_kernels_;
+  std::vector<kernel::LiteKernel *> train_kernels_;
};
} // namespace lite
} // namespace mindspore


+ 8
- 0
mindspore/lite/test/models_ms_train.cfg View File

@@ -0,0 +1,8 @@
mini_alexnet
mobilenetv1
mobilenetv2
mobilenetv3
lenet
effnet
effnet_tune
resnet

+ 394
- 0
mindspore/lite/test/run_net_train.sh View File

@@ -0,0 +1,394 @@
#!/bin/bash

# Run Export on x86 platform and create output test files:
function Run_Export(){
cd $models_path || exit 1
if [[ -z "${CLOUD_MODEL_ZOO}" ]]; then
echo "CLOUD_MODEL_ZOO is not defined - exiting export models"
exit 1
fi
# Export mindspore train models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
continue
fi
echo ${model_name}'_train_export.py' >> "${export_log_file}"
echo 'exporting' ${model_name}
echo 'docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python '${models_path}'/'${model_name}'_train_export.py' >> "${export_log_file}"
docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python ${models_path}'/'${model_name}_train_export.py
if [ $? = 0 ]; then
export_result='export mindspore '${model_name}'_train_export pass';echo ${export_result} >> ${export_result_file}
else
export_result='export mindspore '${model_name}'_train_export failed';echo ${export_result} >> ${export_result_file}
fi
done < ${models_mindspore_train_config}
}

# Run converter on x86 platform:
function Run_Converter() {
# Unzip x86 runtime and convertor
cd ${x86_path} || exit 1
tar -zxf mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train.tar.gz || exit 1

tar -zxf mindspore-lite-${version}-converter-ubuntu-train.tar.gz || exit 1
cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu-train || exit 1
cp converter/converter_lite ./ || exit 1

# Convert the models
cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu-train || exit 1

rm -rf ${ms_models_path}
mkdir -p ${ms_models_path}

# Convert mindspore train models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
continue
fi
echo ${model_name}'_train' >> "${run_converter_log_file}"
echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}'_train.mindir --outputFile='${ms_models_path}'/'${model_name}'_train --trainModel=true' >> "${run_converter_log_file}"
LD_LIBRARY_PATH=./lib/:./third_party/protobuf/lib:./third_party/flatbuffers/lib:./third_party/glog/lib \
./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name}_train.mindir \
--outputFile=${ms_models_path}/${model_name}'_train' \
--trainModel=true
if [ $? = 0 ]; then
converter_result='converter mindspore '${model_name}'_train pass';echo ${converter_result} >> ${run_converter_result_file}
else
converter_result='converter mindspore '${model_name}'_train failed';echo ${converter_result} >> ${run_converter_result_file}
fi
done < ${models_mindspore_train_config}
}

# Run on x86 platform:
function Run_x86() {
# Run mindspore converted train models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
continue
fi
echo ${model_name}'_train' >> "${run_x86_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86}-train >> "${run_x86_log_file}"
cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train || return 1
echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./net_train/net_train --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}'/'${model_name}'_input1.bin,'${train_io_path}'/'${model_name}'_input2.bin --expectedDataFile='${train_io_path}'/'${model_name}'_outputs.bin --exportFile='${ms_models_path}'/'${model_name}'_train_exported.ms' >> "${run_x86_log_file}"
echo '-------------------------------------------------------------------------------' >> "${run_x86_log_file}"
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib \
${run_valgrind}./net_train/net_train \
--modelFile=${ms_models_path}/${model_name}_train.ms \
--inDataFile=${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin \
--expectedDataFile=${train_io_path}/${model_name}_outputs.bin \
--exportFile=${ms_models_path}/${model_name}_train_exported.ms >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}'_train pass'; echo ${run_result} >> ${run_net_train_result_file}
else
run_result='x86: '${model_name}'_train failed'; echo ${run_result} >> ${run_net_train_result_file}
fi
done < ${models_mindspore_train_config}
}

# Run on arm platform:
# Gets a parameter - arm64/arm32
function Run_arm() {
if [ "$1" == arm64 ]; then
arm_path=${arm64_path}
process_unit=${process_unit_arm64}
version_arm=${version_arm64}
run_arm_log_file=${run_arm64_log_file}
adb_cmd_run_file=${adb_cmd_arm64_run_file}
adb_push_log_file=${adb_push_arm64_log_file}
adb_cmd_file=${adb_cmd_arm64_file}
elif [ "$1" == arm32 ]; then
arm_path=${arm32_path}
process_unit=${process_unit_arm32}
version_arm=${version_arm32}
run_arm_log_file=${run_arm32_log_file}
adb_cmd_run_file=${adb_cmd_arm32_run_file}
adb_push_log_file=${adb_push_arm32_log_file}
adb_cmd_file=${adb_cmd_arm32_file}
else
echo 'type ' $1 'is not supported'
exit 1
fi
arm_type=$1

# Unzip
cd ${arm_path} || exit 1
tar -zxf mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train.tar.gz || exit 1

# If build with minddata, copy the minddata related libs
cd ${net_train_test_path} || exit 1
if [ -f ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ]; then
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libjpeg.so ${net_train_test_path}/libjpeg.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libturbojpeg.so ${net_train_test_path}/libturbojpeg.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_core.so ${net_train_test_path}/libopencv_core.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgcodecs.so ${net_train_test_path}/libopencv_imgcodecs.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgproc.so ${net_train_test_path}/libopencv_imgproc.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ${net_train_test_path}/libminddata-lite.so || exit 1
fi

cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite.so ${net_train_test_path}/libmindspore-lite.so || exit 1
if [ "$1" == arm64 ]; then
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-fp16.so ${net_train_test_path}/libmindspore-lite-fp16.so || exit 1
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-optimize.so ${net_train_test_path}/libmindspore-lite-optimize.so || exit 1
fi
cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/net_train/net_train ${net_train_test_path}/net_train || exit 1

# adb push all needed files to the phone
adb -s ${device_id} push ${net_train_test_path} /data/local/tmp/ > ${adb_push_log_file}

# run adb ,run session ,check the result:
echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_file}
echo 'chmod 777 net_train' >> ${adb_cmd_file}

adb -s ${device_id} shell < ${adb_cmd_file}
# Run mindir converted train models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
continue
fi

# run net_train test without clib data
echo ${model_name}'_train' >> "${run_arm_log_file}"
adb -s ${device_id} push ${train_io_path}/${model_name}_input*.bin ${train_io_path}/${model_name}_outputs.bin /data/local/tmp/net_train_test >> ${adb_push_log_file}
echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_run_file}
if [ "$1" == arm64 ]; then
echo 'export LD_LIBRARY_PATH=/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${run_arm_log_file}"
echo 'export LD_LIBRARY_PATH=/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${adb_cmd_run_file}"
elif [ "$1" == arm32 ]; then
echo 'export LD_LIBRARY_PATH=/data/local/tmp/:/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${run_arm_log_file}"
echo 'export LD_LIBRARY_PATH=/data/local/tmp/:/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${adb_cmd_run_file}"
fi

adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
# TODO: change to arm_type
if [ $? = 0 ]; then
run_result=$1': '${model_name}'_train pass'; echo ${run_result} >> ${run_net_train_result_file}
else
run_result=$1': '${model_name}'_train failed'; echo ${run_result} >> ${run_net_train_result_file}
fi
done < ${models_mindspore_train_config}
}

# Print start msg before run testcase
function MS_PRINT_TESTCASE_START_MSG() {
echo ""
echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
echo -e "env Testcase Result "
echo -e "--- -------- ------ "
}

# Print start msg after run testcase
function MS_PRINT_TESTCASE_END_MSG() {
echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
}

function Print_Result() {
MS_PRINT_TESTCASE_END_MSG
while read line; do
arr=("${line}")
printf "%-15s %-20s %-90s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]} ${arr[3]}
done < $1
MS_PRINT_TESTCASE_END_MSG
}

basepath=$(pwd)
echo ${basepath}

# Example:run_net_train.sh -r /home/emir/Work/TestingEnv/release -m /home/emir/Work/TestingEnv/train_models -i /home/emir/Work/TestingEnv/train_io -d "8KE5T19620002408"
# For running on arm64, use -t to set platform tools path (for using adb commands)
while getopts "r:m:d:i:e:v" opt; do
case ${opt} in
r)
release_path=${OPTARG}
echo "release_path is ${OPTARG}"
;;
m)
models_path=${OPTARG}
echo "models_path is ${OPTARG}"
;;
i)
train_io_path=${OPTARG}
echo "train_io_path is ${OPTARG}"
;;
d)
device_id=${OPTARG}
echo "device_id is ${OPTARG}"
;;
e)
enable_export=${OPTARG}
echo "enable_export = ${OPTARG}"
;;
v)
run_valgrind="valgrind "
echo "Run x86 with valgrind"
;;
?)
echo "unknown para"
exit 1;;
esac
done

arm64_path=${release_path}/android_aarch64
file=$(ls ${arm64_path}/*runtime-arm64*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version_arm64=${file_name_array[2]}
process_unit_arm64=${file_name_array[5]}

arm32_path=${release_path}/android_aarch32
file=$(ls ${arm32_path}/*runtime-arm32*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version_arm32=${file_name_array[2]}
process_unit_arm32=${file_name_array[5]}

x86_path=${release_path}/ubuntu_x86
file=$(ls ${x86_path}/*runtime-x86*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version=${file_name_array[2]}
process_unit_x86=${file_name_array[5]}

# Set models config filepath
models_mindspore_train_config=${basepath}/models_ms_train.cfg

ms_models_path=${models_path}/ms_models

logs_path=${models_path}/logs
rm -rf ${logs_path}
mkdir -p ${logs_path}

# Export model if enabled
if [[ $enable_export == 1 ]]; then
echo "Start Exporting models ..."
# Write export result to temp file
export_log_file=${logs_path}/export_log.txt
echo ' ' > ${export_log_file}

export_result_file=${logs_path}/export_result.txt
echo ' ' > ${export_result_file}
# Run export
Run_Export
Print_Result ${export_result_file}

fi

# Write converter result to temp file
run_converter_log_file=${logs_path}/run_converter_log.txt
echo ' ' > ${run_converter_log_file}

run_converter_result_file=${logs_path}/run_converter_result.txt
echo ' ' > ${run_converter_result_file}

START=$(date +%s.%N)

# Run converter
echo "start run converter ..."
Run_Converter &
Run_converter_PID=$!
sleep 1

wait ${Run_converter_PID}
Run_converter_status=$?

# Check converter result and return value
if [[ ${Run_converter_status} = 0 ]];then
echo "Run converter success"
Print_Result ${run_converter_result_file}
else
echo "Run converter failed"
cat ${run_converter_log_file}
Print_Result ${run_converter_result_file}
exit 1
fi


# Write net_train result to temp file
run_net_train_result_file=${logs_path}/run_net_train_result.txt
echo ' ' > ${run_net_train_result_file}

# Create log files
run_x86_log_file=${logs_path}/run_x86_log.txt
echo 'run x86 logs: ' > ${run_x86_log_file}

run_arm64_log_file=${logs_path}/run_arm64_log.txt
echo 'run arm64 logs: ' > ${run_arm64_log_file}
adb_push_arm64_log_file=${logs_path}/adb_push_arm64_log.txt
adb_cmd_arm64_file=${logs_path}/adb_arm64_cmd.txt
adb_cmd_arm64_run_file=${logs_path}/adb_arm64_cmd_run.txt

run_arm32_log_file=${logs_path}/run_arm32_log.txt
echo 'run arm32 logs: ' > ${run_arm32_log_file}
adb_push_arm32_log_file=${logs_path}/adb_push_arm32_log.txt
adb_cmd_arm32_file=${logs_path}/adb_arm32_cmd.txt
adb_cmd_arm32_run_file=${logs_path}/adb_arm32_cmd_run.txt

# Copy the MindSpore models:
echo "Push files to net_train_test folder and run net_train"
net_train_test_path=${models_path}/net_train_test
rm -rf ${net_train_test_path}
mkdir -p ${net_train_test_path}
cp -a ${ms_models_path}/*.ms ${net_train_test_path} || exit 1

# Run on x86
echo "start Run x86 ..."
Run_x86 &
Run_x86_PID=$!
sleep 1

wait ${Run_x86_PID}
Run_x86_status=$?

# Run on arm64
echo "start Run arm64 ..."
Run_arm arm64
Run_arm64_status=$?
sleep 3

# Run on arm32
echo "start Run arm32 ..."
Run_arm arm32
Run_arm32_status=$?
sleep 1

END=$(date +%s.%N)
DIFF=$(echo "$END - $START" | bc)

function Print_Benchmark_Result() {
MS_PRINT_TESTCASE_START_MSG
while read line; do
arr=("${line}")
printf "%-20s %-100s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]}
done < ${run_net_train_result_file}
MS_PRINT_TESTCASE_END_MSG
}

# Check net_train result and return value
if [[ ${Run_x86_status} != 0 ]];then
echo "Run_x86 failed"
cat ${run_x86_log_file}
fi

if [[ ${Run_arm64_status} != 0 ]];then
echo "Run_arm64 failed"
cat ${run_arm64_log_file}
fi

if [[ ${Run_arm32_status} != 0 ]];then
echo "Run_arm32 failed"
cat ${run_arm32_log_file}
fi

echo "Test ended - Results:"
Print_Benchmark_Result
echo "Test run Time:" $DIFF
exit 0

+ 2
- 2
mindspore/lite/test/run_train_ut.sh View File

@@ -1,5 +1,5 @@
#!/bin/bash
cd ./ut/src/runtime/kernel/arm || exit 1
../../../../../../build/test/lite-test --gtest_filter=NetworkTest.efficient_net
-../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer
-../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet
+# ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer
+# ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet

+ 86
- 0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc View File

@@ -42,11 +42,17 @@ TEST_F(TestActGradFp32, ReluGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/relu_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);
  EXPECT_EQ(input_size, output_data_size * sizeof(float));

  std::string yt_path = "./test_data/activationGrad/relu_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);
  EXPECT_EQ(input_size, output_data_size * sizeof(float));

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    ReluGrad(yt_data, input_data, output_data_size, output_data);
@@ -90,10 +96,15 @@ TEST_F(TestActGradFp32, Relu6GradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/relu6_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);

  std::string yt_path = "./test_data/activationGrad/relu6_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    Relu6Grad(yt_data, input_data, 50, output_data);
@@ -136,10 +147,15 @@ TEST_F(TestActGradFp32, LReluGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/lrelu_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);

  std::string yt_path = "./test_data/activationGrad/lrelu_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    LReluGrad(yt_data, input_data, 50, output_data, 0.1);
@@ -182,10 +198,15 @@ TEST_F(TestActGradFp32, SigmoidGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/sigmoid_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);

  std::string yt_path = "./test_data/activationGrad/sigmoid_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SigmoidGrad(yt_data, input_data, 50, output_data);
@@ -229,10 +250,15 @@ TEST_F(TestActGradFp32, tanhGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/tanh_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);

  std::string yt_path = "./test_data/activationGrad/tanh_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    TanhGrad(yt_data, input_data, 50, output_data);
@@ -274,11 +300,17 @@ TEST_F(TestActGradFp32, hswishGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/hswish_x_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);
  EXPECT_EQ(input_size, output_data_size * sizeof(float));

  std::string yt_path = "./test_data/activationGrad/hswish_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
+  ASSERT_NE(yt_data, nullptr);
  EXPECT_EQ(input_size, output_data_size * sizeof(float));

  auto output_data = new float[output_data_size];
+  ASSERT_NE(output_data, nullptr);

  // warm up loop
  for (int i = 0; i < 3; i++) {
    HSwishGrad(yt_data, input_data, static_cast<int>(output_data_size), output_data);
@@ -311,4 +343,58 @@ TEST_F(TestActGradFp32, hswishGradFp32) {
  delete[] yt_data;
  MS_LOG(INFO) << "hswishGradFp32 passed";
}

TEST_F(TestActGradFp32, hsigmoidGradFp32) {
// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
const size_t output_data_size = 10;

size_t input_size;
std::string input_path = "./test_data/activationGrad/hsig_x_50.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
EXPECT_EQ(input_size, output_data_size * sizeof(float));

std::string yt_path = "./test_data/activationGrad/hsig_yt_50.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);
EXPECT_EQ(input_size, output_data_size * sizeof(float));

auto output_data = new float[output_data_size];
ASSERT_NE(output_data, nullptr);

// warm up loop
for (int i = 0; i < 3; i++) {
HSigmoidGrad(yt_data, input_data, static_cast<int>(output_data_size), output_data);
}

int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
HSigmoidGrad(yt_data, input_data, output_data_size, output_data);
}
auto time_end = mindspore::lite::GetTimeUs();
auto cost = time_end - time_start;
time_avg = cost / loop_count;
printf("single thread running time : %f ms\n", time_avg / 1000.0f);

printf("==================output data=================\n");
size_t min = (output_data_size < 20UL) ? output_data_size : 20UL;
for (size_t i = 0; i < min; i++) {
std::cout << output_data[i] << " ,";
}
std::cout << std::endl;

std::string output_path = "./test_data/activationGrad/hsig_out_50.bin";
int res = CompareRelativeOutput(output_data, output_path);

EXPECT_EQ(res, 0);

delete[] input_data;
delete[] output_data;
delete[] yt_data;
MS_LOG(INFO) << "hsigmoidGradFp32 passed";
}

} // namespace mindspore
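The new hsigmoid case exercises HSigmoidGrad with the same (dy, x, length, dx) calling convention used for HSwishGrad above. Assuming the usual relu6-based definition hsigmoid(x) = relu6(x + 3) / 6, the expected gradient is piecewise constant; a reference sketch:

// Sketch only: d/dx [relu6(x + 3) / 6] is 1/6 on (-3, 3) and 0 elsewhere.
void HSigmoidGradSketch(const float *dy, const float *x, int length, float *dx) {
  for (int i = 0; i < length; i++) {
    dx[i] = (x[i] > -3.0f && x[i] < 3.0f) ? dy[i] / 6.0f : 0.0f;
  }
}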

+ 237
- 5
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc View File

@@ -24,9 +24,9 @@
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "src/ops/arithmetic_grad.h" #include "src/ops/arithmetic_grad.h"


#ifdef PRIMITIVE_WRITEABLE
namespace mindspore { namespace mindspore {


#ifdef PRIMITIVE_WRITEABLE
ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type, ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type,
std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> inputs,
std::vector<lite::Tensor *> outputs) { std::vector<lite::Tensor *> outputs) {
@@ -37,6 +37,12 @@ ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveTyp
  }
  arithmetic_param->op_parameter_.type_ = type;
  schema::PrimitiveT *prim = new schema::PrimitiveT;
+  if (prim == nullptr) {
+    free(arithmetic_param);
+    MS_LOG(ERROR) << "new PrimitiveT failed.";
+    return nullptr;
+  }
  prim->value.type = type;
  auto agrad = mindspore::lite::ArithmeticGrad(prim);
  agrad.InferShape(inputs, outputs);
@@ -55,6 +61,7 @@ class TestArithmeticGradFp32 : public mindspore::CommonTest {


std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id) {
  size_t input_size;
+  std::vector<lite::Tensor *> ret_vector;
  std::vector<int> large_dim({4, 6});
  std::vector<int> small_dim({6});
  int large_size = (4 * 6);
@@ -80,36 +87,127 @@ std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id
  }

  auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(test, &input_size));
+  if (dy_data == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    return ret_vector;
+  }
  lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  if (dy_tensor == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    return ret_vector;
+  }
  dy_tensor->set_data(dy_data);

  auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size));
+  if (x1_data == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    return ret_vector;
+  }
  lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  if (x1_tensor == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    return ret_vector;
+  }
  x1_tensor->set_data(x1_data);

  auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size));
+  if (x2_data == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    return ret_vector;
+  }
  lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
+  if (x2_tensor == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    delete[] x2_data;
+    return ret_vector;
+  }
  x2_tensor->set_data(x2_data);

  auto dx1_data = new float[large_size];
+  if (dx1_data == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    delete[] x2_data;
+    delete x2_tensor;
+    return ret_vector;
+  }
  lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
+  if (dx1_tensor == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    delete[] x2_data;
+    delete x2_tensor;
+    delete[] dx1_data;
+    return ret_vector;
+  }
  dx1_tensor->set_data(dx1_data);

  auto dx2_data = new float[small_size];
+  if (dx2_data == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    delete[] x2_data;
+    delete x2_tensor;
+    delete[] dx1_data;
+    delete dx1_tensor;
+    return ret_vector;
+  }
  lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
+  if (dx2_tensor == nullptr) {
+    MS_LOG(ERROR) << "new operator failed";
+    delete[] dy_data;
+    delete dy_tensor;
+    delete[] x1_data;
+    delete x1_tensor;
+    delete[] x2_data;
+    delete x2_tensor;
+    delete[] dx1_data;
+    delete dx1_tensor;
+    delete[] dx2_data;
+    return ret_vector;
+  }
  dx2_tensor->set_data(dx2_data);

-  std::vector<lite::Tensor *> ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor};
+  ret_vector.push_back(dy_tensor);
+  ret_vector.push_back(x1_tensor);
+  ret_vector.push_back(x2_tensor);
+  ret_vector.push_back(dx1_tensor);
+  ret_vector.push_back(dx2_tensor);

  return ret_vector;
}


TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1);
+  ASSERT_NE(all_tensors.size(), 0);
  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);
+  ASSERT_NE(param, nullptr);

  lite::InnerContext ctx;
  ctx.thread_num_ = 1;
@@ -117,7 +215,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run(); kernel_obj->Run();


float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -144,10 +244,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -155,7 +257,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -184,10 +288,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -195,7 +301,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -225,10 +333,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -236,7 +346,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -266,10 +378,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -277,7 +391,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -305,10 +421,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -316,8 +434,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
int loop_count = 1000;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
@@ -354,10 +473,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -365,7 +486,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -394,10 +517,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -405,7 +530,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -434,10 +561,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -445,7 +574,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -474,10 +605,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -485,7 +618,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -514,10 +649,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -525,7 +662,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
@@ -555,10 +694,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -566,7 +707,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -595,10 +738,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
std::vector<lite::Tensor *> all_tensors =
GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7);
ASSERT_NE(all_tensors.size(), 0);

std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
@@ -606,7 +751,9 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
@@ -630,6 +777,91 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
MS_LOG(INFO) << "Test3DDivGrad2Fp32 passed";
}

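// Reviewer note (reference only): the DivGrad cases above check the quotient
// rule; for y = x1 / x2 the expected gradients are dx1 = dy / x2 and
// dx2 = -dy * x1 / (x2 * x2), with an extra sum-reduction over any broadcast
// axes. Scalar sketch of the formula being verified:
inline void DivGradScalarRef(float dy, float x1, float x2, float *dx1, float *dx2) {
  *dx1 = dy / x2;               // d(x1/x2)/dx1 = 1/x2
  *dx2 = -dy * x1 / (x2 * x2);  // d(x1/x2)/dx2 = -x1/x2^2
}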
TEST_F(TestArithmeticGradFp32, TestMaximumGradBroadcastFp32) {
std::vector<int> large_dim({1, 2, 3});
std::vector<int> small_dim({1, 3});
int large_size = (2 * 3);
int small_size = 3;
size_t input_size;
char *dx1_file = const_cast<char *>("./test_data/operators/x1_maximum.bin");
char *dx2_file = const_cast<char *>("./test_data/operators/x2_maximum.bin");

std::string yt_path = "./test_data/operators/yt_maximum.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(dy_data, nullptr);
EXPECT_EQ(input_size, large_size * sizeof(float));
lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
ASSERT_NE(dy_tensor, nullptr);
dy_tensor->set_data(dy_data);

auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size));
ASSERT_NE(x1_data, nullptr);
lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
ASSERT_NE(x1_tensor, nullptr);
x1_tensor->set_data(x1_data);

auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size));
ASSERT_NE(x2_data, nullptr);
lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
ASSERT_NE(x2_tensor, nullptr);
x2_tensor->set_data(x2_data);

auto dx1_data = new float[small_size];
ASSERT_NE(dx1_data, nullptr);
lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
ASSERT_NE(dx1_tensor, nullptr);
dx1_tensor->set_data(dx1_data);


auto dx2_data = new float[large_size];
ASSERT_NE(dx2_data, nullptr);
lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
ASSERT_NE(dx2_tensor, nullptr);
dx2_tensor->set_data(dx2_data);

std::vector<lite::Tensor *> inputs = {x1_tensor, x2_tensor, dy_tensor};
std::vector<lite::Tensor *> outputs = {dx1_tensor, dx2_tensor};

auto param = PopulateArithmeticParameter(schema::PrimitiveType_MaximumGrad, inputs, outputs);
ASSERT_NE(param, nullptr);

lite::InnerContext ctx;
ctx.thread_num_ = 1;
ASSERT_EQ(lite::RET_OK, ctx.Init());

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MaximumGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
printf("==================output data=================\n");
for (int i = 0; i < 6; i++) {
std::cout << output_ptr[i] << " ,";
}
std::cout << std::endl;

std::string dx1_path = "./test_data/operators/x1_grad_maximum.bin";
EXPECT_EQ(0, CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path));

std::string output_path = "./test_data/operators/x2_grad_maximum.bin";
EXPECT_EQ(0, CompareRelativeOutput(output_ptr, output_path));
for (auto tensor : inputs) {
delete[] reinterpret_cast<float *>(tensor->MutableData());
tensor->set_data(nullptr);
delete tensor;
}
for (auto tensor : outputs) {
delete[] reinterpret_cast<float *>(tensor->MutableData());
tensor->set_data(nullptr);
delete tensor;
}
delete kernel_obj;
MS_LOG(INFO) << "TestMaximumGradBroadcastFp32 passed";
}
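// Reviewer note (reference only): MaximumGrad routes dy to whichever input is
// larger, and a broadcast operand's gradient is sum-reduced back to its own
// shape -- which is why dx1 above has small_dim while dy has large_dim. Sketch
// for x1 of length c broadcast against x2 of shape (n, c); the tie-breaking
// convention (>=) is an assumption here, not taken from the kernel:
void MaximumGradBroadcastRef(const float *dy, const float *x1, const float *x2,
                             float *dx1, float *dx2, int n, int c) {
  for (int j = 0; j < c; ++j) dx1[j] = 0.f;
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < c; ++j) {
      int k = i * c + j;
      if (x1[j] >= x2[k]) {
        dx1[j] += dy[k];  // sum-reduce over the broadcast axis
        dx2[k] = 0.f;
      } else {
        dx2[k] = dy[k];
      }
    }
  }
}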
#endif
} // namespace mindspore

+ 58
- 2
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc

@@ -31,15 +31,20 @@ class TestBiasGradFp32 : public mindspore::CommonTest {
TEST_F(TestBiasGradFp32, BiasGradFp32) {
// prepare stage
ArithmeticParameter *bias_param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
ASSERT_NE(bias_param, nullptr);

size_t input_size;
std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);

std::vector<int> dim_dy({10, 28, 28, 7});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(input_data);

std::vector<lite::Tensor *> inputs = {&dy_tensor};
auto output_data = new float[7];
ASSERT_NE(output_data, nullptr);
std::vector<int> dim_dw = {7};
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(output_data);
@@ -51,8 +56,9 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

printf("==================output data=================\n");
@@ -61,7 +67,57 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) {
}
std::cout << std::endl;
std::string output_path = "./test_data/operators/biasgradfp32_1_db_7.bin";
auto res = CompareRelativeOutput(output_data, output_path);
EXPECT_EQ(res, 0);

delete[] input_data;
delete[] output_data;
// delete bias_param;
dy_tensor.set_data(nullptr);
dw_tensor.set_data(nullptr);
delete kernel_obj;
MS_LOG(INFO) << "BiasGradFp32 passed";
}
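// Reviewer note (reference only): BiasGrad is a reduction of dy over every
// axis except the trailing channel axis, which is why a {10, 28, 28, 7} dy
// collapses to a length-7 db above. Minimal sketch with outer = N*H*W:
void BiasGradRef(const float *dy, float *db, int outer, int channels) {
  for (int c = 0; c < channels; ++c) db[c] = 0.f;
  for (int i = 0; i < outer; ++i) {
    for (int c = 0; c < channels; ++c) {
      db[c] += dy[i * channels + c];  // accumulate per-channel
    }
  }
}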

TEST_F(TestBiasGradFp32, BiasGrad2DFp32) {
// prepare stage
ArithmeticParameter *bias_param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
ASSERT_NE(bias_param, nullptr);

size_t input_size;
std::string input_path = "./test_data/operators/fc_yt.f32";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_dy({2, 20});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(input_data);

std::vector<lite::Tensor *> inputs = {&dy_tensor};
auto output_data = new float[20];
ASSERT_NE(output_data, nullptr);
std::vector<int> dim_dw = {20};
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(output_data);
std::vector<lite::Tensor *> outputs = {&dw_tensor};

lite::InnerContext ctx;
ctx.thread_num_ = 1;
ASSERT_EQ(lite::RET_OK, ctx.Init());

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
kernel_obj->Run();

printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << output_data[i] << " ,";
}
std::cout << std::endl;
std::string output_path = "./test_data/operators/fc_b_grad.f32";
auto res = CompareRelativeOutput(output_data, output_path);
EXPECT_EQ(res, 0);


delete[] input_data;
delete[] output_data;
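// Reviewer note (sketch under an assumption): these tests treat a return value
// of 0 from CompareRelativeOutput as "every element within relative tolerance".
// The real helper lives in the shared test utilities; this hypothetical
// reference shows the kind of check the EXPECT_EQ(res, 0) assertions rely on
// (uses std::fabs from <cmath>):
int CompareRelativeRef(const float *out, const float *expect, int n, float rtol) {
  for (int i = 0; i < n; ++i) {
    float denom = std::fabs(expect[i]) > 1e-9f ? std::fabs(expect[i]) : 1e-9f;
    if (std::fabs(out[i] - expect[i]) / denom > rtol) return 1;  // mismatch found
  }
  return 0;  // all elements within tolerance
}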


+ 35
- 24
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc

@@ -35,6 +35,10 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<
size_t input_size = 0;
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_name.c_str(), &input_size));
auto tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, dim);
if (tensor == nullptr) {
MS_LOG(ERROR) << "new tensor failed";
return nullptr;
}
tensor->set_data(input_data);
EXPECT_EQ(input_size, tensor->Size());
return tensor;
@@ -43,7 +47,9 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<
TEST_F(TestBNGradFp32, BNGradFp32) {
// prepare stage
auto bn_param = static_cast<BNGradParameter *>(malloc(sizeof(BNGradParameter)));
ASSERT_NE(bn_param, nullptr);

bn_param->epsilon_ = 1e-2;
bn_param->momentum_ = 0.1;
const int batch = 2;
const int channels = 3;
@@ -51,10 +57,16 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
const int width = 5;

auto dy_tensor = CreateInTensor("./test_data/bngrad/dy_2_4_5_3.bin", {batch, height, width, channels});
ASSERT_NE(dy_tensor, nullptr);
auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels});
ASSERT_NE(x_tensor, nullptr);
auto scale_tensor = CreateInTensor("./test_data/bngrad/scale_3.bin", {1, 1, 1, channels});
ASSERT_NE(scale_tensor, nullptr);
auto mean_tensor = CreateInTensor("./test_data/bngrad/save_mean_3.bin", {1, 1, 1, channels});
ASSERT_NE(mean_tensor, nullptr);
auto var_tensor = CreateInTensor("./test_data/bngrad/save_var_3.bin", {1, 1, 1, channels});
ASSERT_NE(var_tensor, nullptr);

// prepare output tensors
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels});
ASSERT_EQ(dx_tensor.MallocData(), 0);
@@ -72,27 +84,18 @@ TEST_F(TestBNGradFp32, BNGradFp32) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bn_param), &ctx, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize());

kernel_obj->Run();
std::cout << "==========dx==========\n";
auto dx = reinterpret_cast<float *>(outputs[0]->MutableData());
for (int i = 0; i < 7; i++) std::cout << dx[i] << " ";
std::cout << "\n";
auto res = CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin");
EXPECT_EQ(res, 0);
std::cout << "\n=======dscale=======\n";
auto dscale = reinterpret_cast<float *>(outputs[1]->MutableData());
for (int i = 0; i < channels; i++) std::cout << dscale[i] << " ";
@@ -104,7 +107,6 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
for (int i = 0; i < 3; i++) std::cout << dbias[i] << " ";
std::cout << "\n";
res = CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin");
EXPECT_EQ(res, 0);
for (auto v : inputs) {
delete[] reinterpret_cast<float *>(v->MutableData());
v->set_data(nullptr);
@@ -117,8 +119,10 @@ TEST_F(TestBNGradFp32, BNGradFp32) {


TEST_F(TestBNGradFp32, BNTtrainFp32) {
auto bn_param = static_cast<BatchNormParameter *>(malloc(sizeof(BatchNormParameter)));
ASSERT_NE(bn_param, nullptr);

bn_param->epsilon_ = 1e-2;
bn_param->momentum_ = 0.1;
const int batch = 2;
const int channels = 3;
const int height = 4;
@@ -173,27 +177,34 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) {
ASSERT_EQ(lite::RET_OK, context.Init());

auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bn_param), &context, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize());

float *save_mean = reinterpret_cast<float *>(save_mean_tensor.MutableData());
float *save_var = reinterpret_cast<float *>(save_var_tensor.MutableData());
for (int i = 0; i < channels; i++) {
save_var[i] = 1.f;
save_mean[i] = 0.f;
}
float *curr_mean = reinterpret_cast<float *>(mean_tensor.MutableData());
float *curr_var = reinterpret_cast<float *>(var_tensor.MutableData());

kernel_obj->Train();
kernel_obj->SetTrainable(true);
kernel_obj->Run();


std::cout << "================save_mean==============================\n";
for (int i = 0; i < channels; i++) std::cout << curr_mean[i] << " ";
std::cout << "\n";
std::cout << "===============save_var==============================\n";
for (int i = 0; i < channels; i++) std::cout << curr_var[i] << " ";
std::cout << "\n";
delete[] reinterpret_cast<float *>(x_tensor->MutableData());
auto res = CompareRelativeOutput(curr_mean, "./test_data/bngrad/running_mean_3.bin");
EXPECT_EQ(res, 0);
res = CompareRelativeOutput(curr_var, "./test_data/bngrad/running_var_3.bin");
EXPECT_EQ(res, 0);

x_tensor->set_data(nullptr);
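// Reviewer note (assumption, for context): BNTtrainFp32 seeds save_mean = 0 /
// save_var = 1, runs one training step, and compares the updated running
// statistics. Under the common momentum convention (the kernel's exact
// convention is not restated here) the update is:
//   running <- (1 - momentum) * running + momentum * batch_stat
void UpdateRunningStatsRef(float *running_mean, float *running_var,
                           const float *batch_mean, const float *batch_var,
                           int channels, float momentum) {
  for (int c = 0; c < channels; ++c) {
    running_mean[c] = (1.f - momentum) * running_mean[c] + momentum * batch_mean[c];
    running_var[c] = (1.f - momentum) * running_var[c] + momentum * batch_var[c];
  }
}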


+ 50
- 9
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc

@@ -77,11 +77,13 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) {
TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup1FP32(conv_param);
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({1, 28, 28, 32});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);
@@ -95,11 +97,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
size_t input_size;
std::string input_path = "./test_data/conv/convfp32_x_1_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({1, 28, 28, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({32, 3, 3, 3});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -112,7 +116,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());
// warm up loop
for (int i = 0; i < 3; i++) {
@@ -149,8 +155,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup1FP32(conv_param);
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
@@ -168,6 +175,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
size_t output_data_size =
conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_;
auto dx_data = new float[output_data_size];
ASSERT_NE(dx_data, nullptr);
std::vector<int> dim_dx({1, 28, 28, 3});
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.set_data(dx_data);
@@ -185,7 +193,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -222,8 +232,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup3FP32(conv_param);
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
@@ -245,6 +256,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({18, 3, 3, 1});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -257,7 +269,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());
// warm up loop
for (int i = 0; i < 3; i++) {
@@ -293,8 +307,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup3FP32(conv_param);
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
@@ -312,6 +327,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
size_t output_data_size =
conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_;
auto dx_data = new float[output_data_size];
ASSERT_NE(dx_data, nullptr);
std::vector<int> dim_dx({1, 28, 28, 3});
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.set_data(dx_data);
@@ -329,7 +345,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());
// warm up loop
for (int i = 0; i < 3; i++) {
@@ -365,9 +383,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup3Dilation2FP32(conv_param);

size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
@@ -389,6 +407,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({18, 3, 3, 1});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -401,7 +420,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {


kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -437,8 +458,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup3Dilation2FP32(conv_param);
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
@@ -456,6 +478,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
size_t output_data_size =
conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_;
auto dx_data = new float[output_data_size];
ASSERT_NE(dx_data, nullptr);
std::vector<int> dim_dx({1, 28, 28, 3});
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.set_data(dx_data);
@@ -473,7 +496,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

int loop_count = 100;
@@ -504,8 +529,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
InitConvParamGroup3Dilation2FP32(conv_param);
ASSERT_NE(conv_param, nullptr);

InitConvParamGroup3Dilation2FP32(conv_param);
size_t x_size;
std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin";
auto x_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(x_path.c_str(), &x_size));
@@ -523,6 +549,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
size_t output_data_size =
conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_;
auto y_data = new float[output_data_size];
ASSERT_NE(y_data, nullptr);
std::vector<int> dim_y({1, 26, 26, 18});
lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.set_data(y_data);
@@ -540,11 +567,12 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {

auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast<OpParameter *>(conv_param), inputs,
outputs, &context, 0);
ASSERT_NE(kernel, nullptr);
kernel->Init();
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

kernel->train();
EXPECT_EQ(kernel->is_train(), 1);
kernel->Train();
EXPECT_EQ(kernel->IsTrain(), 1);
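
The two lines above track the kernel API rename from train()/is_train() to Train()/IsTrain(). A minimal sketch of the mode toggle this exercises (the Eval() counterpart is an assumption, not shown in this diff):

// Hypothetical helper: verify a kernel's train/eval mode flag round-trips.
void CheckTrainToggle(mindspore::kernel::LiteKernel *kernel) {
  kernel->Train();                  // switch the kernel into training mode
  EXPECT_EQ(kernel->IsTrain(), 1);  // the flag should read back as set
  kernel->Eval();                   // assumed eval-mode counterpart
  EXPECT_EQ(kernel->IsTrain(), 0);  // and clear again afterwards
}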


// warm up loop
for (int i = 0; i < 3; i++) {
@@ -580,6 +608,8 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -624,11 +654,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) {
size_t input_size;
std::string input_path = "./test_data/conv/convfp32_input0_d2_g2_s2_2_4_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 4});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({12, 3, 3, 2});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -641,7 +673,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -679,6 +713,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) {
TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -710,6 +746,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {
size_t dy_size;
std::string dy_path = "./test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 15, 15, 12});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);
@@ -717,6 +754,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {
size_t w_size;
std::string w_path = "./test_data/conv/convfp32_w_d2_g2_s2_12_2_3_3.bin";
auto w_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(w_path.c_str(), &w_size));
ASSERT_NE(w_data, nullptr);
std::vector<int> dim_w({12, 3, 3, 2});
lite::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w);
w_tensor.set_data(w_data);
@@ -724,6 +762,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {
size_t output_data_size =
conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_;
auto dx_data = new float[output_data_size];
ASSERT_NE(dx_data, nullptr);
std::vector<int> dim_dx({2, 32, 32, 4});
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.set_data(dx_data);
@@ -741,7 +780,9 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
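
A pattern running through all of the hunks above: every malloc, new[], ReadFile, and registry lookup now gets an ASSERT_NE(..., nullptr) before the pointer is used, so a failed allocation or a missing test-data file fails the test at the offending line instead of crashing the run. A condensed sketch of the guarded setup sequence (the path here is a placeholder, not from this diff):

auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);  // guard before Init writes through the pointer
InitConvParamGroup3Dilation2FP32(conv_param);

size_t dy_size;
auto dy_data = reinterpret_cast<float *>(
    mindspore::lite::ReadFile("./test_data/conv/placeholder.bin", &dy_size));  // placeholder path
ASSERT_NE(dy_data, nullptr);  // the tests treat a nullptr result as a missing file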


+54
-12
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc

@@ -32,6 +32,8 @@ class TestDeConvolutionGradFp32 : public mindspore::CommonTest {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -63,24 +65,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_2_9_63_63.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 63, 63, 9});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
size_t output_data_size =
conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;

size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_2_3_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({3, 3, 3, 9});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -93,7 +95,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -101,6 +105,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {
kernel->Run();
}

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
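
This hunk moves the timing bookkeeping (the "runtime part" block) below the warm-up loop, so cold caches and one-time initialization no longer inflate the measured average. The full measurement pattern, sketched; the averaging after time_end is assumed from the loop structure, since the diff cuts off here:

for (int i = 0; i < 3; i++) {  // warm up: touch caches, trigger lazy setup
  kernel->Run();
}
uint64_t time_avg = 0;
int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
  kernel->Run();
}
auto time_end = mindspore::lite::GetTimeUs();
time_avg = (time_end - time_start) / loop_count;  // mean cost per iteration
printf("%f ms per run\n", time_avg / 1000.0f);  // assuming GetTimeUs returns microseconds
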
@@ -131,6 +138,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -162,24 +171,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_2_9_65_65.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 65, 65, 9});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
size_t output_data_size =
conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;

size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_2_3_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({9, 3, 3, 3});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -192,7 +201,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -200,6 +211,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {
kernel->Run();
}

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
@@ -230,6 +244,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -261,6 +277,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 65, 65, 9});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);
@@ -274,11 +291,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {
size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_2_3_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({3, 3, 3, 3});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -291,7 +310,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -329,6 +350,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -360,24 +383,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_s1_2_9_34_34.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 34, 34, 9});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
size_t output_data_size =
conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;

size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_s1_2_3_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({3, 3, 3, 3});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -390,7 +413,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -398,6 +423,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {
kernel->Run();
}

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
@@ -428,6 +456,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -459,24 +489,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g2_s2_2_12_65_65.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 65, 65, 12});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
size_t output_data_size =
conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;

size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g2_s2_2_4_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 4});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({6, 3, 3, 4});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -489,7 +519,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop
@@ -497,6 +529,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {
kernel->Run();
}

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;
int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
@@ -527,6 +562,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {
TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) {
// prepare stage
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
ASSERT_NE(conv_param, nullptr);

conv_param->input_batch_ = 2;
conv_param->input_h_ = 32;
conv_param->input_w_ = 32;
@@ -558,6 +595,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) {
size_t dy_size;
std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g12_s2_2_12_65_65.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size));
ASSERT_NE(dy_data, nullptr);
std::vector<int> dim_dy({2, 65, 65, 12});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(dy_data);
@@ -571,11 +609,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) {
size_t input_size;
std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g12_s2_2_12_32_32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_x({2, 32, 32, 12});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input_data);

auto dw_data = new float[output_data_size];
ASSERT_NE(dw_data, nullptr);
std::vector<int> dim_dw({1, 3, 3, 12});
lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
dw_tensor.set_data(dw_data);
@@ -588,7 +628,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());

// warm up loop

+14
-47
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc

@@ -90,6 +90,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_Activation;
auto primitive = new schema::ActivationT;
ASSERT_NE(primitive, nullptr);
primitive->type = schema::ActivationType_RELU;
node->primitive->value.value = primitive;
node->name = "ReLU";
@@ -102,6 +103,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_MatMul;
auto primitive = new schema::MatMulT;
ASSERT_NE(primitive, nullptr);
primitive->transposeA = false;
primitive->transposeB = true;
node->primitive->value.value = primitive;
@@ -115,6 +117,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_BiasAdd;
auto primitive = new schema::BiasAddT;
ASSERT_NE(primitive, nullptr);
primitive->axis.push_back(0);
node->primitive->value.value = primitive;
node->name = "BiasAdd";
@@ -127,6 +130,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_SoftmaxCrossEntropy;
auto primitive = new schema::SoftmaxCrossEntropyT;
ASSERT_NE(primitive, nullptr);
primitive->axis.push_back(0);
node->primitive->value.value = primitive;
node->name = "SoftmaxCrossEntropy";
@@ -139,6 +143,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_BiasGrad;
auto primitive = new schema::BiasGradT;
ASSERT_NE(primitive, nullptr);
primitive->axis.push_back(0);
node->primitive->value.value = primitive;
node->name = "BiasGrad";
@@ -151,6 +156,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_MatMul;
auto primitive = new schema::MatMulT;
ASSERT_NE(primitive, nullptr);
primitive->transposeA = true;
primitive->transposeB = false;
node->primitive->value.value = primitive;
@@ -164,6 +170,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_ApplyMomentum;
auto primitive = new schema::ApplyMomentumT;
ASSERT_NE(primitive, nullptr);
node->primitive->value.value = primitive;
node->name = "Momentum";
meta_graph->nodes.emplace_back(std::move(node));
@@ -175,6 +182,7 @@ TEST_F(NetworkTest, tuning_layer) {
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_ApplyMomentum;
auto primitive = new schema::ApplyMomentumT;
ASSERT_NE(primitive, nullptr);
node->primitive->value.value = primitive;
node->name = "Momentum";
meta_graph->nodes.emplace_back(std::move(node));
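
Every hunk in this file guards the same node-building idiom: allocate the typed primitive, null-check it, attach it to the node, and move the node into the graph. Condensed below; the schema::CNodeT type name is an assumption, since the diff only shows the node-> accesses:

auto node = std::make_unique<schema::CNodeT>();  // assumed node type
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_MatMul;
auto primitive = new schema::MatMulT;
ASSERT_NE(primitive, nullptr);  // guard before the fields below are set
primitive->transposeA = false;
primitive->transposeB = true;
node->primitive->value.value = primitive;  // the union takes ownership
node->name = "MatMul";
meta_graph->nodes.emplace_back(std::move(node));
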
@@ -450,9 +458,6 @@ TEST_F(NetworkTest, tuning_layer) {
std::cout << std::endl;
error = RelativeOutputError(outData, output_path);
EXPECT_LT(error, 2e-3);

delete session;
MS_LOG(INFO) << "TuningLayer passed";
}

int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path,
@@ -516,6 +521,7 @@ TEST_F(NetworkTest, efficient_net) {
auto model = lite::TrainModel::Import(buf, net_size);
delete[] buf;
auto context = new lite::Context;
ASSERT_NE(context, nullptr);
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND;
context->thread_num_ = 1;

@@ -533,48 +539,6 @@ TEST_F(NetworkTest, efficient_net) {
ASSERT_EQ(res, 0);
}

TEST_F(NetworkTest, lenetnet) {
char *buf = nullptr;
size_t net_size = 0;
std::string net = "./test_data/nets/lenet_train.ms";
ReadFile(net.c_str(), &net_size, &buf);
auto model = lite::TrainModel::Import(buf, net_size);
delete[] buf;
auto context = new lite::Context;
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND;
context->thread_num_ = 1;

// check registration
mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance();
mindspore::kernel::KernelKey desc1 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32,
mindspore::schema::PrimitiveType_Conv2D};
mindspore::kernel::KernelKey desc2 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32,
mindspore::schema::PrimitiveType_DepthwiseConv2D};
auto regb1 = reg->GetCreator(desc1);
auto regb2 = reg->GetCreator(desc2);
ASSERT_EQ(regb1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false);

auto session = session::TrainSession::CreateSession(context);
ASSERT_NE(session, nullptr);
auto ret = session->CompileTrainGraph(model);
ASSERT_EQ(lite::RET_OK, ret);

auto rega1 = reg->GetCreator(desc1);
auto rega2 = reg->GetCreator(desc2);
ASSERT_EQ(regb1, rega1);
ASSERT_EQ(regb2, rega2);
ASSERT_EQ(rega1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false);
// end of check registration

session->Eval();
std::string in = "./test_data/nets/x_lenet.bin";
std::string out = "./test_data/nets/y_lenet.bin";
auto res = runNet(session, in, out, "24");
delete session;
delete context;
ASSERT_EQ(res, 0);
}

TEST_F(NetworkTest, retina_net) {
char *buf = nullptr;
size_t net_size = 0;
@@ -585,6 +549,7 @@ TEST_F(NetworkTest, retina_net) {
auto model = lite::Model::Import(buf, net_size);
delete[] buf;
auto context = new lite::Context;
ASSERT_NE(context, nullptr);
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND;
context->thread_num_ = 1;

@@ -592,7 +557,7 @@ TEST_F(NetworkTest, retina_net) {
auto session = session::LiteSession::CreateSession(context);
ASSERT_NE(session, nullptr);
auto ret = session->CompileGraph(model);
ASSERT_EQ(lite::RET_OK, ret);
EXPECT_EQ(lite::RET_OK, ret);
// session->Eval();

std::string in = "./test_data/nets/test1.hwc_normalized_f32";
@@ -619,8 +584,9 @@ TEST_F(NetworkTest, retina_net) {
final_res |= res;
}

ASSERT_EQ(final_res, 0);
EXPECT_EQ(final_res, 0);

delete model;
delete session;
delete context;
}
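
The ASSERT_EQ to EXPECT_EQ switch above matters for the cleanup that follows it: ASSERT_* returns out of the test body on failure, which would skip the delete session and delete context lines and leak them, while EXPECT_* records the failure and lets teardown run. A minimal gtest illustration (hypothetical test, not from this file):

TEST(Example, CleanupStillRunsWithExpect) {
  auto *buf = new int[16];
  EXPECT_EQ(1, 2);  // failure is recorded, but execution continues
  delete[] buf;     // still reached; ASSERT_EQ would have returned before this line
}
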
@@ -635,6 +601,7 @@ TEST_F(NetworkTest, mobileface_net) {
auto model = lite::Model::Import(buf, net_size);
delete[] buf;
auto context = new lite::Context;
ASSERT_NE(context, nullptr);
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND;
context->thread_num_ = 1;

+56
-2
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc

@@ -60,6 +60,8 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) {
TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
// prepare stage
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(pooling_param, nullptr);

InitPoolingParamFP32(pooling_param);
pooling_param->output_channel_ = 3;
pooling_param->pool_mode_ = PoolMode_AvgPool;
@@ -73,8 +75,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
size_t input_size;
std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);

auto output_data = new float[output_data_size];
ASSERT_NE(output_data, nullptr);
// warm up loop
for (int i = 0; i < 3; i++) {
AvgPoolingGrad(input_data, output_data, pooling_param, 1);
@@ -108,6 +112,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
// prepare stage
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(pooling_param, nullptr);

InitPoolingParamFP32(pooling_param);
pooling_param->output_channel_ = 3;
pooling_param->pool_mode_ = PoolMode_AvgPool;
@@ -121,12 +127,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
size_t input_size;
std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_dy({1, 28, 28, 3});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(input_data);

std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin";
auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
ASSERT_NE(input1_data, nullptr);
std::vector<int> dim_x({1, 28, 28, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input1_data);
@@ -134,6 +142,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
std::vector<lite::Tensor *> inputs = {&dy_tensor, &x_tensor};

auto output_data = new float[output_data_size];
ASSERT_NE(output_data, nullptr);
std::vector<int> dim_dx({1, 28, 28, 3});
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.set_data(output_data);
@@ -145,7 +154,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);

kernel_obj->Run();
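
The pair of checks added around the creator call guards both failure modes of kernel construction: GetCreator returns nullptr when nothing is registered for the (arch, data type, op type) key, and the creator itself may return nullptr if construction fails. The lookup idiom, condensed from the hunk above:

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);  // no kernel registered for this key: fail fast
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);  // the creator can also fail and return nullptr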


@@ -172,8 +183,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
// prepare stage
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
InitPoolingParamFP32(pooling_param);
ASSERT_NE(pooling_param, nullptr);

InitPoolingParamFP32(pooling_param);
pooling_param->output_channel_ = 3;
pooling_param->input_batch_ = 3;
pooling_param->output_batch_ = 3;
@@ -185,12 +197,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
size_t input_size;
std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_dy({3, 28, 28, 3});
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.set_data(input_data);

std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin";
auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
ASSERT_NE(input1_data, nullptr);
std::vector<int> dim_x({3, 28, 28, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(input1_data);
@@ -209,7 +223,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);

kernel_obj->Run();

@@ -236,6 +252,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
// prepare stage
// input size will be equal to the original size of x, output size will be the output size as in forward
auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(pool, nullptr);

InitPoolingParamFP32(pool);
pool->output_channel_ = 3;
pool->pool_mode_ = PoolMode_AvgPool;
@@ -250,12 +268,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
ASSERT_NE(x_data, nullptr);
std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(x_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size));
ASSERT_NE(yt_data, nullptr);
std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_});
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.set_data(yt_data);
@@ -271,7 +291,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {

kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc);
ASSERT_NE(pool_creator, nullptr);
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();

@@ -295,6 +317,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
// prepare stage
// input size will be equal to the original size of x, output size will be the output size as in forward
auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(pool, nullptr);

InitPoolingParamFP32(pool);
pool->output_channel_ = 3;
pool->pool_mode_ = PoolMode_AvgPool;
@@ -309,12 +333,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
ASSERT_NE(x_data, nullptr);
std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(x_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size));
ASSERT_NE(yt_data, nullptr);
std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_});
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.set_data(yt_data);
@@ -332,7 +358,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {

kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc);
ASSERT_NE(pool_creator, nullptr);
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();

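Every kernel in these tests is obtained the same way: describe what is needed with a kernel::KernelKey, ask the KernelRegistry for the matching creator, then construct, Init(), and Run() the kernel. A condensed sketch of that flow, using the same calls as the tests (the names inputs, outputs, param, and context stand in for objects the surrounding test already set up):

// Resolve a CPU fp32 PoolingGrad kernel through the registry.
kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);  // null means no kernel was registered for this key

// The creator builds the kernel instance; construction can fail, hence the check.
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &context, desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();  // allocate and prepare internal state
kernel->Run();   // execute the op on the bound tensors

The added ASSERT_NE checks are the substance of this patch: each registry lookup and allocation now fails the test immediately instead of dereferencing a null pointer later.
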
@@ -356,6 +384,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
// prepare stage
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(pooling_param, nullptr);

InitPoolingParamFP32(pooling_param);
pooling_param->output_channel_ = 3;
pooling_param->pool_mode_ = PoolMode_MaxPool;
@@ -368,14 +398,18 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
size_t input_size;
std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_x_1_28_28_3.bin";
auto in_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(i_path.c_str(), &input_size));
ASSERT_NE(in_data, nullptr);

std::string dy_path = "./test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &input_size));
ASSERT_NE(dy_data, nullptr);

std::string dx_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin";
auto dx_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx_path.c_str(), &input_size));
ASSERT_NE(dx_data, nullptr);

auto output_data = new float[output_data_size];
ASSERT_NE(output_data, nullptr);
// warm up loop
for (int i = 0; i < 3; i++) {
MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1);
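
The three-iteration loop above is only a warm-up, so caches and allocator state settle before any timing is taken. A sketch of the warm-up-then-measure pattern the timed tests follow (the GetTimeUs() helper name and the 100-iteration count are assumptions for illustration):

// Warm-up runs: results are discarded, they just stabilize machine state.
for (int i = 0; i < 3; i++) {
  MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1);
}

// Timed runs: average the per-call cost over a fixed iteration count.
const int kLoopCount = 100;            // assumed count
uint64_t start = GetTimeUs();          // assumed microsecond-clock helper
for (int i = 0; i < kLoopCount; i++) {
  MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1);
}
uint64_t time_avg = (GetTimeUs() - start) / kLoopCount;
printf("single thread avg time: %f ms\n", time_avg / 1000.0f);
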
@@ -412,6 +446,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
// prepare stage
// input size will equal the original size of x; output size will match the output size of the forward pass
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(maxpool, nullptr);

InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->pool_mode_ = PoolMode_MaxPool;
@@ -422,18 +458,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size));
ASSERT_NE(x_data, nullptr);
std::vector<int> dim_x({3, 28, 28, 3});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dx_3_28_28_3.bin", &input_size));
ASSERT_NE(y_data, nullptr);
std::vector<int> dim_y({3, 28, 28, 3});
lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.set_data(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dy_3_28_28_3.bin", &input_size));
ASSERT_NE(yt_data, nullptr);
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.set_data(yt_data);

@@ -449,8 +488,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {

kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
ASSERT_NE(maxpool_creator, nullptr);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context,
maxpool_desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();

@@ -477,6 +518,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
// prepare stage
// input size will equal the original size of x; output size will match the output size of the forward pass
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(maxpool, nullptr);

InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->input_channel_ = 3;
@@ -492,18 +535,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
ASSERT_NE(x_data, nullptr);
std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dx_3_28_28_3.bin", &input_size));
ASSERT_NE(y_data, nullptr);
std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_});
lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.set_data(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size));
ASSERT_NE(yt_data, nullptr);
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.set_data(yt_data);

@@ -520,8 +566,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {

kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
ASSERT_NE(maxpool_creator, nullptr);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context,
maxpool_desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();

@@ -548,6 +596,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
// prepare stage
// input size will equal the original size of x; output size will match the output size of the forward pass
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
ASSERT_NE(maxpool, nullptr);

InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->input_channel_ = 3;
@@ -563,18 +613,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
ASSERT_NE(x_data, nullptr);
std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_});
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.set_data(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dx_3_28_28_3.bin", &input_size));
ASSERT_NE(y_data, nullptr);
std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_});
lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.set_data(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size));
ASSERT_NE(yt_data, nullptr);
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.set_data(yt_data);

@@ -591,11 +644,12 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {

kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
ASSERT_NE(maxpool_creator, nullptr);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context,
maxpool_desc, nullptr);
ASSERT_NE(kernel, nullptr);

kernel->Init();

kernel->Run();

std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin";

+ 15
- 3
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc View File

@@ -31,17 +31,21 @@ class TestSoftmaxCrossEntropyFp32 : public mindspore::CommonTest {
TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
// prepare stage
auto sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(malloc(sizeof(SoftmaxCrossEntropyParameter)));
ASSERT_NE(sce_param, nullptr);
size_t input_size;

std::string input_path = "./test_data/operators/sce_fp32_1_y_6_4.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::vector<int> dim_y({6, 4});
lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.set_data(input_data);

std::string label_path = "./test_data/operators/sce_fp32_1_l_6.bin";
auto ll_labels = reinterpret_cast<int64_t *>(mindspore::lite::ReadFile(label_path.c_str(), &input_size));
ASSERT_NE(ll_labels, nullptr);
auto labels = new float[6 * 4];
ASSERT_NE(labels, nullptr);
std::fill(labels, labels + 6 * 4, 0.f);
for (int i = 0; i < 6; i++) labels[i * 4 + ll_labels[i]] = 1.0;

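The one-line loop above expands the sparse int64 class indices read from sce_fp32_1_l_6.bin into a dense one-hot matrix: row i gets a 1.0 in the column its label names, zeros elsewhere. The same idea as a standalone sketch (6 and 4 are this test's batch and class counts):

// Sketch: sparse class indices -> dense one-hot rows.
#include <algorithm>
#include <cstdint>

void OneHot(const int64_t *sparse, float *dense, int batch, int classes) {
  std::fill(dense, dense + batch * classes, 0.f);  // start from an all-zero matrix
  for (int i = 0; i < batch; i++) {
    dense[i * classes + sparse[i]] = 1.0f;         // set the labeled column
  }
}
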
@@ -52,10 +56,12 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
std::vector<lite::Tensor *> inputs = {&y_tensor, &l_tensor};

auto loss = new float[1];
ASSERT_NE(loss, nullptr);
std::vector<int> dim_dw({1});
lite::Tensor loss_tensor(TypeId::kNumberTypeFloat32, dim_dw);
loss_tensor.set_data(loss);
auto grad = new float[24];
ASSERT_NE(grad, nullptr);
lite::Tensor grad_tensor(TypeId::kNumberTypeFloat32, dim_y);
grad_tensor.set_data(grad);
std::vector<lite::Tensor *> outputs = {&loss_tensor, &grad_tensor};
@@ -66,7 +72,9 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftmaxCrossEntropy};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(sce_param), &context, desc, nullptr);
ASSERT_NE(kernel_obj, nullptr);
mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize());
kernel_obj->Run();

@@ -78,16 +86,20 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
std::string output_path = "./test_data/operators/sce_fp32_1_loss_1.bin";
CompareOutput(loss, 1, output_path);

((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->train();
((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->Train();
kernel_obj->Run();

// normalize the result by batch size
for (int i = 0; i < 24; i++) {
grad[i] /= 6;
}
printf("==================output data=================\n");
for (int i = 0; i < 12; i++) {
std::cout << grad[i] << " ,";
}
std::cout << std::endl;
std::string grad_path = "./test_data/operators/sce_fp32_1_dy_6_4.bin";
CompareOutput(grad, 24, grad_path);
auto res = CompareRelativeOutput(grad, grad_path);
EXPECT_EQ(res, 0);

delete[] ll_labels;
delete[] labels;

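The gradient check switches from the absolute CompareOutput to CompareRelativeOutput, whose zero return now gates the test: per-element error is judged relative to the reference value, which is more robust when gradient magnitudes span orders of magnitude. A sketch of what such a comparator plausibly does (the helper name, the 1e-3 tolerance, and the epsilon guard are assumptions; the real helper also loads the reference data from the .bin file):

#include <cmath>

// Returns 0 when every element is within a relative tolerance of the reference.
int CompareRelative(const float *out, const float *ref, int size, float rtol = 1e-3f) {
  for (int i = 0; i < size; i++) {
    float denom = std::fabs(ref[i]) > 1e-10f ? std::fabs(ref[i]) : 1e-10f;  // avoid divide-by-zero
    if (std::fabs(out[i] - ref[i]) / denom > rtol) {
      return 1;  // one out-of-tolerance element fails the comparison
    }
  }
  return 0;
}
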
+ 31
- 5
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc View File

@@ -55,6 +55,7 @@ void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, i

TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) {
auto softmax_param = new SoftmaxParameter();
ASSERT_NE(softmax_param, nullptr);
// set parameters
InitSoftMaxParam(softmax_param, 0);

@@ -64,21 +65,23 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) {
inner_size *= softmax_param->input_shape_[i];
}
float *sum_data = new (std::nothrow) float[inner_size];
ASSERT_NE(sum_data, nullptr);
float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]];
ASSERT_NE(sum_mul, nullptr);
std::vector<int> shape = {1, 9, 11, 12};
size_t input_size;
std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);
// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;

auto out_data = new float[softmax_param->element_size_];
ASSERT_NE(out_data, nullptr);
// warm up loop
for (int i = 0; i < 3; i++) {
SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param);
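
SoftmaxGrad implements the softmax backward pass: with y = softmax(x) and upstream gradient dy, the gradient along the softmax axis is dx_i = y_i * (dy_i - sum_j dy_j * y_j); sum_data and sum_mul are scratch buffers for that inner reduction. A minimal sketch of the math for the simple last-axis case (scratch-free, so the buffers collapse into one local accumulator):

// dx[i] = y[i] * (dy[i] - dot(dy, y)) for each row of length axis_len.
void SoftmaxGradLastAxis(const float *y, const float *dy, float *dx, int outer, int axis_len) {
  for (int o = 0; o < outer; o++) {
    const float *yr = y + o * axis_len;
    const float *dyr = dy + o * axis_len;
    float *dxr = dx + o * axis_len;
    float dot = 0.f;
    for (int j = 0; j < axis_len; j++) dot += dyr[j] * yr[j];  // sum_j dy_j * y_j
    for (int i = 0; i < axis_len; i++) dxr[i] = yr[i] * (dyr[i] - dot);
  }
}
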
@@ -112,6 +115,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) {

TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) {
auto softmax_param = new SoftmaxParameter();
ASSERT_NE(softmax_param, nullptr);
// set parameters
InitSoftMaxParam(softmax_param, 1);

@@ -121,21 +125,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) {
inner_size *= softmax_param->input_shape_[i];
}
float *sum_data = new (std::nothrow) float[inner_size];
ASSERT_NE(sum_data, nullptr);
float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]];
ASSERT_NE(sum_mul, nullptr);

std::vector<int> shape = {1, 9, 11, 12};
size_t input_size;
std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);

std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;

auto out_data = new float[softmax_param->element_size_];
ASSERT_NE(out_data, nullptr);

// warm up loop
for (int i = 0; i < 3; i++) {
@@ -171,6 +180,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) {

TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) {
auto softmax_param = new SoftmaxParameter();
ASSERT_NE(softmax_param, nullptr);
// set parameters
InitSoftMaxParam(softmax_param, 2);

@@ -180,21 +190,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) {
inner_size *= softmax_param->input_shape_[i];
}
float *sum_data = new (std::nothrow) float[inner_size];
ASSERT_NE(sum_data, nullptr);
float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]];
ASSERT_NE(sum_mul, nullptr);

std::vector<int> shape = {1, 9, 11, 12};
size_t input_size;
std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);

std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;

auto out_data = new float[softmax_param->element_size_];
ASSERT_NE(out_data, nullptr);

// warm up loop
for (int i = 0; i < 3; i++) {
@@ -230,6 +245,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) {

TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) {
auto softmax_param = new SoftmaxParameter();
ASSERT_NE(softmax_param, nullptr);
// set parameters
InitSoftMaxParam(softmax_param, 3);

@@ -239,21 +255,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) {
inner_size *= softmax_param->input_shape_[i];
}
float *sum_data = new (std::nothrow) float[inner_size];
ASSERT_NE(sum_data, nullptr);
float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]];
ASSERT_NE(sum_mul, nullptr);

std::vector<int> shape = {1, 9, 11, 12};
size_t input_size;
std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);
std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);

// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;

auto out_data = new float[softmax_param->element_size_];
ASSERT_NE(out_data, nullptr);

// warm up loop
for (int i = 0; i < 3; i++) {
@@ -289,6 +309,8 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) {

TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) {
auto softmax_param = new SoftmaxParameter();
ASSERT_NE(softmax_param, nullptr);

// set parameters
InitSoftMaxParam(softmax_param, -1);

@@ -298,21 +320,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) {
inner_size *= softmax_param->input_shape_[i];
}
float *sum_data = new (std::nothrow) float[inner_size];
ASSERT_NE(sum_data, nullptr);
float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]];
ASSERT_NE(sum_mul, nullptr);

std::vector<int> shape = {1, 9, 11, 12};
size_t input_size;
std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
ASSERT_NE(input_data, nullptr);

std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin";
auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
ASSERT_NE(yt_data, nullptr);
// runtime part
printf("Calculating runtime cost...\n");
uint64_t time_avg = 0;

auto out_data = new float[softmax_param->element_size_];
ASSERT_NE(out_data, nullptr);

// warm up loop
for (int i = 0; i < 3; i++) {

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_out_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_x_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_yt_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin View File

BIN
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin View File

Some files were not shown because too many files changed in this diff
