From: @yonibaehr_admin Reviewed-by: Signed-off-by:tags/v1.1.0
| @@ -219,6 +219,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/internal) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl) | |||
| if (ENABLE_TOOLS) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) | |||
| if (SUPPORT_TRAIN) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/net_train) | |||
| endif() | |||
| endif() | |||
| if (NOT WIN32) | |||
| if (ENABLE_TOOLS) | |||
| @@ -18,32 +18,36 @@ | |||
| #include <vector> | |||
| #include "include/model.h" | |||
| namespace mindspore::lite { | |||
| namespace mindspore { | |||
| namespace lite { | |||
| /// \brief TrainModel Defines a class that allows to import and export a mindsport trainable model | |||
| struct TrainModel : public lite::Model { | |||
| /// \brief Static method to create a TrainModel pointer. | |||
| /// | |||
| /// \param[in] model_buf Define the buffer read from a model file. | |||
| /// \param[in] size Define bytes number of model buffer. | |||
| /// \brief Static method to create a TrainModel object | |||
| /// | |||
| /// \return Pointer of MindSpore Lite TrainModel. | |||
| /// \param[in] model_buf A buffer that was read from a MS model file | |||
| /// \param[in] size Length of the buffer | |||
| // | |||
| /// \return Pointer to MindSpore Lite TrainModel | |||
| static TrainModel *Import(const char *model_buf, size_t size); | |||
| /// \brief Free meta graph temporary buffer | |||
| /// \brief Free meta graph related data | |||
| void Free() override; | |||
| /// \brief TrainModel destruct, free all memory | |||
| /// \brief Class destructor, free all memory | |||
| virtual ~TrainModel(); | |||
| /// \brief Export Model into buf. | |||
| /// \brief Export Model into a buffer | |||
| /// | |||
| /// \param[in] buf Define the buffer to Export into. If nullptr, buf will be allocated | |||
| /// \param[in] len size of the buffer. | |||
| /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated | |||
| /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer | |||
| /// | |||
| /// \return Pointer to buffer with exported model | |||
| char* ExportBuf(char* buf, size_t* len) const; | |||
| char *ExportBuf(char *buf, size_t *len) const; | |||
| size_t buf_size_; | |||
| }; | |||
| } // namespace mindspore::lite | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_TRAIN_MODEL_H_ | |||
| @@ -25,16 +25,59 @@ | |||
| namespace mindspore { | |||
| namespace session { | |||
| /// \brief TrainSession Defines a class that allows training a MindSpore model | |||
| class TrainSession : public session::LiteSession { | |||
| public: | |||
| /// \brief Class destructor | |||
| virtual ~TrainSession() = default; | |||
| /// \brief Static method to create a TrainSession object | |||
| /// | |||
| /// \param[in] context Defines the context of the session to be created | |||
| /// | |||
| /// \return Pointer of MindSpore Lite TrainSession | |||
| static TrainSession *CreateSession(lite::Context *context); | |||
| /// \brief Compile MindSpore Lite train model | |||
| /// | |||
| /// \note CompileTrainGraph should be called before RunGraph | |||
| /// | |||
| /// \param[in] model Define the model to be compiled | |||
| /// | |||
| /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h | |||
| virtual int CompileTrainGraph(lite::TrainModel *model) = 0; | |||
| /// \brief Export the trained model into a buffer | |||
| /// | |||
| /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated | |||
| /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer | |||
| /// | |||
| /// \return pointer to the export buffer | |||
| virtual void *ExportToBuf(char *buf, size_t *len) const = 0; | |||
| virtual void Train() = 0; | |||
| /// \brief Save the trained model into a flatbuffer file | |||
| /// | |||
| /// \param[in] filename Filename to save flatbuffer to | |||
| /// | |||
| /// \return 0 on success or -1 in case of error | |||
| virtual int SaveToFile(const std::string &filename) const = 0; | |||
| /// \brief Set model to train mode | |||
| /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h | |||
| virtual int Train() = 0; | |||
| /// \brief Check mode of model | |||
| /// | |||
| /// \return boolean indication if model is in train mode | |||
| bool IsTrain() { return train_mode_ == true; } | |||
| virtual void Eval() = 0; | |||
| /// \brief Set model to eval mode | |||
| /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h | |||
| virtual int Eval() = 0; | |||
| /// \brief Check mode of model | |||
| /// | |||
| /// \return boolean indication if model is in eval mode | |||
| bool IsEval() { return train_mode_ == false; } | |||
| protected: | |||
| @@ -270,11 +270,13 @@ if (BUILD_MINDDATA STREQUAL "full") | |||
| ${CORE_DIR}/utils/ms_utils.cc | |||
| ) | |||
| find_package(Threads REQUIRED) | |||
| target_link_libraries(minddata-lite | |||
| securec | |||
| jpeg-turbo | |||
| jpeg | |||
| mindspore::json | |||
| Threads::Threads | |||
| ) | |||
| # ref: https://github.com/android/ndk/issues/1202 | |||
| @@ -55,20 +55,30 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset | |||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, | |||
| float *save_mean, float *save_var) { | |||
| float N = (float)param->unit_; | |||
| const float N = (float)param->unit_; | |||
| const float VN = N; | |||
| const float VNUB = (N > 1.0f) ? (N - 1.0f) : 1.0f; | |||
| const float momentum = (1.0f - param->momentum_); | |||
| for (int i = 0; i < param->unit_; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| int idx = i * param->channel_ + c; | |||
| run_mean[c] += input[idx]; | |||
| run_var[c] += input[idx] * input[idx]; | |||
| } | |||
| } | |||
| const float VN = (N > 1.0f) ? (N - 1.0f) : 1.0f; | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| run_mean[c] = run_mean[c] / N; | |||
| run_var[c] = run_var[c] / VN - run_mean[c] * run_mean[c]; | |||
| save_mean[c] = param->momentum_ * save_mean[c] + (1 - param->momentum_) * run_mean[c]; | |||
| const float var = run_var[c]; | |||
| save_var[c] = param->momentum_ * save_var[c] + (1 - param->momentum_) * var; | |||
| run_mean[c] /= N; | |||
| } | |||
| for (int i = 0; i < param->unit_; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| int idx = i * param->channel_ + c; | |||
| run_var[c] += (input[idx] - run_mean[c]) * (input[idx] - run_mean[c]); | |||
| } | |||
| } | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float unbiased_var = (run_var[c] / VNUB); | |||
| run_var[c] = (run_var[c] / VN); | |||
| save_mean[c] = momentum * save_mean[c] + (1.0f - momentum) * run_mean[c]; | |||
| save_var[c] = momentum * save_var[c] + (1.0f - momentum) * unbiased_var; | |||
| } | |||
| } | |||
| @@ -72,7 +72,7 @@ int HSwishGrad(float *src0, float *src1, int length, float *dst) { | |||
| int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||
| for (int i = 0; i < length; ++i) { | |||
| float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); | |||
| float tmp = (src1[i] > 3.0f ? 0.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); | |||
| dst[i] = tmp * src0[i]; | |||
| } | |||
| return NNACL_OK; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "nnacl/fp32_grad/arithmetic_grad.h" | |||
| #include <string.h> | |||
| #include "nnacl/fp32_grad/utils.h" | |||
| void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| @@ -27,3 +29,103 @@ void ElementMulAndDivNegSquare(const float *a, const float *b, const float *deno | |||
| output[i] = -a[i] * b[i] / (denom[i] * denom[i]); | |||
| } | |||
| } | |||
| void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, | |||
| const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) { | |||
| int num_output0 = 1; | |||
| int num_output1 = 1; | |||
| int same_shape = 1; | |||
| for (int idx = 0; idx < num_dims; ++idx) { | |||
| num_output0 *= input0_dims[idx]; | |||
| num_output1 *= input1_dims[idx]; | |||
| if (input0_dims[idx] != input1_dims[idx]) { | |||
| same_shape = 0; | |||
| } | |||
| } | |||
| if (same_shape) { | |||
| int input_iter[8] = {0}; | |||
| // Iterate through input_data. | |||
| do { | |||
| size_t offset = GetInputOffset(num_dims, input0_dims, input_iter); | |||
| output0[offset] = input0[offset] > input1[offset] ? dy[offset] : 0.; | |||
| output1[offset] = input1[offset] >= input0[offset] ? dy[offset] : 0.; | |||
| } while (NextIndex(num_dims, input0_dims, input_iter)); | |||
| } else { | |||
| memset(output0, 0, num_output0 * sizeof(float)); // zero output | |||
| memset(output1, 0, num_output1 * sizeof(float)); // zero output | |||
| int input_iter[8] = {0}; | |||
| int axes0[5] = {0}; | |||
| int axes1[5] = {0}; | |||
| int num_axes0 = 0; | |||
| int num_axes1 = 0; | |||
| for (int i = 0; i < num_dims; i++) { | |||
| if (input0_dims[i] == 1) { | |||
| axes0[num_axes0++] = i; | |||
| } | |||
| if (input1_dims[i] == 1) { | |||
| axes1[num_axes1++] = i; | |||
| } | |||
| } | |||
| do { | |||
| size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0); | |||
| size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1); | |||
| size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter); | |||
| output0[offset0] += input0[offset0] > input1[offset1] ? dy[yt_offset] : 0.; | |||
| output1[offset1] += input1[offset1] >= input0[offset0] ? dy[yt_offset] : 0.; | |||
| } while (NextIndex(num_dims, dy_dims, input_iter)); | |||
| } | |||
| } | |||
| void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, | |||
| const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) { | |||
| int num_output0 = 1; | |||
| int num_output1 = 1; | |||
| int same_shape = 1; | |||
| for (int idx = 0; idx < num_dims; ++idx) { | |||
| num_output0 *= input0_dims[idx]; | |||
| num_output1 *= input1_dims[idx]; | |||
| if (input0_dims[idx] != input1_dims[idx]) { | |||
| same_shape = 0; | |||
| } | |||
| } | |||
| if (same_shape) { | |||
| int input_iter[8] = {0}; | |||
| // Iterate through input_data. | |||
| do { | |||
| size_t offset = GetInputOffset(num_dims, input0_dims, input_iter); | |||
| output0[offset] = input0[offset] < input1[offset] ? dy[offset] : 0.; | |||
| output1[offset] = input1[offset] <= input0[offset] ? dy[offset] : 0.; | |||
| } while (NextIndex(num_dims, input0_dims, input_iter)); | |||
| } else { | |||
| memset(output0, 0, num_output0 * sizeof(float)); // zero output | |||
| memset(output1, 0, num_output1 * sizeof(float)); // zero output | |||
| int input_iter[8] = {0}; | |||
| int axes0[5] = {0}; | |||
| int axes1[5] = {0}; | |||
| int num_axes0 = 0; | |||
| int num_axes1 = 0; | |||
| for (int i = 0; i < num_dims; i++) { | |||
| if (input0_dims[i] == 1) { | |||
| axes0[num_axes0++] = i; | |||
| } | |||
| if (input1_dims[i] == 1) { | |||
| axes1[num_axes1++] = i; | |||
| } | |||
| } | |||
| do { | |||
| size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0); | |||
| size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1); | |||
| size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter); | |||
| output0[offset0] += input0[offset0] < input1[offset1] ? dy[yt_offset] : 0.; | |||
| output1[offset1] += input1[offset1] <= input0[offset0] ? dy[yt_offset] : 0.; | |||
| } while (NextIndex(num_dims, dy_dims, input_iter)); | |||
| } | |||
| } | |||
| @@ -16,11 +16,17 @@ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_ | |||
| #include "nnacl/op_base.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size); | |||
| void ElementMulAndDivNegSquare(const float *a, const float *b, const float *denom, float *output, int element_size); | |||
| void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, | |||
| const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims); | |||
| void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, | |||
| const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -17,66 +17,55 @@ | |||
| #include <string.h> | |||
| #include "nnacl/fp32_grad/batch_norm.h" | |||
| void sumSpatialBatch(const float *in, int size, int ch, float *out) { | |||
| void sumSpatialBatch(const float *in, size_t size, int ch, float *out) { | |||
| memset(out, 0, ch * sizeof(float)); | |||
| for (int i = 0; i < size; i++) { | |||
| const float *ptr = in + i * ch; | |||
| for (int c = 0; c < ch; c++) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| const float *ptr = in + (i * ch); | |||
| for (size_t c = 0; c < ch; c++) { | |||
| out[c] += ptr[c]; | |||
| } | |||
| } | |||
| } | |||
| static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) { | |||
| float N = (float)(size); | |||
| sumSpatialBatch(in, N, ch, mean); | |||
| for (int f = 0; f < ch; ++f) { | |||
| mean[f] /= N; | |||
| } | |||
| for (int f = 0; f < ch; f++) { | |||
| float tvar = 0; | |||
| for (int i = 0; i < N; i++) { | |||
| float x = in[i * ch + f]; | |||
| tvar += (x - mean[f]) * (x - mean[f]); | |||
| } | |||
| invar[f] = 1.0f / (sqrt(tvar / N + eps)); | |||
| } | |||
| } | |||
| void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, | |||
| float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) { | |||
| meanVar(in, size, channels, eps, mean, invar); | |||
| for (int i = 0; i < size; i++) { | |||
| for (int f = 0; f < channels; f++) { | |||
| int ix = i * channels + f; | |||
| void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean, | |||
| float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) { | |||
| const float N = (size); | |||
| for (size_t i = 0; i < size; i++) { | |||
| for (size_t f = 0; f < channels; f++) { | |||
| size_t ix = i * channels + f; | |||
| float x_hat = (in[ix] - mean[f]) * invar[f]; | |||
| float dxhat = dout[ix] * scale[f]; | |||
| dxhat_sum[f] += dxhat; | |||
| dxhathat_sum[f] += dxhat * x_hat; | |||
| float dx_hat = dout[ix] * scale[f]; | |||
| dxhat_sum[f] += dx_hat; | |||
| dxhathat_sum[f] += dx_hat * x_hat; | |||
| } | |||
| } | |||
| for (int i = 0; i < size; i++) { | |||
| for (int f = 0; f < channels; f++) { | |||
| int ix = i * channels + f; | |||
| for (size_t i = 0; i < size; i++) { | |||
| for (size_t f = 0; f < channels; f++) { | |||
| size_t ix = i * channels + f; | |||
| float x_hat = (in[ix] - mean[f]) * invar[f]; | |||
| float dxhat = dout[ix] * scale[f]; | |||
| out[ix] = 1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]); | |||
| float dx_hat = dout[ix] * scale[f]; | |||
| out[ix] = 1.0f / N * (invar[f]) * (N * dx_hat - dxhat_sum[f] - x_hat * dxhathat_sum[f]); | |||
| } | |||
| } | |||
| } | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, | |||
| int n, int size, float *scale_updates) { | |||
| int i, b, f; | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, | |||
| int size, float *scale_updates) { | |||
| size_t i, b, f; | |||
| memset(scale_updates, 0, n * sizeof(float)); | |||
| for (b = 0; b < batch; ++b) { | |||
| for (i = 0; i < size; ++i) { | |||
| for (f = 0; f < n; ++f) { | |||
| int index = (b * size + i) * n + f; | |||
| float x_norm = (x[index] - mean[f]) * invar[f]; | |||
| scale_updates[f] += delta[index] * x_norm; | |||
| scale_updates[f] += (delta[index] * x_norm); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void var2Invar(float *save_var, size_t size, float eps) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| save_var[i] = 1.0f / sqrt(save_var[i] + eps); | |||
| } | |||
| } | |||
| @@ -29,11 +29,12 @@ typedef struct BNGradParameter { | |||
| extern "C" { | |||
| #endif | |||
| void sumSpatialBatch(const float *in, int size, int ch, float *out); | |||
| void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, | |||
| float *mean, float *invar, float *xhat_sum, float *dxhat_sum, float *out); | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, | |||
| int n, int size, float *scale_updates); | |||
| void sumSpatialBatch(const float *in, size_t size, int ch, float *out); | |||
| void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean, | |||
| float *invar, float *xhat_sum, float *dxhat_sum, float *out); | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, | |||
| int size, float *scale_updates); | |||
| void var2Invar(float *save_var, size_t size, float eps); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -0,0 +1,23 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32_grad/dropout_grad.h" | |||
| void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float scale) { | |||
| for (int i = 0; i < length; i++) { | |||
| output_ptr[i] = yt_ptr[i] * mask[i] * scale; | |||
| } | |||
| } | |||
| @@ -0,0 +1,31 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| #include "nnacl/op_base.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float ratio); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ | |||
| #include "nnacl/op_base.h" | |||
| typedef struct DropoutParameter { | |||
| OpParameter op_parameter_; | |||
| float ratio_; | |||
| } DropoutParameter; | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ | |||
| @@ -16,182 +16,536 @@ | |||
| #include "nnacl/fp32_grad/gemm.h" | |||
| #include <string.h> | |||
| #ifdef __ARM_NEON | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include "nnacl/fp32/matmul.h" | |||
| static void gemm_not_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, | |||
| float *mat_c, int ldc) { | |||
| const int block_size = 4; | |||
| int block_mod = N % block_size; | |||
| int block_c4 = N - block_mod; | |||
| int i, j, k; | |||
| for (i = 0; i < M; ++i) { | |||
| for (k = 0; k < K; ++k) { | |||
| float a = alpha * mat_a[i * lda + k]; | |||
| for (j = 0; j < block_c4; j += block_size) { | |||
| float *b = &mat_b[k * ldb + j]; | |||
| float *c = &mat_c[i * ldc + j]; | |||
| c[0] += a * b[0]; | |||
| c[1] += a * b[1]; | |||
| c[2] += a * b[2]; | |||
| c[3] += a * b[3]; | |||
| } | |||
| for (; j < N; ++j) { | |||
| mat_c[i * ldc + j] += a * mat_b[k * ldb + j]; | |||
| } | |||
| static void addv(const float *restrict v1, float *restrict v2, float beta, int row, int col, int stride) { | |||
| const float *src_ptr = v1; | |||
| float *dst_ptr = v2; | |||
| for (int r = 0; r < row; r++) { | |||
| for (int c = 0; c < col; c++) { | |||
| dst_ptr[c] += beta * src_ptr[c]; | |||
| } | |||
| src_ptr += stride; | |||
| dst_ptr += stride; | |||
| } | |||
| } | |||
| int MatSize(int row, int col, int round) { | |||
| int res = UP_ROUND(row, round) * col; | |||
| return res; | |||
| } | |||
| int MatSizeTotal(int row, int col, int deep, int stride) { | |||
| #ifdef ENABLE_ARM32 | |||
| const int num = C4NUM; | |||
| #else | |||
| const int num = C12NUM; | |||
| #endif | |||
| int res = MatSize(row, deep, num) + MatSize(col, deep, C8NUM); | |||
| if (stride > 0) res += row * stride; | |||
| return res; | |||
| } | |||
| #ifdef ENABLE_ARM32 | |||
| static void RowMajor2Row4MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { | |||
| for (int r = 0; r < row; r++) { | |||
| const float *src = src_ptr + r * lead; | |||
| for (int c = 0; c < col; c++) { | |||
| int cd8 = c / 4; | |||
| int cm8 = c % 4; | |||
| dst_ptr[cd8 * 4 * row + r * 4 + cm8] = src[c]; | |||
| } | |||
| } | |||
| } | |||
| #endif | |||
| static void RowMajor2Row8MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { | |||
| for (int r = 0; r < row; r++) { | |||
| const float *src = src_ptr + r * lead; | |||
| for (int c = 0; c < col; c++) { | |||
| int cd8 = c / 8; | |||
| int cm8 = c % 8; | |||
| dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c]; | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| #ifndef ENABLE_ARM32 | |||
| static void RowMajor2Row12MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { | |||
| for (int r = 0; r < row; r++) { | |||
| const float *src = src_ptr + r * lead; | |||
| for (int c = 0; c < col; c++) { | |||
| int cd8 = c / C12NUM; | |||
| int cm8 = c % C12NUM; | |||
| dst_ptr[cd8 * C12NUM * row + r * C12NUM + cm8] = src[c]; | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| static void gemm_not_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, | |||
| float *mat_c, int ldc) { | |||
| const int block_size = 4; | |||
| int block_mod = K % block_size; | |||
| int block_c4 = K - block_mod; | |||
| int i, j, k; | |||
| for (i = 0; i < M; ++i) { | |||
| for (j = 0; j < N; ++j) { | |||
| float sum = 0; | |||
| for (k = 0; k < block_c4; k += block_size) { | |||
| float *a = &mat_a[i * lda + k]; | |||
| float *b = &mat_b[j * ldb + k]; | |||
| sum += alpha * a[0] * b[0]; | |||
| sum += alpha * a[1] * b[1]; | |||
| sum += alpha * a[2] * b[2]; | |||
| sum += alpha * a[3] * b[3]; | |||
| static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { | |||
| size_t row_up_12 = UP_ROUND(row, C12NUM); | |||
| size_t row12 = row / C12NUM * C12NUM; | |||
| size_t col4 = col / C4NUM * C4NUM; | |||
| const float *src_r = src_ptr; | |||
| float *dst_r = dst_ptr; | |||
| size_t ri = 0; | |||
| for (; ri < row12; ri += C12NUM) { | |||
| size_t ci = 0; | |||
| for (; ci < col4; ci += C4NUM) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C12NUM; | |||
| /* 12x4 row-major to col-major */ | |||
| #ifdef ENABLE_ARM64 | |||
| size_t stride = lead * sizeof(float); | |||
| asm volatile( | |||
| "mov x10, %[src_c]\n" | |||
| "mov x11, %[dst_c]\n" | |||
| "ld1 {v0.4s}, [x10], %[stride]\n" | |||
| "ld1 {v1.4s}, [x10], %[stride]\n" | |||
| "ld1 {v2.4s}, [x10], %[stride]\n" | |||
| "ld1 {v3.4s}, [x10], %[stride]\n" | |||
| "ld1 {v4.4s}, [x10], %[stride]\n" | |||
| "ld1 {v5.4s}, [x10], %[stride]\n" | |||
| "ld1 {v6.4s}, [x10], %[stride]\n" | |||
| "ld1 {v7.4s}, [x10], %[stride]\n" | |||
| "zip1 v12.4s, v0.4s, v1.4s\n" | |||
| "zip2 v13.4s, v0.4s, v1.4s\n" | |||
| "zip1 v14.4s, v2.4s, v3.4s\n" | |||
| "zip2 v15.4s, v2.4s, v3.4s\n" | |||
| "ld1 {v8.4s}, [x10], %[stride]\n" | |||
| "ld1 {v9.4s}, [x10], %[stride]\n" | |||
| "ld1 {v10.4s}, [x10], %[stride]\n" | |||
| "ld1 {v11.4s}, [x10], %[stride]\n" | |||
| "zip1 v16.4s, v4.4s, v5.4s\n" | |||
| "zip2 v17.4s, v4.4s, v5.4s\n" | |||
| "zip1 v18.4s, v6.4s, v7.4s\n" | |||
| "zip2 v19.4s, v6.4s, v7.4s\n" | |||
| "trn1 v20.2d, v12.2d, v14.2d\n" | |||
| "trn2 v23.2d, v12.2d, v14.2d\n" | |||
| "trn1 v26.2d, v13.2d, v15.2d\n" | |||
| "trn2 v29.2d, v13.2d, v15.2d\n" | |||
| "trn1 v21.2d, v16.2d, v18.2d\n" | |||
| "trn2 v24.2d, v16.2d, v18.2d\n" | |||
| "trn1 v27.2d, v17.2d, v19.2d\n" | |||
| "trn2 v30.2d, v17.2d, v19.2d\n" | |||
| "zip1 v12.4s, v8.4s, v9.4s\n" | |||
| "zip2 v13.4s, v8.4s, v9.4s\n" | |||
| "zip1 v14.4s, v10.4s, v11.4s\n" | |||
| "zip2 v15.4s, v10.4s, v11.4s\n" | |||
| "trn1 v22.2d, v12.2d, v14.2d\n" | |||
| "trn2 v25.2d, v12.2d, v14.2d\n" | |||
| "trn1 v28.2d, v13.2d, v15.2d\n" | |||
| "trn2 v31.2d, v13.2d, v15.2d\n" | |||
| "st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x11], #64\n" | |||
| "st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x11], #64\n" | |||
| "st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x11], #64\n" | |||
| : | |||
| : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) | |||
| : "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", | |||
| "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", | |||
| "v30", "v31"); | |||
| #elif ENABLE_ARM32 | |||
| size_t stride = lead * sizeof(float); | |||
| asm volatile( | |||
| "mov r10, %[src_c]\n" | |||
| "mov r12, %[dst_c]\n" | |||
| "vld1.32 {q0}, [r10], %[stride]\n" | |||
| "vld1.32 {q3}, [r10], %[stride]\n" | |||
| "vld1.32 {q10}, [r10], %[stride]\n" | |||
| "vld1.32 {q13}, [r10], %[stride]\n" | |||
| "vtrn.32 d0, d6\n" | |||
| "vtrn.32 d1, d7\n" | |||
| "vtrn.32 d20, d26\n" | |||
| "vtrn.32 d21, d27\n" | |||
| "vld1.32 {q1}, [r10], %[stride]\n" | |||
| "vld1.32 {q8}, [r10], %[stride]\n" | |||
| "vld1.32 {q11}, [r10], %[stride]\n" | |||
| "vld1.32 {q14}, [r10], %[stride]\n" | |||
| "vswp d1, d20\n" | |||
| "vswp d7, d26\n" | |||
| "vld1.32 {q2}, [r10], %[stride]\n" | |||
| "vld1.32 {q9}, [r10], %[stride]\n" | |||
| "vld1.32 {q12}, [r10], %[stride]\n" | |||
| "vld1.32 {q15}, [r10], %[stride]\n" | |||
| "vtrn.32 d2, d16\n" | |||
| "vtrn.32 d3, d17\n" | |||
| "vtrn.32 d22, d28\n" | |||
| "vtrn.32 d23, d29\n" | |||
| "vswp d3, d22\n" | |||
| "vswp d17, d28\n" | |||
| "vtrn.32 d4, d18\n" | |||
| "vtrn.32 d5, d19\n" | |||
| "vtrn.32 d24, d30\n" | |||
| "vtrn.32 d25, d31\n" | |||
| "vswp d5, d24\n" | |||
| "vswp d19, d30\n" | |||
| "vst1.32 {q0, q1}, [r12]!\n" | |||
| "vst1.32 {q2, q3}, [r12]!\n" | |||
| "vst1.32 {q8, q9}, [r12]!\n" | |||
| "vst1.32 {q10, q11}, [r12]!\n" | |||
| "vst1.32 {q12, q13}, [r12]!\n" | |||
| "vst1.32 {q14, q15}, [r12]!\n" | |||
| : | |||
| : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) | |||
| : "r10", "r12", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); | |||
| #else | |||
| for (int tr = 0; tr < C12NUM; tr++) { | |||
| for (int tc = 0; tc < C4NUM; tc++) { | |||
| dst_c[tc * C12NUM + tr] = src_c[tr * lead + tc]; | |||
| } | |||
| } | |||
| for (; k < K; ++k) { | |||
| sum += alpha * mat_a[i * lda + k] * mat_b[j * ldb + k]; | |||
| #endif | |||
| } | |||
| for (; ci < col; ci++) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C12NUM; | |||
| for (size_t i = 0; i < C12NUM; i++) { | |||
| dst_c[i] = src_c[i * lead]; | |||
| } | |||
| mat_c[i * ldc + j] += sum; | |||
| } | |||
| src_r += C12NUM * lead; | |||
| dst_r += C12NUM * col; | |||
| } | |||
| for (; ri < row; ri++) { | |||
| for (size_t i = 0; i < col; i++) { | |||
| dst_r[i * C12NUM] = src_r[i]; | |||
| } | |||
| src_r += lead; | |||
| dst_r += 1; | |||
| } | |||
| for (; ri < row_up_12; ri++) { | |||
| for (size_t i = 0; i < col; i++) { | |||
| dst_r[i * C12NUM] = 0; | |||
| } | |||
| dst_r += 1; | |||
| } | |||
| return; | |||
| } | |||
| #endif | |||
| static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { | |||
| size_t row8 = row / C8NUM * C8NUM; | |||
| #ifdef ENABLE_ARM64 | |||
| size_t col_skip = col / C8NUM * C8NUM; | |||
| int skip_size = C8NUM; | |||
| #else | |||
| size_t col_skip = col / C4NUM * C4NUM; | |||
| int skip_size = C4NUM; | |||
| #endif | |||
| const float *src_r = src_ptr; | |||
| float *dst_r = dst_ptr; | |||
| size_t ri = 0; | |||
| for (; ri < row8; ri += C8NUM) { | |||
| size_t ci = 0; | |||
| for (; ci < col_skip; ci += skip_size) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C8NUM; | |||
| #ifdef ENABLE_ARM64 | |||
| /* 8x8 row-major to col-major */ | |||
| size_t stride = lead * sizeof(float); | |||
| asm volatile( | |||
| "mov x10, %[src_c]\n" | |||
| "mov x11, %[dst_c]\n" | |||
| "ld1 {v0.4s, v1.4s}, [x10], %[stride]\n" | |||
| "ld1 {v2.4s, v3.4s}, [x10], %[stride]\n" | |||
| "ld1 {v4.4s, v5.4s}, [x10], %[stride]\n" | |||
| "ld1 {v6.4s, v7.4s}, [x10], %[stride]\n" | |||
| "zip1 v8.4s, v0.4s, v2.4s\n" | |||
| "zip2 v9.4s, v0.4s, v2.4s\n" | |||
| "zip1 v10.4s, v4.4s, v6.4s\n" | |||
| "zip2 v11.4s, v4.4s, v6.4s\n" | |||
| "ld1 {v16.4s, v17.4s}, [x10], %[stride]\n" | |||
| "ld1 {v18.4s, v19.4s}, [x10], %[stride]\n" | |||
| "ld1 {v20.4s, v21.4s}, [x10], %[stride]\n" | |||
| "ld1 {v22.4s, v23.4s}, [x10], %[stride]\n" | |||
| "zip1 v12.4s, v1.4s, v3.4s\n" | |||
| "zip2 v13.4s, v1.4s, v3.4s\n" | |||
| "zip1 v14.4s, v5.4s, v7.4s\n" | |||
| "zip2 v15.4s, v5.4s, v7.4s\n" | |||
| "trn1 v0.2d, v8.2d, v10.2d\n" | |||
| "trn2 v1.2d, v8.2d, v10.2d\n" | |||
| "trn1 v2.2d, v9.2d, v11.2d\n" | |||
| "trn2 v3.2d, v9.2d, v11.2d\n" | |||
| "zip1 v24.4s, v16.4s, v18.4s\n" | |||
| "zip2 v25.4s, v16.4s, v18.4s\n" | |||
| "zip1 v26.4s, v20.4s, v22.4s\n" | |||
| "zip2 v27.4s, v20.4s, v22.4s\n" | |||
| "trn1 v4.2d, v12.2d, v14.2d\n" | |||
| "trn2 v5.2d, v12.2d, v14.2d\n" | |||
| "trn1 v6.2d, v13.2d, v15.2d\n" | |||
| "trn2 v7.2d, v13.2d, v15.2d\n" | |||
| "zip1 v28.4s, v17.4s, v19.4s\n" | |||
| "zip2 v29.4s, v17.4s, v19.4s\n" | |||
| "zip1 v30.4s, v21.4s, v23.4s\n" | |||
| "zip2 v31.4s, v21.4s, v23.4s\n" | |||
| "trn1 v16.2d, v24.2d, v26.2d\n" | |||
| "trn2 v17.2d, v24.2d, v26.2d\n" | |||
| "trn1 v18.2d, v25.2d, v27.2d\n" | |||
| "trn2 v19.2d, v25.2d, v27.2d\n" | |||
| "trn1 v20.2d, v28.2d, v30.2d\n" | |||
| "trn2 v21.2d, v28.2d, v30.2d\n" | |||
| "trn1 v22.2d, v29.2d, v31.2d\n" | |||
| "trn2 v23.2d, v29.2d, v31.2d\n" | |||
| "st1 {v0.4s}, [x11], #16\n" | |||
| "st1 {v16.4s}, [x11], #16\n" | |||
| "st1 {v1.4s}, [x11], #16\n" | |||
| "st1 {v17.4s}, [x11], #16\n" | |||
| "st1 {v2.4s}, [x11], #16\n" | |||
| "st1 {v18.4s}, [x11], #16\n" | |||
| "st1 {v3.4s}, [x11], #16\n" | |||
| "st1 {v19.4s}, [x11], #16\n" | |||
| "st1 {v4.4s}, [x11], #16\n" | |||
| "st1 {v20.4s}, [x11], #16\n" | |||
| "st1 {v5.4s}, [x11], #16\n" | |||
| "st1 {v21.4s}, [x11], #16\n" | |||
| "st1 {v6.4s}, [x11], #16\n" | |||
| "st1 {v22.4s}, [x11], #16\n" | |||
| "st1 {v7.4s}, [x11], #16\n" | |||
| "st1 {v23.4s}, [x11], #16\n" | |||
| : | |||
| : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) | |||
| : "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", | |||
| "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", | |||
| "v30", "v31"); | |||
| #elif ENABLE_ARM32 | |||
| /* 8x4 row-major to col-major */ | |||
| size_t stride = col * sizeof(float); | |||
| asm volatile( | |||
| "mov r10, %[src_c]\n" | |||
| "mov r11, %[dst_c]\n" | |||
| "vld1.32 {q0}, [r10], %[stride]\n" | |||
| "vld1.32 {q2}, [r10], %[stride]\n" | |||
| "vld1.32 {q4}, [r10], %[stride]\n" | |||
| "vld1.32 {q6}, [r10], %[stride]\n" | |||
| "vtrn.32 d0, d4\n" | |||
| "vtrn.32 d1, d5\n" | |||
| "vtrn.32 d8, d12\n" | |||
| "vtrn.32 d9, d13\n" | |||
| "vld1.32 {q1}, [r10], %[stride]\n" | |||
| "vld1.32 {q3}, [r10], %[stride]\n" | |||
| "vld1.32 {q5}, [r10], %[stride]\n" | |||
| "vld1.32 {q7}, [r10], %[stride]\n" | |||
| static void gemm_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, | |||
| float *mat_c, int ldc) { | |||
| const int block_size = 4; | |||
| int block_mod = N % block_size; | |||
| int block_c4 = N - block_mod; | |||
| int i, j, k; | |||
| for (i = 0; i < M; ++i) { | |||
| for (k = 0; k < K; ++k) { | |||
| float a = alpha * mat_a[k * lda + i]; | |||
| for (j = 0; j < block_c4; j += block_size) { | |||
| float *b = &mat_b[k * ldb + j]; | |||
| float *c = &mat_c[i * ldc + j]; | |||
| c[0] += a * b[0]; | |||
| c[1] += a * b[1]; | |||
| c[2] += a * b[2]; | |||
| c[3] += a * b[3]; | |||
| "vswp d1, d8\n" | |||
| "vswp d5, d12\n" | |||
| "vtrn.32 d2, d6\n" | |||
| "vtrn.32 d3, d7\n" | |||
| "vtrn.32 d10, d14\n" | |||
| "vtrn.32 d11, d15\n" | |||
| "vswp d3, d10\n" | |||
| "vswp d7, d14\n" | |||
| "vst1.32 {q0, q1}, [r11]!\n" | |||
| "vst1.32 {q2, q3}, [r11]!\n" | |||
| "vst1.32 {q4, q5}, [r11]!\n" | |||
| "vst1.32 {q6, q7}, [r11]!\n" | |||
| : | |||
| : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) | |||
| : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); | |||
| #else | |||
| for (int tr = 0; tr < 8; tr++) { | |||
| for (int tc = 0; tc < 4; tc++) { | |||
| dst_c[tc * 8 + tr] = src_c[tr * lead + tc]; | |||
| } | |||
| } | |||
| for (; j < N; ++j) { | |||
| mat_c[i * ldc + j] += a * mat_b[k * ldb + j]; | |||
| #endif | |||
| } | |||
| for (; ci < col; ci++) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C8NUM; | |||
| for (size_t i = 0; i < C8NUM; i++) { | |||
| dst_c[i] = src_c[i * lead]; | |||
| } | |||
| } | |||
| src_r += C8NUM * lead; | |||
| dst_r += C8NUM * col; | |||
| } | |||
| for (; ri < row; ri++) { | |||
| for (size_t i = 0; i < col; i++) { | |||
| dst_r[i * C8NUM] = src_r[i]; | |||
| } | |||
| src_r += lead; | |||
| dst_r += 1; | |||
| } | |||
| return; | |||
| } | |||
| #ifdef ENABLE_ARM32 | |||
| static void RowMajor2Col4MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { | |||
| size_t row8 = row / C4NUM * C4NUM; | |||
| size_t col4 = col / C4NUM * C4NUM; | |||
| const float *src_r = src_ptr; | |||
| float *dst_r = dst_ptr; | |||
| size_t ri = 0; | |||
| for (; ri < row8; ri += C4NUM) { | |||
| size_t ci = 0; | |||
| for (; ci < col4; ci += C4NUM) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C4NUM; | |||
| /* 4x4 row-major to col-major */ | |||
| #ifdef ENABLE_ARM32 | |||
| size_t stride = col * 4; | |||
| asm volatile( | |||
| "mov r10, %[src_c]\n" | |||
| "mov r12, %[dst_c]\n" | |||
| "vld1.32 {q0}, [r10], %[stride]\n" | |||
| "vld1.32 {q1}, [r10], %[stride]\n" | |||
| "vld1.32 {q2}, [r10], %[stride]\n" | |||
| "vld1.32 {q3}, [r10], %[stride]\n" | |||
| "vtrn.32 d0, d2\n" | |||
| "vtrn.32 d1, d3\n" | |||
| "vtrn.32 d4, d6\n" | |||
| "vtrn.32 d5, d7\n" | |||
| static void gemm_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, | |||
| float *mat_c, int ldc) { | |||
| int i, j, k; | |||
| const int block_size = 4; | |||
| int k_block_mod = K % block_size; | |||
| int k_block_c4 = K - k_block_mod; | |||
| int m_block_mod = M % block_size; | |||
| int m_block_c4 = M - m_block_mod; | |||
| for (i = 0; i < m_block_c4; i += block_size) { | |||
| for (j = 0; j < N; ++j) { | |||
| float sum0 = 0; | |||
| float sum1 = 0; | |||
| float sum2 = 0; | |||
| float sum3 = 0; | |||
| for (k = 0; k < k_block_c4; k += block_size) { | |||
| float *b = &mat_b[j * ldb + k]; | |||
| sum0 += alpha * mat_a[i + k * lda] * b[0]; | |||
| sum0 += alpha * mat_a[i + (k + 1) * lda] * b[1]; | |||
| sum0 += alpha * mat_a[i + (k + 2) * lda] * b[2]; | |||
| sum0 += alpha * mat_a[i + (k + 3) * lda] * b[3]; | |||
| sum1 += alpha * mat_a[i + 1 + k * lda] * b[0]; | |||
| sum1 += alpha * mat_a[i + 1 + (k + 1) * lda] * b[1]; | |||
| sum1 += alpha * mat_a[i + 1 + (k + 2) * lda] * b[2]; | |||
| sum1 += alpha * mat_a[i + 1 + (k + 3) * lda] * b[3]; | |||
| sum2 += alpha * mat_a[i + 2 + k * lda] * b[0]; | |||
| sum2 += alpha * mat_a[i + 2 + (k + 1) * lda] * b[1]; | |||
| sum2 += alpha * mat_a[i + 2 + (k + 2) * lda] * b[2]; | |||
| sum2 += alpha * mat_a[i + 2 + (k + 3) * lda] * b[3]; | |||
| sum3 += alpha * mat_a[i + 3 + k * lda] * b[0]; | |||
| sum3 += alpha * mat_a[i + 3 + (k + 1) * lda] * b[1]; | |||
| sum3 += alpha * mat_a[i + 3 + (k + 2) * lda] * b[2]; | |||
| sum3 += alpha * mat_a[i + 3 + (k + 3) * lda] * b[3]; | |||
| "vswp d1, d4\n" | |||
| "vswp d3, d6\n" | |||
| "vst1.32 {q0}, [r12]!\n" | |||
| "vst1.32 {q1}, [r12]!\n" | |||
| "vst1.32 {q2}, [r12]!\n" | |||
| "vst1.32 {q3}, [r12]!\n" | |||
| : | |||
| : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) | |||
| : "r10", "r12", "q0", "q1", "q2", "q3"); | |||
| #else | |||
| for (int tr = 0; tr < C4NUM; tr++) { | |||
| for (int tc = 0; tc < C4NUM; tc++) { | |||
| dst_c[tc * C4NUM + tr] = src_c[tr * lead + tc]; | |||
| } | |||
| } | |||
| for (; k < K; ++k) { | |||
| float *b = &mat_b[j * ldb + k]; | |||
| sum0 += alpha * mat_a[i + (k * lda)] * b[0]; | |||
| sum1 += alpha * mat_a[i + 1 + (k * lda)] * b[0]; | |||
| sum2 += alpha * mat_a[i + 2 + (k * lda)] * b[0]; | |||
| sum3 += alpha * mat_a[i + 3 + (k * lda)] * b[0]; | |||
| #endif | |||
| } | |||
| for (; ci < col; ci++) { | |||
| const float *src_c = src_r + ci; | |||
| float *dst_c = dst_r + ci * C4NUM; | |||
| for (size_t i = 0; i < C4NUM; i++) { | |||
| dst_c[i] = src_c[i * lead]; | |||
| } | |||
| mat_c[i * ldc + j] += sum0; | |||
| mat_c[(i + 1) * ldc + j] += sum1; | |||
| mat_c[(i + 2) * ldc + j] += sum2; | |||
| mat_c[(i + 3) * ldc + j] += sum3; | |||
| } | |||
| src_r += C4NUM * col; | |||
| dst_r += C4NUM * col; | |||
| } | |||
| // no more block of 4x4 | |||
| for (; i < M; ++i) { | |||
| for (j = 0; j < N; ++j) { | |||
| float sum = 0; | |||
| for (k = 0; k < K; ++k) { | |||
| sum += alpha * mat_a[i + k * lda] * mat_b[k + j * ldb]; | |||
| } | |||
| mat_c[i * ldc + j] += sum; | |||
| for (; ri < row; ri++) { | |||
| for (size_t i = 0; i < col; i++) { | |||
| dst_r[i * C4NUM] = src_r[i]; | |||
| } | |||
| src_r += lead; | |||
| dst_r += 1; | |||
| } | |||
| return; | |||
| } | |||
| #endif | |||
// Plain GEMM entry point: delegates to GemmMatmulPlus with a neutral callback —
// no activation, neither input matrix pre-packed, and no bias vector.
void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b,
                int ldb, float beta, float *mat_c, int ldc, float *workspace) {
  GemmCb cb;
  cb.ca = 0;
  cb.cb = 0;
  cb.bias = NULL;
  cb.atype = ActType_No;
  GemmMatmulPlus(ta, tb, M, N, K, alpha, mat_a, lda, mat_b, ldb, beta, mat_c, ldc, workspace, &cb);
}
| // mat_c = alpha*op( mat_a )*op( mat_b ) + beta*C | |||
| // M - number of rows of matrix a | |||
| // N - number of cols of matrix b | |||
| // K - number of cols of matrix a | |||
| // lda - fast dim of matrix a | |||
| // ldb - fast dim of matrix b | |||
| // ldc - fast dim of matrix c | |||
| void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, | |||
| int ldb, float beta, float *mat_c, int ldc) { | |||
| if (beta >= 0.f && beta <= 0.f) { | |||
| memset(mat_c, 0, M * N * sizeof(float)); | |||
| } else if (beta < 1.f || beta > 1.f) { | |||
| const int block_size = 4; | |||
| const int size = M * N; | |||
| int block_mod = size % block_size; | |||
| int block_c4 = size - block_mod; | |||
| int i; | |||
| for (i = 0; i < block_c4; i += block_size) { | |||
| float *c = &mat_c[i]; | |||
| c[0] *= beta; | |||
| c[1] *= beta; | |||
| c[2] *= beta; | |||
| c[3] *= beta; | |||
| } | |||
| for (; i < size; ++i) { | |||
| mat_c[i] *= beta; | |||
| void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, | |||
| int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *gcb) { | |||
| #ifdef ENABLE_ARM32 | |||
| const int num = C4NUM; | |||
| #else | |||
| const int num = C12NUM; | |||
| #endif | |||
| float *output = mat_c; | |||
| float *fworkspace = workspace; | |||
| int incremental = (beta < 0.f) || (beta > 0.f); | |||
| float *mat_a_input = (float *)mat_a; | |||
| float *mat_b_input = (float *)mat_b; | |||
| #ifdef ENABLE_ARM32 | |||
| if (!gcb->ca) { | |||
| mat_a_input = fworkspace; | |||
| fworkspace += MatSize(M, K, num); | |||
| if (ta) { | |||
| RowMajor2Row4MajorStride(mat_a, mat_a_input, K, M, lda); | |||
| } else { | |||
| RowMajor2Col4MajorStride(mat_a, mat_a_input, M, K, lda); | |||
| } | |||
| } | |||
| if (transpose_a && transpose_b) { | |||
| gemm_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); | |||
| } else if (!transpose_a && !transpose_b) { | |||
| gemm_not_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); | |||
| } else if (!transpose_a && transpose_b) { | |||
| gemm_not_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); | |||
| } else { | |||
| gemm_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); | |||
| #else | |||
| if (!gcb->ca) { | |||
| mat_a_input = fworkspace; | |||
| fworkspace += MatSize(M, K, num); | |||
| if (ta) { | |||
| RowMajor2Row12MajorStride(mat_a, mat_a_input, K, M, lda); | |||
| } else { | |||
| RowMajor2Col12MajorStride(mat_a, mat_a_input, M, K, lda); | |||
| } | |||
| } | |||
| #endif | |||
| if (!gcb->cb) { | |||
| mat_b_input = fworkspace; | |||
| fworkspace += MatSize(N, K, C8NUM); | |||
| if (tb) { | |||
| RowMajor2Col8MajorStride(mat_b, mat_b_input, N, K, ldb); | |||
| } else { | |||
| RowMajor2Row8MajorStride(mat_b, mat_b_input, K, N, ldb); | |||
| } | |||
| } | |||
| if (incremental) output = fworkspace; | |||
| MatMulOpt(mat_a_input, mat_b_input, output, gcb->bias, gcb->atype, K, M, N, ldc, OutType_Nhwc); | |||
| if (incremental) addv(output, mat_c, beta, M, N, ldc); | |||
| gcb->mat_a = mat_a_input; | |||
| gcb->mat_b = mat_b_input; | |||
| } | |||
| @@ -17,11 +17,26 @@ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_ | |||
| #include <stdlib.h> | |||
| #include "nnacl/op_base.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, | |||
| int ldb, float beta, float *mat_c, int ldc); | |||
| typedef struct { | |||
| int ca; | |||
| int cb; | |||
| ActType atype; | |||
| float *bias; | |||
| float *mat_a; | |||
| float *mat_b; | |||
| } GemmCb; | |||
| void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, | |||
| int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *cb); | |||
| void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, | |||
| int ldb, float beta, float *mat_c, int ldc, float *workspace); | |||
| int MatSize(int row, int col, int round); | |||
| int MatSizeTotal(int row, int col, int deep, int inc); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -16,10 +16,11 @@ | |||
| #include <string.h> | |||
| #include "nnacl/fp32_grad/pack_ext.h" | |||
| #include "nnacl/pack.h" | |||
// True iff `a` is a valid index into a range of length `b`, i.e. 0 <= a < b.
// One unsigned compare suffices: a negative `a` wraps to a huge unsigned value,
// which always fails the `< b` check (callers pass non-negative sizes for b).
static int is_a_ge_zero_and_a_lt_b(int a, int b) {
  unsigned ua = (unsigned)a;
  unsigned ub = (unsigned)b;
  return ua < ub;
}
| void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param) { | |||
| void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start) { | |||
| const int pad_left = conv_param->pad_l_; | |||
| const int pad_up = conv_param->pad_u_; | |||
| @@ -35,42 +36,42 @@ void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param | |||
| const int in_height = conv_param->input_h_; | |||
| const int in_width = conv_param->input_w_; | |||
| const int output_h = conv_param->output_h_; | |||
| const int output_w = conv_param->output_w_; | |||
| const int channels = conv_param->input_channel_ / conv_param->group_; | |||
| const int tot_channels = conv_param->input_channel_; | |||
| int kernel_row, kernel_col, output_rows, output_col; | |||
| int row_stride_offset = 0; | |||
| int kernel_row, kernel_col; | |||
| for (output_rows = output_h; output_rows; output_rows--) { | |||
| int col_stride_offset = 0; | |||
| for (output_col = output_w; output_col; output_col--) { | |||
| for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { | |||
| int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; | |||
| for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { | |||
| int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; | |||
| for (int i = 0; i < rows; i++) { | |||
| int block_start = start + i; | |||
| int input_h = block_start / output_w * stride_h; | |||
| int input_w = block_start % output_w * stride_w; | |||
| for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { | |||
| int input_row = -pad_up + kernel_row * dilation_h + input_h; | |||
| for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { | |||
| int input_col = -pad_left + kernel_col * dilation_w + input_w; | |||
| if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { | |||
| const int offset = (input_row * in_width + input_col) * tot_channels; | |||
| memcpy(data_col, in_data + offset, sizeof(float) * channels); | |||
| data_col += channels; | |||
| } else { | |||
| memset(data_col, 0, sizeof(float) * channels); | |||
| data_col += channels; | |||
| } | |||
| if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { | |||
| const int offset = (input_row * in_width + input_col) * tot_channels; | |||
| memcpy(data_col, in_data + offset, sizeof(float) * channels); | |||
| data_col += channels; | |||
| } else { | |||
| memset(data_col, 0, sizeof(float) * channels); | |||
| data_col += channels; | |||
| } | |||
| } | |||
| col_stride_offset += stride_w; | |||
| } | |||
| row_stride_offset += stride_h; | |||
| } | |||
| } | |||
| void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, | |||
| int real_cal_num, int block_index) { | |||
| rolling_im2col_hwc(input_data, packed_input, conv_param, real_cal_num, block_index); | |||
| } | |||
| // output matrix is (kernel_h*kernel_w*channels)X(output_h*output_w) | |||
| void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose) { | |||
| void im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, bool transpose) { | |||
| const int pad_left = conv_param->pad_l_; | |||
| const int pad_up = conv_param->pad_u_; | |||
| @@ -150,7 +151,56 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param | |||
| } | |||
| } | |||
| void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) { | |||
// Rolling (partial) im2row for the convolution gradient path: lays out `rows`
// rows of patch data starting at row `start` into `data_row`, channel-major
// then kernel-position-major (c, kh, kw outermost; spatial columns innermost).
// NOTE(review): input/output roles are swapped relative to forward im2col —
// in_height/in_width are taken from output_h_/output_w_ and output_w from
// input_w_; presumably this walks the conv *output* as the data source for a
// weight/input gradient. Confirm against the callers before relying on this.
void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start) {
  const int pad_left = conv_param->pad_l_;
  const int pad_up = conv_param->pad_u_;
  const int stride_h = conv_param->stride_h_;
  const int stride_w = conv_param->stride_w_;
  const int dilation_h = conv_param->dilation_h_;
  const int dilation_w = conv_param->dilation_w_;
  const int kernel_h = conv_param->kernel_h_;
  const int kernel_w = conv_param->kernel_w_;
  const int in_height = conv_param->output_h_;   // source plane is the conv output (gradient path)
  const int in_width = conv_param->output_w_;
  const int output_w = conv_param->input_w_;     // destination row length comes from the conv input
  const int tot_channels = conv_param->output_channel_;
  const int channels = tot_channels / conv_param->group_;  // channels per group
  int channel, kernel_row, kernel_col, output_rows, output_col;
  for (channel = 0; channel < channels; channel++) {
    for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
      for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
        for (output_rows = start; output_rows < start + rows; output_rows++) {
          int input_row = -pad_up + kernel_row * dilation_h + output_rows * stride_h;
          if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) {
            // whole source row lies in padding: emit a zero row
            for (output_col = output_w; output_col; output_col--) {
              *(data_row++) = 0;
            }
          } else {
            int input_col = -pad_left + kernel_col * dilation_w;
            for (output_col = output_w; output_col; output_col--) {
              if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) {
                // source layout is HWC: (row, col) stride is tot_channels floats
                const int offset = (input_row * in_width + input_col) * tot_channels + channel;
                *(data_row++) = in_data[offset];
              } else {
                *(data_row++) = 0;  // column in padding
              }
              input_col += stride_w;
            }
          }
          // input_row += stride_h;  (kept from original: row already derived from output_rows above)
        }
      }
    }
  }
}
| void col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param) { | |||
| const int pad_left = conv_param->pad_l_; | |||
| const int pad_up = conv_param->pad_u_; | |||
| @@ -198,3 +248,52 @@ void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param | |||
| row_stride_offset += stride_h; | |||
| } | |||
| } | |||
| void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start) { | |||
| const int pad_left = conv_param->pad_l_; | |||
| const int pad_up = conv_param->pad_u_; | |||
| const int stride_h = conv_param->stride_h_; | |||
| const int stride_w = conv_param->stride_w_; | |||
| const int dilation_h = conv_param->dilation_h_; | |||
| const int dilation_w = conv_param->dilation_w_; | |||
| const int kernel_h = conv_param->kernel_h_; | |||
| const int kernel_w = conv_param->kernel_w_; | |||
| const int in_height = conv_param->input_h_; | |||
| const int in_width = conv_param->input_w_; | |||
| const int output_w = conv_param->output_w_; | |||
| const int channels = conv_param->input_channel_ / conv_param->group_; | |||
| const int tot_channels = conv_param->input_channel_; | |||
| int kernel_row, kernel_col; | |||
| for (int r = 0; r < rows; r++) { | |||
| int output_col = (start + r) % output_w; | |||
| int output_row = (start + r) / output_w; | |||
| int row_stride_offset = output_row * stride_h; | |||
| int col_stride_offset = output_col * stride_w; | |||
| // for (output_col = 0; output_col < output_w; output_col++) | |||
| { | |||
| for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { | |||
| int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; | |||
| for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { | |||
| int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; | |||
| if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { | |||
| int offset = (input_row * in_width + input_col) * tot_channels; | |||
| float *data_im_ptr = &data_im[offset]; | |||
| for (int i = 0; i < channels; i++) { | |||
| data_im_ptr[i] += data_col[i]; | |||
| } | |||
| } | |||
| data_col += channels; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -17,14 +17,18 @@ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_ | |||
| #include <stddef.h> | |||
| #include "nnacl/conv_parameter.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param); | |||
| void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose); | |||
| void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param); | |||
| void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, | |||
| int real_cal_num, int block_index); | |||
| void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start); | |||
| void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start); | |||
| void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -14,6 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <stdint.h> | |||
| #include <string.h> | |||
| #include <float.h> | |||
| #include "nnacl/fp32_grad/pooling_grad.h" | |||
| @@ -31,8 +32,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter | |||
| int output_h = pooling_param->output_h_; | |||
| int output_batch = pooling_param->output_batch_; | |||
| for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0; | |||
| memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float)); | |||
| float kk = (float)(win_h * win_w); | |||
| for (uint16_t ib = 0; ib < output_batch; ib++) { | |||
| float *out = &output_ptr[(ib * in_h * in_w * channel)]; | |||
| @@ -77,8 +77,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy | |||
| int output_h = pooling_param->output_h_; | |||
| int output_batch = pooling_param->output_batch_; | |||
| for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0; | |||
| memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float)); | |||
| for (uint16_t ib = 0; ib < output_batch; ib++) { | |||
| float *out = &output_ptr[(ib * in_h * in_w * channel)]; | |||
| const float *inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]); | |||
| @@ -15,50 +15,7 @@ | |||
| */ | |||
| #include <string.h> | |||
| #include "nnacl/fp32_grad/reduce_grad.h" | |||
// Advance the multi-dimensional index `current` odometer-style (last dimension
// fastest) over a space with extents `dims`. Returns 1 while a next index
// exists; returns 0 after the final index wraps back to all zeros.
static inline int NextIndex(const int num_dims, const int *dims, int *current) {
  int carry = 1;
  for (int idx = num_dims - 1; idx >= 0; --idx) {
    int current_val = current[idx] + carry;
    if (dims[idx] == current_val) {
      current[idx] = 0;  // digit wraps; keep carrying into the next-slower dim
    } else {
      current[idx] = current_val;
      carry = 0;
      break;
    }
  }
  return (carry == 0);
}
// Flatten the multi-dimensional index `iter` into a linear row-major offset
// for a tensor with extents `dims` (Horner-style accumulation).
static inline size_t GetInputOffset(const int num_dims, const int *dims, const int *iter) {
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx) {
    offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
  }
  return offset;
}
// Flatten `iter` into a row-major offset for the reduced output tensor:
// identical to GetInputOffset except dimensions listed in `axes` (the reduced
// axes) are skipped, collapsing them out of the offset computation.
static inline size_t GetOutputOffset(const int num_dims, const int *dims, const int *iter, const int num_axis,
                                     const int *axes) {
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx) {
    // if we need to skip this axis
    int is_axis = 0;
    for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
      if (idx == axes[axis_idx]) {
        is_axis = 1;
        break;
      }
    }
    if (!is_axis) {
      offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]);
    }
  }
  return offset;
}
| #include "nnacl/fp32_grad/utils.h" | |||
| void ReduceMeanByAxes(const float *input_data, int *input_iter, const int *input_dims, int input_num_dims, | |||
| const int *axes, int num_axes, float *output_data, const int *output_dims, int output_num_dims) { | |||
| @@ -111,7 +68,7 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c | |||
| return; | |||
| } | |||
| for (int idx = 0; idx < num_outputs; ++idx) output[idx] = 0; // zero output | |||
| memset(output, 0, num_outputs * sizeof(float)); // zero output | |||
| int input_iter[8] = {0}; | |||
| int axes[5] = {0}; | |||
| @@ -41,7 +41,6 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, | |||
| const int M = input_shape[axis]; | |||
| const int N = inner_size; | |||
| const int K = 1; | |||
| for (int i = 0; i < outter_size; i++) { | |||
| int outter_offset = i * dim; | |||
| memset(sum_data, 0.0f, inner_size * sizeof(float)); | |||
| @@ -52,7 +51,14 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, | |||
| sum_data[k] += output_ptr[offset] * input_ptr[offset]; | |||
| } | |||
| } | |||
| gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N); | |||
| for (int k = 0; k < M; ++k) { | |||
| float a = -sum_mul[k]; | |||
| for (int j = 0; j < N; ++j) { | |||
| *(output_ptr + outter_offset + k * N + j) += a * sum_data[j]; | |||
| } | |||
| } | |||
| // gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N); | |||
| } | |||
| for (int i = 0; i < ele_size; i++) { | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ | |||
| #include "nnacl/op_base.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
// Flatten the multi-dimensional index `iter` into a linear row-major offset
// for a tensor with extents `dims`, using Horner-style accumulation.
static inline size_t GetInputOffset(int num_dims, const int *dims, const int *iter) {
  size_t flat = 0;
  int d = 0;
  while (d < num_dims) {
    flat = flat * (size_t)dims[d] + (size_t)iter[d];
    ++d;
  }
  return flat;
}
// Flatten `iter` into a row-major offset for a reduced output tensor: works
// like GetInputOffset but skips every dimension listed in `axes` (the reduced
// axes), collapsing them out of the offset computation.
static inline size_t GetOutputOffset(int num_dims, const int *dims, const int *iter, int num_axis, const int *axes) {
  size_t flat = 0;
  for (int d = 0; d < num_dims; ++d) {
    int reduced = 0;
    for (int a = 0; a < num_axis; ++a) {
      if (axes[a] == d) {
        reduced = 1;  // this dim is collapsed by the reduction
        break;
      }
    }
    if (!reduced) {
      flat = flat * (size_t)dims[d] + (size_t)iter[d];
    }
  }
  return flat;
}
// Advance the multi-dimensional index `current` odometer-style (last dimension
// is fastest) over a space with extents `dims`. Returns 1 while a next index
// exists; returns 0 once the final index wraps back to all zeros.
static inline int NextIndex(int num_dims, const int *dims, int *current) {
  int idx = num_dims - 1;
  while (idx >= 0) {
    int bumped = current[idx] + 1;
    if (bumped == dims[idx]) {
      current[idx] = 0;  // this digit wraps; carry into the next-slower dim
      --idx;
      continue;
    }
    current[idx] = bumped;
    return 1;
  }
  return 0;  // every digit wrapped: iteration is complete
}
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ | |||
| @@ -234,6 +234,9 @@ union PrimitiveType { | |||
| BinaryCrossEntropyGrad, | |||
| BinaryCrossEntropy, | |||
| LpNormalization, | |||
| DropoutGrad, | |||
| MaximumGrad, | |||
| MinimumGrad | |||
| } | |||
| enum QuantType: int { | |||
| @@ -224,6 +224,7 @@ table Conv2DGradFilter { | |||
| dilateW: int; | |||
| dilateH: int; | |||
| hasBias: bool = false; | |||
| filter_shape: [int]; | |||
| activationType: ActivationType = 0; | |||
| } | |||
| @@ -244,6 +245,7 @@ table Conv2DGradInput { | |||
| dilateW: int; | |||
| dilateH: int; | |||
| hasBias: bool = false; | |||
| input_shape: [int]; | |||
| activationType: ActivationType = 0; | |||
| } | |||
| @@ -264,6 +266,7 @@ table GroupConv2DGradInput { | |||
| dilateW: int; | |||
| dilateH: int; | |||
| hasBias: bool = false; | |||
| input_shape: [int]; | |||
| activationType: ActivationType = 0; | |||
| } | |||
| @@ -478,13 +481,10 @@ table DeConv2DGradFilter { | |||
| } | |||
| table BNGrad { | |||
| eps : float; | |||
| momentum: float; | |||
| } | |||
| table BNGradInput { | |||
| eps : float; | |||
| eps: float; | |||
| momentum: float; | |||
| } | |||
| table Scale { | |||
| axis: int; | |||
| activationType: ActivationType = 0; | |||
| @@ -1087,6 +1087,16 @@ table FftReal { | |||
| table FftImag { | |||
| } | |||
| table DropoutGrad { | |||
| ratio : float = 0.5; | |||
| } | |||
| table MaximumGrad { | |||
| } | |||
| table MinimumGrad { | |||
| } | |||
| table NonMaxSuppression { | |||
| centerPointBox : int = 0; | |||
| } | |||
| @@ -95,13 +95,23 @@ class LiteKernel { | |||
| std::string name() const { return this->name_; } | |||
| virtual void train() { train_mode_ = true; } | |||
| virtual int Train() { | |||
| this->train_mode_ = true; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| virtual bool IsTrain() const { return this->train_mode_; } | |||
| virtual int Eval() { | |||
| this->train_mode_ = false; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| virtual bool is_train() { return train_mode_; } | |||
| virtual bool IsEval() const { return !this->train_mode_; } | |||
| virtual void eval() { train_mode_ = false; } | |||
| virtual void SetTrainable(bool trainable = true) { this->trainable_ = trainable; } | |||
| virtual bool is_eval() { return !train_mode_; } | |||
| virtual bool IsTrainable() const { return this->trainable_; } | |||
| void set_name(const std::string &name) { this->name_ = name; } | |||
| @@ -179,6 +189,7 @@ class LiteKernel { | |||
| std::vector<LiteKernel *> in_kernels_; | |||
| std::vector<LiteKernel *> out_kernels_; | |||
| bool train_mode_ = false; | |||
| bool trainable_ = false; // paramaters of this Kernel are trained in Train Session | |||
| bool is_model_output_ = false; | |||
| size_t workspace_size_ = 0; | |||
| static void *workspace_; | |||
| @@ -73,7 +73,7 @@ Registry AdamRegistry(schema::PrimitiveType_Adam, AdamCreator); | |||
| int Adam::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) { | |||
| if (10 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Adam should have at 10 input tensors"; | |||
| MS_LOG(ERROR) << "Adam should have 10 input tensors"; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -42,11 +42,18 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector< | |||
| MS_ASSERT(dx1 != nullptr); | |||
| MS_ASSERT(dx2 != nullptr); | |||
| if ((Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) { | |||
| x1 = inputs_[0]; | |||
| x2 = inputs_[1]; | |||
| dy = inputs_[2]; | |||
| } | |||
| auto inShape0 = x1->shape(); | |||
| auto inShape1 = x2->shape(); | |||
| auto outShape = dy->shape(); | |||
| if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) { | |||
| if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad) || | |||
| (Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) { | |||
| ndim_ = outShape.size(); | |||
| x1_shape_.resize(ndim_); | |||
| x2_shape_.resize(ndim_); | |||
| @@ -61,7 +68,6 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector< | |||
| dy_shape_[i] = outShape[i]; | |||
| } | |||
| } else { | |||
| // if (inShape0.size() < inShape1.size()) | |||
| if (dx1->ElementsNum() < dx2->ElementsNum()) { | |||
| ndim_ = inShape1.size(); | |||
| x1_shape_.resize(ndim_); | |||
| @@ -45,7 +45,12 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->axis = {0}; // GetValue<std::vector<int>>(prim.GetAttr("axis")); | |||
| if (prim.GetAttr("axis") == nullptr) { | |||
| MS_LOG(WARNING) << "get axis failed"; | |||
| attr->axis = {0}; | |||
| } else { | |||
| attr->axis = GetValue<std::vector<int>>(prim.GetAttr("axis")); | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| @@ -42,13 +42,16 @@ int BNGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::BNGradInputT(); | |||
| auto attr = new (std::nothrow) schema::BNGradT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | |||
| // FusedBatchNormGrad does not get this attribute | |||
| attr->momentum = 0.1f; | |||
| if (prim.GetAttr("momentum") != nullptr) { | |||
| attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | |||
| } | |||
| attr->eps = 1e-5; | |||
| if (prim.GetAttr("epsilon") != nullptr) { | |||
| attr->eps = GetValue<float>(prim.GetAttr("epsilon")); | |||
| } | |||
| @@ -75,6 +78,9 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers: | |||
| return RET_OK; | |||
| } | |||
| PrimitiveC *BNGradCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC<BNGrad>(primitive); } | |||
| Registry BNGradRegistry(schema::PrimitiveType_BNGrad, BNGradCreator); | |||
| float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); } | |||
| float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); } | |||
| #endif | |||
| @@ -90,6 +96,10 @@ int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Ten | |||
| auto in = inputs[1]; | |||
| auto scale = inputs[2]; | |||
| if (in->shape().size() != 4) { | |||
| MS_LOG(ERROR) << "Grad Fused batchnorm only supports nhwc input!"; | |||
| } | |||
| outputs[0]->set_shape(in->shape()); | |||
| outputs[1]->set_shape(scale->shape()); | |||
| outputs[2]->set_shape(scale->shape()); | |||
| @@ -38,6 +38,7 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value.AsCon | |||
| int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value.AsConv2DGradFilter()->dilateW; } | |||
| int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value.AsConv2DGradFilter()->dilateH; } | |||
| bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value.AsConv2DGradFilter()->hasBias; } | |||
| int Conv2DGradFilter::GetActivationType() const { return this->primitive_->value.AsConv2DGradFilter()->activationType; } | |||
| void Conv2DGradFilter::SetFormat(int format) { | |||
| @@ -66,6 +67,9 @@ void Conv2DGradFilter::SetPadRight(int pad_right) { | |||
| void Conv2DGradFilter::SetDilateW(int dilate_w) { this->primitive_->value.AsConv2DGradFilter()->dilateW = dilate_w; } | |||
| void Conv2DGradFilter::SetDilateH(int dilate_h) { this->primitive_->value.AsConv2DGradFilter()->dilateH = dilate_h; } | |||
| void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsConv2DGradFilter()->hasBias = has_bias; } | |||
| std::vector<int> Conv2DGradFilter::GetFilterShape() const { | |||
| return this->primitive_->value.AsConv2DGradFilter()->filter_shape; | |||
| } | |||
| void Conv2DGradFilter::SetActivationType(int activation_type) { | |||
| this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type; | |||
| } | |||
| @@ -134,6 +138,28 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNod | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| if (inputs.size() >= kAnfPopulaterThree) { | |||
| auto filter_shape = inputs[kAnfPopulaterTwo]; | |||
| MS_ASSERT(filter_shape != nullptr); | |||
| if (filter_shape->isa<ValueNode>()) { | |||
| auto valueNode = filter_shape->cast<ValueNodePtr>(); | |||
| MS_ASSERT(valueNode != nullptr); | |||
| auto value = valueNode->value(); | |||
| MS_ASSERT(value != nullptr); | |||
| if (value->isa<ValueTuple>()) { | |||
| auto valTuplPtr = dyn_cast<ValueTuple>(value); | |||
| MS_ASSERT(valTuplPtr != nullptr); | |||
| const int nchw2nhwc[] = {0, 3, 1, 2}; | |||
| attr->filter_shape.resize(valTuplPtr->size()); | |||
| for (size_t i = 0; i < valTuplPtr->size(); i++) { | |||
| auto elem = dyn_cast<Int32Imm>((*valTuplPtr)[i]); | |||
| MS_ASSERT(elem != nullptr); | |||
| attr->filter_shape[nchw2nhwc[i]] = elem->value(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| @@ -151,10 +177,16 @@ int Conv2DGradFilter::UnPackToFlatBuilder(const schema::Primitive *primitive, fl | |||
| MS_LOG(ERROR) << "value_as_Conv2DGradFilter return nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateConv2DGradFilter( | |||
| std::vector<int32_t> filter_shape; | |||
| if (attr->filter_shape() != nullptr) { | |||
| for (int i = 0; i < static_cast<int>(attr->filter_shape()->size()); i++) { | |||
| filter_shape.push_back(attr->filter_shape()->data()[i]); | |||
| } | |||
| } | |||
| auto val_offset = schema::CreateConv2DGradFilterDirect( | |||
| *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), | |||
| attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &filter_shape, attr->activationType()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradFilter, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| @@ -175,6 +207,10 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value_as_Co | |||
| int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateW(); } | |||
| int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateH(); } | |||
| bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value_as_Conv2DGradFilter()->hasBias(); } | |||
| std::vector<int> Conv2DGradFilter::GetFilterShape() const { | |||
| auto fb_vector = this->primitive_->value_as_Conv2DGradFilter()->filter_shape(); | |||
| return std::vector<int>(fb_vector->begin(), fb_vector->end()); | |||
| } | |||
| int Conv2DGradFilter::GetActivationType() const { | |||
| return this->primitive_->value_as_Conv2DGradFilter()->activationType(); | |||
| } | |||
| @@ -186,41 +222,22 @@ Registry conv2DGradFilterRegistry(schema::PrimitiveType_Conv2DGradFilter, Conv2D | |||
| #endif | |||
| int Conv2DGradFilter::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) { | |||
| if (3 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Filter should have 3 inputs"; | |||
| if (2 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Filter should have 2 inputs, but it got " << inputs.size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (1 != outputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Filter should have one output"; | |||
| MS_LOG(ERROR) << "Conv2d Grad Filter should have one output but it got " << outputs.size(); | |||
| return RET_ERROR; | |||
| } | |||
| auto *in0 = inputs.at(0); | |||
| auto *in = inputs.at(2); | |||
| MS_ASSERT(in0 != nullptr); | |||
| MS_ASSERT(in != nullptr); | |||
| std::vector<int> output_shape; | |||
| int *out_shape = reinterpret_cast<int *>(in->MutableData()); | |||
| int new_size = in->ElementsNum(); | |||
| if (in0->GetFormat() == in->GetFormat()) { | |||
| for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); | |||
| } else { | |||
| if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { | |||
| output_shape.push_back(out_shape[0]); | |||
| output_shape.push_back(out_shape[2]); | |||
| output_shape.push_back(out_shape[3]); | |||
| output_shape.push_back(out_shape[1]); | |||
| } else { | |||
| MS_LOG(ERROR) << "Shape covnert is not supported"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| auto *out = outputs.at(0); | |||
| MS_ASSERT(out != nullptr); | |||
| out->set_shape(output_shape); | |||
| out->set_shape(GetFilterShape()); | |||
| out->set_data_type(in0->data_type()); | |||
| out->SetFormat(in0->GetFormat()); | |||
| @@ -72,6 +72,7 @@ class Conv2DGradFilter : public PrimitiveC { | |||
| int GetDilateH() const; | |||
| bool GetHasBias() const; | |||
| int GetActivationType() const; | |||
| std::vector<int> GetFilterShape() const; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -39,6 +39,9 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value.AsConv | |||
| int Conv2DGradInput::GetDilateW() const { return this->primitive_->value.AsConv2DGradInput()->dilateW; } | |||
| int Conv2DGradInput::GetDilateH() const { return this->primitive_->value.AsConv2DGradInput()->dilateH; } | |||
| bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value.AsConv2DGradInput()->hasBias; } | |||
| std::vector<int> Conv2DGradInput::GetInputShape() const { | |||
| return this->primitive_->value.AsConv2DGradInput()->input_shape; | |||
| } | |||
| int Conv2DGradInput::GetActivationType() const { return this->primitive_->value.AsConv2DGradInput()->activationType; } | |||
| void Conv2DGradInput::SetFormat(int format) { | |||
| @@ -137,6 +140,27 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNode | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| if (inputs.size() >= kAnfPopulaterThree) { | |||
| auto input_shape = inputs[kAnfPopulaterTwo]; | |||
| MS_ASSERT(input_shape != nullptr); | |||
| if (input_shape->isa<ValueNode>()) { | |||
| auto valueNode = input_shape->cast<ValueNodePtr>(); | |||
| MS_ASSERT(valueNode != nullptr); | |||
| auto value = valueNode->value(); | |||
| MS_ASSERT(value != nullptr); | |||
| if (value->isa<ValueTuple>()) { | |||
| auto valTuplPtr = dyn_cast<ValueTuple>(value); | |||
| MS_ASSERT(valTuplPtr != nullptr); | |||
| const int nchw2nhwc[] = {0, 3, 1, 2}; | |||
| attr->input_shape.resize(valTuplPtr->size()); | |||
| for (size_t i = 0; i < valTuplPtr->size(); i++) { | |||
| auto elem = dyn_cast<Int32Imm>((*valTuplPtr)[i]); | |||
| MS_ASSERT(elem != nullptr); | |||
| attr->input_shape[nchw2nhwc[i]] = elem->value(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| @@ -154,10 +178,16 @@ int Conv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, fla | |||
| MS_LOG(ERROR) << "value_as_Conv2DGradInput return nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateConv2DGradInput( | |||
| std::vector<int32_t> input_shape; | |||
| if (attr->input_shape() != nullptr) { | |||
| for (int i = 0; i < static_cast<int>(attr->input_shape()->size()); i++) { | |||
| input_shape.push_back(attr->input_shape()->data()[i]); | |||
| } | |||
| } | |||
| auto val_offset = schema::CreateConv2DGradInputDirect( | |||
| *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), | |||
| attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradInput, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| @@ -178,6 +208,10 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value_as_Con | |||
| int Conv2DGradInput::GetDilateW() const { return this->primitive_->value_as_Conv2DGradInput()->dilateW(); } | |||
| int Conv2DGradInput::GetDilateH() const { return this->primitive_->value_as_Conv2DGradInput()->dilateH(); } | |||
| bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value_as_Conv2DGradInput()->hasBias(); } | |||
| std::vector<int> Conv2DGradInput::GetInputShape() const { | |||
| auto fb_vector = this->primitive_->value_as_Conv2DGradInput()->input_shape(); | |||
| return std::vector<int>(fb_vector->begin(), fb_vector->end()); | |||
| } | |||
| int Conv2DGradInput::GetActivationType() const { | |||
| return this->primitive_->value_as_Conv2DGradInput()->activationType(); | |||
| } | |||
| @@ -189,40 +223,21 @@ Registry Conv2DGradInputRegistry(schema::PrimitiveType_Conv2DGradInput, Conv2DGr | |||
| #endif | |||
| int Conv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) { | |||
| if (3 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs"; | |||
| if (2 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Input should have 2 inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| if (1 != outputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad input should have one output"; | |||
| MS_LOG(ERROR) << "Conv2d Grad Input should have one output"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *in0 = inputs.at(0); | |||
| auto *in = inputs.at(2); | |||
| MS_ASSERT(in0 != nullptr); | |||
| MS_ASSERT(in != nullptr); | |||
| std::vector<int> output_shape; | |||
| int *out_shape = reinterpret_cast<int *>(in->MutableData()); | |||
| int new_size = in->ElementsNum(); | |||
| if (in0->GetFormat() == in->GetFormat()) { | |||
| for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); | |||
| } else { | |||
| if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { | |||
| output_shape.push_back(out_shape[0]); | |||
| output_shape.push_back(out_shape[2]); | |||
| output_shape.push_back(out_shape[3]); | |||
| output_shape.push_back(out_shape[1]); | |||
| } else { | |||
| MS_LOG(ERROR) << "Shape covnert is not supported"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| auto *out = outputs.at(0); | |||
| MS_ASSERT(out != nullptr); | |||
| out->set_shape(output_shape); | |||
| out->set_shape(GetInputShape()); | |||
| out->set_data_type(in0->data_type()); | |||
| out->SetFormat(in0->GetFormat()); | |||
| @@ -72,6 +72,7 @@ class Conv2DGradInput : public PrimitiveC { | |||
| int GetDilateH() const; | |||
| bool GetHasBias() const; | |||
| int GetActivationType() const; | |||
| std::vector<int> GetInputShape() const; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -27,6 +27,37 @@ float Dropout::GetRatio() const { return this->primitive_->value.AsDropout()->ra | |||
| void Dropout::SetRatio(float ratio) { this->primitive_->value.AsDropout()->ratio = ratio; } | |||
| int Dropout::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_Dropout; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_Dropout) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::DropoutT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (prim.GetAttr("keep_prob") != nullptr) { | |||
| attr->ratio = GetValue<float>(prim.GetAttr("keep_prob")); | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int Dropout::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| @@ -46,5 +77,29 @@ float Dropout::GetRatio() const { return this->primitive_->value_as_Dropout()->r | |||
| PrimitiveC *DropoutCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC<Dropout>(primitive); } | |||
| Registry DropoutRegistry(schema::PrimitiveType_Dropout, DropoutCreator); | |||
| #endif | |||
| int Dropout::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { | |||
| MS_ASSERT(this->primitive_ != nullptr); | |||
| auto input = inputs_.front(); | |||
| MS_ASSERT(input != nullptr); | |||
| auto output0 = outputs_.front(); | |||
| MS_ASSERT(output0 != nullptr); | |||
| if (!GetInferFlag()) { | |||
| return RET_OK; | |||
| } | |||
| output0->set_shape(input->shape()); | |||
| output0->set_data_type(input->data_type()); | |||
| output0->SetFormat(input->GetFormat()); | |||
| if (outputs_.size() > 1) { | |||
| auto output1 = outputs_[1]; | |||
| MS_ASSERT(output1 != nullptr); | |||
| output1->set_shape(input->shape()); | |||
| output1->set_data_type(input->data_type()); | |||
| output1->SetFormat(input->GetFormat()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ | |||
| #define LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| @@ -32,13 +32,16 @@ class Dropout : public PrimitiveC { | |||
| MS_DECLARE_PARENT(Dropout, PrimitiveC); | |||
| explicit Dropout(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} | |||
| void SetRatio(float ratio); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| float GetRatio() const; | |||
| int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ | |||
| #endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ | |||
| @@ -0,0 +1,100 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/ops/dropout_grad.h" | |||
| #ifndef PRIMITIVE_WRITEABLE | |||
| #include "src/ops/ops_register.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| float DropoutGrad::GetRatio() const { return this->primitive_->value.AsDropout()->ratio; } | |||
| void DropoutGrad::SetRatio(float ratio) { this->primitive_->value.AsDropout()->ratio = ratio; } | |||
| int DropoutGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_DropoutGrad; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_DropoutGrad) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::DropoutGradT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (prim.GetAttr("keep_prob") != nullptr) { | |||
| attr->ratio = GetValue<float>(prim.GetAttr("keep_prob")); | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int DropoutGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| MS_ASSERT(nullptr != fbb); | |||
| auto attr = primitive->value_as_DropoutGrad(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "value_as_DropoutGrad return nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateDropoutGrad(*fbb, attr->ratio()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_DropoutGrad, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| float DropoutGrad::GetRatio() const { return this->primitive_->value_as_DropoutGrad()->ratio(); } | |||
| PrimitiveC *DropoutGradCreator(const schema::Primitive *primitive) { | |||
| return PrimitiveC::NewPrimitiveC<DropoutGrad>(primitive); | |||
| } | |||
| Registry DropoutGradRegistry(schema::PrimitiveType_DropoutGrad, DropoutGradCreator); | |||
| #endif | |||
| int DropoutGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { | |||
| MS_ASSERT(this->primitive_ != nullptr); | |||
| MS_ASSERT(inputs_.size() == 2); | |||
| auto input = inputs_.front(); | |||
| MS_ASSERT(input != nullptr); | |||
| auto output = outputs_.front(); | |||
| MS_ASSERT(output != nullptr); | |||
| if (!GetInferFlag()) { | |||
| return RET_OK; | |||
| } | |||
| output->set_shape(input->shape()); | |||
| output->set_data_type(input->data_type()); | |||
| output->SetFormat(input->GetFormat()); | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| #include <cmath> | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class DropoutGrad : public PrimitiveC { | |||
| public: | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(DropoutGrad, PrimitiveC); | |||
| DropoutGrad() = default; | |||
| explicit DropoutGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} | |||
| void SetRatio(float ratio); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| DropoutGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| float GetRatio() const; | |||
| int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ | |||
| @@ -39,6 +39,9 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value.A | |||
| int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateW; } | |||
| int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateH; } | |||
| bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value.AsGroupConv2DGradInput()->hasBias; } | |||
| std::vector<int> GroupConv2DGradInput::GetInputShape() const { | |||
| return this->primitive_->value.AsGroupConv2DGradInput()->input_shape; | |||
| } | |||
| int GroupConv2DGradInput::GetActivationType() const { | |||
| return this->primitive_->value.AsGroupConv2DGradInput()->activationType; | |||
| } | |||
| @@ -99,10 +102,16 @@ int GroupConv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive | |||
| MS_LOG(ERROR) << "value_as_GroupConv2DGradInput return nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateGroupConv2DGradInput( | |||
| std::vector<int32_t> input_shape; | |||
| if (attr->input_shape() != nullptr) { | |||
| for (int i = 0; i < static_cast<int>(attr->input_shape()->size()); i++) { | |||
| input_shape.push_back(attr->input_shape()->data()[i]); | |||
| } | |||
| } | |||
| auto val_offset = schema::CreateGroupConv2DGradInputDirect( | |||
| *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), | |||
| attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); | |||
| attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_GroupConv2DGradInput, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| @@ -127,51 +136,38 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value_a | |||
| int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateW(); } | |||
| int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateH(); } | |||
| bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value_as_GroupConv2DGradInput()->hasBias(); } | |||
| std::vector<int> GroupConv2DGradInput::GetInputShape() const { | |||
| auto fb_vector = this->primitive_->value_as_GroupConv2DGradInput()->input_shape(); | |||
| return std::vector<int>(fb_vector->begin(), fb_vector->end()); | |||
| } | |||
| int GroupConv2DGradInput::GetActivationType() const { | |||
| return this->primitive_->value_as_GroupConv2DGradInput()->activationType(); | |||
| } | |||
| PrimitiveC *GroupConv2DGradInputCreator(const schema::Primitive *primitive) { | |||
| return PrimitiveC::NewPrimitiveC<GroupConv2DGradInput>(primitive); | |||
| } | |||
| Registry GroupConv2DGradInputRegistry(schema::PrimitiveType_GroupConv2DGradInput, GroupConv2DGradInputCreator); | |||
| #endif | |||
| int GroupConv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) { | |||
| if (3 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs"; | |||
| if (2 != inputs.size()) { | |||
| MS_LOG(ERROR) << "Group Conv2d Grad Input should have 2 inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| if (1 != outputs.size()) { | |||
| MS_LOG(ERROR) << "Conv2d Grad input should have one output"; | |||
| MS_LOG(ERROR) << "Group Conv2d Grad Input should have one output"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *in0 = inputs.at(0); | |||
| auto *in = inputs.at(2); | |||
| MS_ASSERT(in0 != nullptr); | |||
| MS_ASSERT(in != nullptr); | |||
| std::vector<int> output_shape; | |||
| int *out_shape = reinterpret_cast<int *>(in->MutableData()); | |||
| int new_size = in->ElementsNum(); | |||
| if (in0->GetFormat() == in->GetFormat()) { | |||
| for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); | |||
| } else { | |||
| if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { | |||
| output_shape.push_back(out_shape[0]); | |||
| output_shape.push_back(out_shape[2]); | |||
| output_shape.push_back(out_shape[3]); | |||
| output_shape.push_back(out_shape[1]); | |||
| } else { | |||
| MS_LOG(ERROR) << "Shape covnert is not supported"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| MS_ASSERT(in0 != nullptr); | |||
| auto *out = outputs.at(0); | |||
| MS_ASSERT(out != nullptr); | |||
| out->set_shape(output_shape); | |||
| out->set_shape(GetInputShape()); | |||
| out->set_data_type(in0->data_type()); | |||
| out->SetFormat(in0->GetFormat()); | |||
| @@ -70,6 +70,7 @@ class GroupConv2DGradInput : public PrimitiveC { | |||
| int GetDilateW() const; | |||
| int GetDilateH() const; | |||
| bool GetHasBias() const; | |||
| std::vector<int> GetInputShape() const; | |||
| int GetActivationType() const; | |||
| }; | |||
| } // namespace lite | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ | |||
| #define LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| @@ -41,4 +41,4 @@ class Maximum : public Arithmetic { | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ | |||
| #endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ | |||
| @@ -0,0 +1,124 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/maximum_grad.h" | |||
| #include "src/common/log_adapter.h" | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| #include <float.h> | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #endif | |||
| #ifndef PRIMITIVE_WRITEABLE | |||
| #include "src/ops/ops_register.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| int MaximumGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_MaximumGrad; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_MaximumGrad) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::MaximumGradT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int MaximumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| MS_ASSERT(nullptr != fbb); | |||
| auto val_offset = schema::CreateMaximumGrad(*fbb); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MaximumGrad, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| PrimitiveC *MaximumGradCreator(const schema::Primitive *primitive) { | |||
| return PrimitiveC::NewPrimitiveC<MaximumGrad>(primitive); | |||
| } | |||
| Registry MaximumGradRegistry(schema::PrimitiveType_MaximumGrad, MaximumGradCreator); | |||
| #endif | |||
| int MaximumGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { | |||
| if (inputs_.size() != 3) { | |||
| MS_LOG(ERROR) << "The number of input must be 3"; | |||
| return RET_ERROR; | |||
| } | |||
| if (outputs_.size() != 2) { | |||
| MS_LOG(ERROR) << "The number of output must be 2"; | |||
| return RET_ERROR; | |||
| } | |||
| auto x1 = inputs_[0]; | |||
| auto x2 = inputs_[1]; | |||
| auto dy = inputs_[2]; | |||
| auto dx1 = outputs_[0]; | |||
| auto dx2 = outputs_[1]; | |||
| MS_ASSERT(dy != nullptr); | |||
| MS_ASSERT(x1 != nullptr); | |||
| MS_ASSERT(x2 != nullptr); | |||
| MS_ASSERT(dx1 != nullptr); | |||
| MS_ASSERT(dx2 != nullptr); | |||
| if (!GetInferFlag()) { | |||
| return RET_OK; | |||
| } | |||
| auto inShape0 = x1->shape(); | |||
| auto inShape1 = x2->shape(); | |||
| auto outShape = dy->shape(); | |||
| ndim_ = outShape.size(); | |||
| x1_shape_.resize(ndim_); | |||
| x2_shape_.resize(ndim_); | |||
| dy_shape_.resize(ndim_); | |||
| auto fillDimNum0 = outShape.size() - inShape0.size(); | |||
| auto fillDimNum1 = outShape.size() - inShape1.size(); | |||
| int j0 = 0; | |||
| int j1 = 0; | |||
| for (unsigned int i = 0; i < outShape.size(); i++) { | |||
| x1_shape_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++]; | |||
| x2_shape_[i] = (i < fillDimNum1) ? 1 : inShape1[j1++]; | |||
| dy_shape_[i] = outShape[i]; | |||
| } | |||
| dx1->set_shape(x1->shape()); | |||
| dx2->set_shape(x2->shape()); | |||
| dx1->set_data_type(dy->data_type()); | |||
| dx2->set_data_type(dy->data_type()); | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| #include <cmath> | |||
| #include "src/ops/arithmetic_grad.h" | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class MaximumGrad : public ArithmeticGrad { | |||
| public: | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(MaximumGrad, ArithmeticGrad); | |||
| MaximumGrad() = default; | |||
| explicit MaximumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {} | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| MaximumGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ | |||
| @@ -23,6 +23,33 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| int Minimum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_Minimum; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_Minimum) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::MinimumT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int Minimum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ | |||
| #define LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| @@ -32,6 +32,7 @@ class Minimum : public Arithmetic { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(Arithmetic, Arithmetic); | |||
| explicit Minimum(schema::PrimitiveT *primitive) : Arithmetic(primitive) {} | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| @@ -39,4 +40,4 @@ class Minimum : public Arithmetic { | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ | |||
| #endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ | |||
| @@ -0,0 +1,76 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/minimum_grad.h" | |||
| #include "src/common/log_adapter.h" | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| #include <float.h> | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #endif | |||
| #ifndef PRIMITIVE_WRITEABLE | |||
| #include "src/ops/ops_register.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| int MinimumGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_MinimumGrad; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_MinimumGrad) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::MinimumGradT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| PrimitiveC *MinimumGradCreator(const schema::Primitive *primitive) { | |||
| return PrimitiveC::NewPrimitiveC<MinimumGrad>(primitive); | |||
| } | |||
| Registry MinimumGradRegistry(schema::PrimitiveType_MinimumGrad, MinimumGradCreator); | |||
| int MinimumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| MS_ASSERT(nullptr != fbb); | |||
| auto val_offset = schema::CreateMinimumGrad(*fbb); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MinimumGrad, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| #include <cmath> | |||
| #include "src/ops/arithmetic_grad.h" | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class MinimumGrad : public ArithmeticGrad { | |||
| public: | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(MinimumGrad, ArithmeticGrad); | |||
| MinimumGrad() = default; | |||
| explicit MinimumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {} | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| MinimumGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ | |||
| @@ -18,6 +18,7 @@ | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| #include <memory> | |||
| #include <map> | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #include "src/ops/space_to_batch.h" | |||
| #include "src/ops/space_to_batch_nd.h" | |||
| @@ -167,12 +168,14 @@ | |||
| #include "src/ops/sgd.h" | |||
| #include "src/ops/adam.h" | |||
| #include "src/ops/assign.h" | |||
| #include "src/ops/dropout_grad.h" | |||
| #include "src/ops/maximum_grad.h" | |||
| #include "src/ops/minimum_grad.h" | |||
| #include "src/ops/control_depend.h" | |||
| #include "src/ops/assign_add.h" | |||
| #include "src/ops/binary_cross_entropy.h" | |||
| #include "src/ops/binary_cross_entropy_grad.h" | |||
| #endif | |||
| #endif | |||
| namespace mindspore { | |||
| namespace lite { | |||
| @@ -506,10 +509,12 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std: | |||
| return NewPrimitiveC<Maximum>(prim, inputs, quantType); | |||
| } else if (op_type == "Split") { | |||
| return NewPrimitiveC<Split>(prim, inputs, quantType); | |||
| } else if (op_type == "While") { | |||
| return NewPrimitiveC<While>(prim, inputs, quantType); | |||
| } else if (op_type == "OneHot") { | |||
| return NewPrimitiveC<OneHot>(prim, inputs, quantType); | |||
| } else if (op_type == "Dropout") { | |||
| return NewPrimitiveC<Dropout>(prim, inputs, quantType); | |||
| } else if (op_type == "While") { | |||
| return NewPrimitiveC<While>(prim, inputs, quantType); | |||
| } else if (op_type == "GatherV2") { | |||
| return NewPrimitiveC<Gather>(prim, inputs, quantType); | |||
| } else if (op_type == "OnesLike") { | |||
| @@ -537,7 +542,7 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std: | |||
| } else if ((op_type == "ReluGrad" || op_type == "ReLU6Grad" || op_type == "SigmoidGrad" || | |||
| op_type == "HSigmoidGrad" || op_type == "HSwishGrad")) { | |||
| return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType); | |||
| } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) { | |||
| } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad") || (op_type == "AvgPoolGradGpu")) { | |||
| return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DBackpropFilter") { | |||
| return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType); | |||
| @@ -559,6 +564,12 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std: | |||
| return NewPrimitiveC<Adam>(prim, inputs, quantType); | |||
| } else if (op_type == "Assign") { | |||
| return NewPrimitiveC<Assign>(prim, inputs, quantType); | |||
| } else if (op_type == "DropoutGrad") { | |||
| return NewPrimitiveC<DropoutGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "MaximumGrad") { | |||
| return NewPrimitiveC<MaximumGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "MinimumGrad") { | |||
| return NewPrimitiveC<MinimumGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "AssignAdd") { | |||
| return NewPrimitiveC<AssignAdd>(prim, inputs, quantType); | |||
| } else if (op_type == "BinaryCrossEntropy") { | |||
| @@ -884,7 +895,12 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { | |||
| return new BinaryCrossEntropyGrad(primitive); | |||
| case schema::PrimitiveType_BinaryCrossEntropy: | |||
| return new BinaryCrossEntropy(primitive); | |||
| case schema::PrimitiveType_DropoutGrad: | |||
| return new DropoutGrad(primitive); | |||
| case schema::PrimitiveType_MaximumGrad: | |||
| return new MaximumGrad(primitive); | |||
| case schema::PrimitiveType_MinimumGrad: | |||
| return new MinimumGrad(primitive); | |||
| #endif | |||
| default: | |||
| MS_LOG(ERROR) << "Unsupported primitive type in Create : " << schema::EnumNamePrimitiveType(op_type); | |||
| @@ -892,6 +908,7 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { | |||
| } | |||
| return nullptr; | |||
| } | |||
| #else | |||
| void PrimitiveC::SetQuantType(schema::QuantType quant_type) { this->quant_type_ = quant_type; } | |||
| schema::QuantType PrimitiveC::GetQuantType() const { return quant_type_; } | |||
| @@ -50,8 +50,7 @@ int Squeeze::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &in | |||
| MS_LOG(INFO) << "Squeeze's attr xis is set to default"; | |||
| attr->axis = {0}; | |||
| } else { | |||
| int axis = GetValue<int>(prim.GetAttr("axis")); | |||
| attr->axis = {axis}; | |||
| attr->axis = GetValue<std::vector<int>>(prim.GetAttr("axis")); | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| } | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef LITE_MINDSPORE_LITE_C_OPS_SUB_H_ | |||
| #define LITE_MINDSPORE_LITE_C_OPS_SUB_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_SUB_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_SUB_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| @@ -34,7 +34,6 @@ class Sub : public Arithmetic { | |||
| explicit Sub(schema::PrimitiveT *primitive) : Arithmetic(primitive) {} | |||
| void SetActivationType(int activation_type); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| @@ -43,4 +42,4 @@ class Sub : public Arithmetic { | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_C_OPS_SUB_H_ | |||
| #endif // MINDSPORE_LITE_SRC_OPS_SUB_H_ | |||
| @@ -39,14 +39,6 @@ void FusedBatchnormCPUKernel::FreeScaleAndOffset() { | |||
| free(offset_); | |||
| offset_ = nullptr; | |||
| } | |||
| if (save_mean_ != nullptr) { | |||
| free(save_mean_); | |||
| save_mean_ = nullptr; | |||
| } | |||
| if (save_variance_ != nullptr) { | |||
| free(save_variance_); | |||
| save_variance_ = nullptr; | |||
| } | |||
| } | |||
| int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| @@ -59,11 +51,8 @@ int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| offset_ = malloc(offset->Size()); | |||
| mean_ = malloc(mean->Size()); | |||
| variance_ = malloc(variance->Size()); | |||
| save_mean_ = malloc(mean->Size()); | |||
| save_variance_ = malloc(variance->Size()); | |||
| if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr || save_mean_ == nullptr || | |||
| save_variance_ == nullptr) { | |||
| if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) { | |||
| FreeMeanAndVariance(); | |||
| FreeScaleAndOffset(); | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| @@ -73,61 +62,64 @@ int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| memcpy(offset_, offset->MutableData(), offset->Size()); | |||
| memcpy(mean_, mean->MutableData(), mean->Size()); | |||
| memcpy(variance_, variance->MutableData(), variance->Size()); | |||
| memset(save_mean_, 0, mean->Size()); | |||
| memset(save_variance_, 0, variance->Size()); | |||
| if (out_tensors_.size() > 4) { | |||
| for (size_t i = 1; i < out_tensors_.size(); i++) { | |||
| auto *data = static_cast<float *>(out_tensors_[i]->MutableData()); | |||
| std::fill(data, data + out_tensors_[i]->ElementsNum(), 0.f); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Run() { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| if (is_train() && in_tensors_.size() >= 5) { | |||
| if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) { | |||
| float *in = static_cast<float *>(in_tensors_[0]->MutableData()); | |||
| float *scale = static_cast<float *>(in_tensors_[1]->MutableData()); | |||
| float *bias = static_cast<float *>(in_tensors_[2]->MutableData()); | |||
| float *mean = static_cast<float *>(in_tensors_[3]->MutableData()); | |||
| float *var = static_cast<float *>(in_tensors_[4]->MutableData()); | |||
| std::fill(mean, mean + in_tensors_[3]->ElementsNum(), 0.f); | |||
| std::fill(var, var + in_tensors_[4]->ElementsNum(), 0.f); | |||
| FusedBatchNormFp32MeanVar(in, mean, var, param, static_cast<float *>(save_mean_), | |||
| static_cast<float *>(save_variance_)); | |||
| memcpy(out_tensors_[3]->MutableData(), save_mean_, out_tensors_[3]->Size()); | |||
| memcpy(out_tensors_[4]->MutableData(), save_variance_, out_tensors_[3]->Size()); | |||
| memcpy(mean_, mean, in_tensors_[3]->Size()); | |||
| memcpy(variance_, var, in_tensors_[4]->Size()); | |||
| float *offset = static_cast<float *>(in_tensors_[2]->MutableData()); | |||
| float *current_mean = static_cast<float *>(mean_); | |||
| float *current_var = static_cast<float *>(variance_); | |||
| float *save_mean = static_cast<float *>(in_tensors_[3]->MutableData()); | |||
| float *save_variance = static_cast<float *>(in_tensors_[4]->MutableData()); | |||
| std::fill(current_mean, current_mean + in_tensors_[3]->ElementsNum(), 0.f); | |||
| std::fill(current_var, current_var + in_tensors_[4]->ElementsNum(), 0.f); | |||
| FusedBatchNormFp32MeanVar(in, current_mean, current_var, param, static_cast<float *>(save_mean), | |||
| static_cast<float *>(save_variance)); | |||
| memcpy(out_tensors_[1]->MutableData(), scale, out_tensors_[1]->Size()); | |||
| memcpy(out_tensors_[2]->MutableData(), offset, out_tensors_[2]->Size()); | |||
| memcpy(out_tensors_[3]->MutableData(), current_mean, out_tensors_[3]->Size()); | |||
| memcpy(out_tensors_[4]->MutableData(), current_var, out_tensors_[4]->Size()); | |||
| // Copy to local variables | |||
| memcpy(scale_, scale, in_tensors_[1]->Size()); | |||
| memcpy(offset_, bias, in_tensors_[2]->Size()); | |||
| memcpy(offset_, offset, in_tensors_[2]->Size()); | |||
| // save for next iteration | |||
| memcpy(in_tensors_[3]->MutableData(), save_mean, in_tensors_[3]->Size()); | |||
| memcpy(in_tensors_[4]->MutableData(), save_variance, in_tensors_[4]->Size()); | |||
| trained_ = true; // trained at least once | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| return ret; | |||
| } | |||
| void FusedBatchnormCPUKernel::eval() { | |||
| LiteKernel::eval(); | |||
| int FusedBatchnormCPUKernel::Eval() { | |||
| LiteKernel::Eval(); | |||
| if (trained_) { | |||
| float *run_mean = static_cast<float *>(in_tensors_[3]->MutableData()); | |||
| float *run_var = static_cast<float *>(in_tensors_[4]->MutableData()); | |||
| float *save_mean = static_cast<float *>(in_tensors_[3]->MutableData()); | |||
| float *save_var = static_cast<float *>(in_tensors_[4]->MutableData()); | |||
| float *scale = static_cast<float *>(in_tensors_[1]->MutableData()); | |||
| float *bias = static_cast<float *>(in_tensors_[2]->MutableData()); | |||
| // Copy to input tensors for Model export | |||
| memcpy(run_mean, save_mean_, in_tensors_[3]->Size()); | |||
| memcpy(run_var, save_variance_, in_tensors_[4]->Size()); | |||
| // Copy to local variables | |||
| memcpy(mean_, run_mean, in_tensors_[3]->Size()); | |||
| memcpy(variance_, run_var, in_tensors_[4]->Size()); | |||
| memcpy(scale_, scale, in_tensors_[1]->Size()); | |||
| memcpy(offset_, bias, in_tensors_[2]->Size()); | |||
| memcpy(mean_, save_mean, in_tensors_[3]->Size()); | |||
| memcpy(variance_, save_var, in_tensors_[4]->Size()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::DoExecute(int task_id) { | |||
| @@ -29,7 +29,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { | |||
| : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } | |||
| void eval() override; | |||
| int Eval() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor() override; | |||
| @@ -39,8 +39,6 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { | |||
| void FreeScaleAndOffset(); | |||
| void *scale_ = nullptr; | |||
| void *offset_ = nullptr; | |||
| void *save_mean_ = nullptr; | |||
| void *save_variance_ = nullptr; | |||
| bool trained_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -140,10 +140,12 @@ int MatmulCPUKernel::InitBias() { | |||
| : (c_shape[c_shape.size() - 1]); | |||
| params_->col_8_ = UP_ROUND(params_->col_, 8); | |||
| auto col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_; | |||
| bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float))); | |||
| if (bias_ptr_ == nullptr) { | |||
| FreeTmpBuffer(); | |||
| return RET_MEMORY_FAILED; | |||
| bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float))); | |||
| if (bias_ptr_ == nullptr) { | |||
| FreeTmpBuffer(); | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| } | |||
| memset(bias_ptr_, 0, col_tmp * sizeof(float)); | |||
| if (in_tensors_.size() == 3) { | |||
| @@ -154,6 +156,8 @@ int MatmulCPUKernel::InitBias() { | |||
| int MatmulCPUKernel::ReSize() { | |||
| if (!params_->b_const_) { | |||
| free(bias_ptr_); | |||
| bias_ptr_ = nullptr; | |||
| auto ret = InitBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Matmul fp32 init bias failed"; | |||
| @@ -277,7 +281,7 @@ int MatmulCPUKernel::Run() { | |||
| auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c()); | |||
| auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c()); | |||
| if (!params_->a_const_ || is_train()) { | |||
| if (!params_->a_const_ || IsTrain()) { | |||
| if (a_pack_ptr_ != nullptr) { | |||
| params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_); | |||
| a_pack_ptr_ = nullptr; | |||
| @@ -294,7 +298,7 @@ int MatmulCPUKernel::Run() { | |||
| a_ptr_ = a_pack_ptr_; | |||
| } | |||
| } | |||
| if (!params_->b_const_ || is_train()) { | |||
| if (!params_->b_const_ || IsTrain()) { | |||
| if (b_pack_ptr_ != nullptr) { | |||
| params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_); | |||
| b_pack_ptr_ = nullptr; | |||
| @@ -311,7 +315,9 @@ int MatmulCPUKernel::Run() { | |||
| b_ptr_ = b_pack_ptr_; | |||
| } | |||
| } | |||
| if (IsTrain()) { | |||
| InitBias(); | |||
| } | |||
| for (int i = 0; i < params_->batch; ++i) { | |||
| if (is_vector_a_) { | |||
| cur_a_ptr_ = a_ptr_ + i * params_->deep_; | |||
| @@ -329,26 +335,54 @@ int MatmulCPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| if (!params_->a_const_ || is_train()) { | |||
| context_->allocator->Free(a_pack_ptr_); | |||
| if (!params_->a_const_ || IsTrain()) { | |||
| params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_); | |||
| a_pack_ptr_ = nullptr; | |||
| } | |||
| if (!params_->b_const_ || is_train()) { | |||
| context_->allocator->Free(b_pack_ptr_); | |||
| if (!params_->b_const_ || IsTrain()) { | |||
| params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_); | |||
| b_pack_ptr_ = nullptr; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| void MatmulCPUKernel::eval() { | |||
| int MatmulCPUKernel::Eval() { | |||
| // Copy weights after training | |||
| LiteKernel::eval(); | |||
| auto a_src = reinterpret_cast<float *>(in_tensors_[0]->data_c()); | |||
| auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c()); | |||
| LiteKernel::Eval(); | |||
| if (params_->a_const_) { | |||
| InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_pack_ptr_); | |||
| if (a_pack_ptr_ == nullptr) { | |||
| auto ret = MallocMatrixABuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| if (is_vector_a_) { | |||
| a_ptr_ = a_src; | |||
| } else { | |||
| InitMatrixA(a_src, a_pack_ptr_); | |||
| a_ptr_ = a_pack_ptr_; | |||
| } | |||
| } | |||
| if (params_->b_const_) { | |||
| InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_pack_ptr_); | |||
| if (b_pack_ptr_ == nullptr) { | |||
| auto ret = MallocMatrixBBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| if (is_vector_a_ && params_->b_transpose_) { | |||
| b_ptr_ = b_src; | |||
| } else { | |||
| InitMatrixB(b_src, b_pack_ptr_); | |||
| b_ptr_ = b_pack_ptr_; | |||
| } | |||
| } | |||
| InitBias(); | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -34,7 +34,7 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| void eval() override; | |||
| int Eval() override; | |||
| private: | |||
| int MallocMatrixABuffer(); | |||
| @@ -214,5 +214,5 @@ kernel::LiteKernel *CpuOneHotFp32KernelCreator(const std::vector<lite::Tensor *> | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -45,24 +45,19 @@ int AdamCPUKernel::Execute(int task_id) { | |||
| auto eps = reinterpret_cast<float *>(in_tensors_[8]->MutableData())[0]; | |||
| auto gradient = reinterpret_cast<float *>(in_tensors_[9]->MutableData()); | |||
| size_t elem_num = in_tensors_[0]->ElementsNum(); | |||
| auto update_lr = learning_rate * std::sqrt(1 - beta2_power) / (1 - beta1_power); | |||
| if (adam_param_->use_nesterov_) { // Nadam | |||
| for (size_t i = 0; i < elem_num; ++i) { | |||
| m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1)); | |||
| v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2)); | |||
| auto g_hat = gradient[i] / (1 - beta1_power); | |||
| auto m_hat = m[i] / (1 - beta1_power); | |||
| auto v_hat = v[i] / (1 - beta2_power); | |||
| auto m_tag = (1.f - beta1) * g_hat + beta1 * m_hat; | |||
| weight[i] -= learning_rate * m_tag / (sqrtf(v_hat) + eps); | |||
| m[i] += (gradient[i] - m[i]) * (1 - beta1); | |||
| v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2); | |||
| weight[i] -= update_lr * (m[i] * beta1 + (1 - beta1) * gradient[i]) / (std::sqrt(v[i]) + eps); | |||
| } | |||
| } else { | |||
| for (size_t i = 0; i < elem_num; ++i) { | |||
| m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1)); | |||
| v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2)); | |||
| auto m_hat = m[i] / (1 - beta1_power); | |||
| auto v_hat = v[i] / (1 - beta2_power); | |||
| weight[i] -= learning_rate * m_hat / (sqrtf(v_hat) + eps); | |||
| m[i] += (gradient[i] - m[i]) * (1 - beta1); | |||
| v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2); | |||
| weight[i] -= update_lr * m[i] / (std::sqrt(v[i]) + eps); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -177,6 +177,28 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float | |||
| ElementDivNegSquare(tile_data2, x2_data, dx2, dy_size); | |||
| } | |||
| void ArithmeticGradCPUKernel::ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, | |||
| int dx2_size) { | |||
| // For some reason, input order is x0, x1, dy | |||
| auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | |||
| dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData()); | |||
| MaximumByAxes(x1, x2, dy, arithmeticParameter_->in_shape0_, arithmeticParameter_->in_shape1_, | |||
| arithmeticParameter_->out_shape_, dx1, dx2, arithmeticParameter_->ndim_); | |||
| } | |||
| void ArithmeticGradCPUKernel::ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, | |||
| int dx2_size) { | |||
| // For some reason, input order is x0, x1, dy | |||
| auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | |||
| dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData()); | |||
| MinimumByAxes(x1, x2, dy, arithmeticParameter_->out_shape_, arithmeticParameter_->in_shape0_, | |||
| arithmeticParameter_->in_shape1_, dx1, dx2, arithmeticParameter_->ndim_); | |||
| } | |||
| int ArithmeticGradCPUKernel::ReSize() { return RET_OK; } | |||
| int ArithmeticGradCPUKernel::Execute(int task_id) { | |||
| @@ -240,4 +262,6 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulGrad, CpuArithmeticGradFp3 | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_AddGrad, CpuArithmeticGradFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SubGrad, CpuArithmeticGradFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DivGrad, CpuArithmeticGradFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MaximumGrad, CpuArithmeticGradFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MinimumGrad, CpuArithmeticGradFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -24,6 +24,8 @@ | |||
| using mindspore::schema::PrimitiveType_AddGrad; | |||
| using mindspore::schema::PrimitiveType_DivGrad; | |||
| using mindspore::schema::PrimitiveType_MaximumGrad; | |||
| using mindspore::schema::PrimitiveType_MinimumGrad; | |||
| using mindspore::schema::PrimitiveType_MulGrad; | |||
| using mindspore::schema::PrimitiveType_SubGrad; | |||
| @@ -52,6 +54,12 @@ class ArithmeticGradCPUKernel : public LiteKernel { | |||
| case PrimitiveType_DivGrad: | |||
| arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv; // this will be adjusted in InferShape | |||
| break; | |||
| case PrimitiveType_MaximumGrad: | |||
| arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMaximum; | |||
| break; | |||
| case PrimitiveType_MinimumGrad: | |||
| arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMinimum; | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Error Operator type " << parameter->type_; | |||
| break; | |||
| @@ -79,6 +87,8 @@ class ArithmeticGradCPUKernel : public LiteKernel { | |||
| void ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); | |||
| void ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); | |||
| void ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); | |||
| void ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); | |||
| void ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); | |||
| ArithmeticParameter *arithmeticParameter_; | |||
| ArithmeticGradOperation arithmetic_grad_; | |||
| float *tile_data0; | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32_grad/bn_grad.h" | |||
| #include <math.h> | |||
| #include <algorithm> | |||
| #include <vector> | |||
| #include "schema/model_generated.h" | |||
| @@ -34,7 +35,7 @@ namespace mindspore::kernel { | |||
| int BNGradCPUKernel::Init() { | |||
| auto *input_x = in_tensors_.at(1); | |||
| int channels = input_x->shape().at(kNHWC_C); | |||
| SetWorkspaceSize(4 * channels * sizeof(float)); | |||
| SetWorkspaceSize(2 * channels * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| @@ -45,19 +46,23 @@ int BNGradCPUKernel::Execute(int task_id) { | |||
| auto *input_yt = in_tensors_.at(0); | |||
| auto *input_x = in_tensors_.at(1); | |||
| auto *input_scale = in_tensors_.at(2); | |||
| auto *input_mean = in_tensors_.at(3); | |||
| auto *input_var = in_tensors_.at(4); | |||
| float *save_mean = reinterpret_cast<float *>(input_mean->MutableData()); | |||
| float *save_var = reinterpret_cast<float *>(input_var->MutableData()); | |||
| auto *output_dx = out_tensors_.at(0); | |||
| auto *output_scale = out_tensors_.at(1); | |||
| auto *output_bias = out_tensors_.at(2); | |||
| int batch = input_x->Batch(); | |||
| int channels = input_x->Channel(); | |||
| int spatial = input_x->Height() * input_x->Width(); | |||
| size_t batch = input_x->Batch(); | |||
| size_t channels = input_x->Channel(); | |||
| size_t spatial = input_x->Height() * input_x->Width(); | |||
| float eps = bn_param->epsilon_; | |||
| float *workspace = static_cast<float *>(GetWorkspace()); | |||
| std::fill(workspace, workspace + GetWorkspaceSize() / sizeof(*workspace), 0.f); | |||
| float *mean = workspace; | |||
| float *invar = mean + channels; | |||
| float *dxhat_sum = invar + channels; | |||
| float *dxhat_sum = workspace; | |||
| float *dxhathat_sum = dxhat_sum + channels; | |||
| float *x = reinterpret_cast<float *>(input_x->MutableData()); | |||
| @@ -67,11 +72,14 @@ int BNGradCPUKernel::Execute(int task_id) { | |||
| float *dscale = reinterpret_cast<float *>(output_scale->MutableData()); | |||
| float *dbias = reinterpret_cast<float *>(output_bias->MutableData()); | |||
| backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx); | |||
| var2Invar(save_var, input_var->ElementsNum(), eps); | |||
| // dx | |||
| backwardX(x, yt, scale, batch * spatial, channels, save_mean, save_var, dxhat_sum, dxhathat_sum, dx); | |||
| // dbias | |||
| sumSpatialBatch(yt, batch * spatial, channels, dbias); | |||
| // dscale | |||
| backwardScale(x, mean, invar, yt, batch, channels, spatial, dscale); | |||
| backwardScale(x, save_mean, save_var, yt, batch, channels, spatial, dscale); | |||
| return RET_OK; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include "nnacl/fp32_grad/gemm.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "nnacl/pack.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::RET_ERROR; | |||
| @@ -26,8 +27,8 @@ using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionTrainCPUKernel::Init() { | |||
| if (2 != in_tensors_.size()) { | |||
| MS_LOG(ERROR) << "Convolution should have two inputs"; | |||
| if (2 > in_tensors_.size()) { | |||
| MS_LOG(ERROR) << "Convolution should have at least two inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| if (1 != out_tensors_.size()) { | |||
| @@ -51,11 +52,11 @@ int ConvolutionTrainCPUKernel::Init() { | |||
| conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W); | |||
| conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_; | |||
| int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * | |||
| conv_param_->input_channel_ / conv_param_->group_; | |||
| SetWorkspaceSize(ws_size * sizeof(float)); | |||
| const int n = conv_param_->output_channel_ * conv_param_->group_; | |||
| const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_; | |||
| ws_size = chunk * k; | |||
| int mat_alloc = MatSizeTotal(chunk, n, k, 0); | |||
| SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| @@ -71,36 +72,35 @@ int ConvolutionTrainCPUKernel::Execute(int task_id) { | |||
| auto y_addr = reinterpret_cast<float *>(out_y->MutableData()); | |||
| auto w_addr = reinterpret_cast<float *>(input_w->MutableData()); | |||
| int i, j; | |||
| int nweights = input_w->ElementsNum(); | |||
| int in_ch = conv_param_->input_channel_; | |||
| int in_h = conv_param_->input_h_; | |||
| int in_w = conv_param_->input_w_; | |||
| int k_h = conv_param_->kernel_h_; | |||
| int k_w = conv_param_->kernel_w_; | |||
| int batch = conv_param_->output_batch_; | |||
| int out_ch = conv_param_->output_channel_; // out_y->shape()[3]; | |||
| int groups = conv_param_->group_; | |||
| int out_h = conv_param_->output_h_; | |||
| int out_w = conv_param_->output_w_; | |||
| int m = out_h * out_w; | |||
| int n = out_ch / groups; | |||
| int k = k_h * k_w * in_ch / groups; | |||
| const int nweights = input_w->ElementsNum(); | |||
| const int in_ch = conv_param_->input_channel_; | |||
| const int in_h = conv_param_->input_h_; | |||
| const int in_w = conv_param_->input_w_; | |||
| const int k_h = conv_param_->kernel_h_; | |||
| const int k_w = conv_param_->kernel_w_; | |||
| const int batch = conv_param_->output_batch_; | |||
| const int out_ch = conv_param_->output_channel_; // out_y->shape()[3]; | |||
| const int groups = conv_param_->group_; | |||
| const int out_h = conv_param_->output_h_; | |||
| const int out_w = conv_param_->output_w_; | |||
| const int m = out_h * out_w; | |||
| const int n = out_ch / groups; | |||
| const int k = k_h * k_w * in_ch / groups; | |||
| float *workspace = static_cast<float *>(GetWorkspace()); | |||
| memset(y_addr, 0, out_y->Size()); | |||
| for (i = 0; i < batch; ++i) { | |||
| for (j = 0; j < groups; ++j) { | |||
| float *mat_a = workspace; | |||
| float *mat_b = w_addr + j * nweights / groups; | |||
| float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups); | |||
| float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups); | |||
| im2col_hwc(im, mat_a, conv_param_); | |||
| gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch); | |||
| float *mat_workspace = workspace + ws_size; | |||
| for (int i = 0; i < batch; ++i) { | |||
| for (int j = 0; j < groups; ++j) { | |||
| for (int ci = 0; ci < m; ci += chunk) { | |||
| int real_chunk = MSMIN(m - ci, chunk); | |||
| float *mat_a = workspace; | |||
| const float *mat_b = w_addr + j * nweights / groups; | |||
| float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch; | |||
| float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups); | |||
| RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci); | |||
| GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace); | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -33,6 +33,14 @@ class ConvolutionTrainCPUKernel : public LiteKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| int ws_size = 0; | |||
| #ifdef ENABLE_ARM32 | |||
| const int chunk = C4NUM; | |||
| #else | |||
| const int chunk = C12NUM; | |||
| #endif | |||
| }; | |||
| kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -51,10 +51,12 @@ int ConvolutionGradFilterCPUKernel::Init() { | |||
| conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; | |||
| conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; | |||
| size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * | |||
| conv_param->input_channel_ / conv_param->group_; | |||
| ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; | |||
| SetWorkspaceSize(ws_size * sizeof(float)); | |||
| int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; | |||
| int k = conv_param->output_channel_ / conv_param->group_; | |||
| size_t mat_alloc = MatSizeTotal(k, n, chunk, n); | |||
| SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| @@ -88,19 +90,21 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) { | |||
| int k = out_ch / groups; | |||
| float *workspace = reinterpret_cast<float *>(GetWorkspace()); | |||
| float *mat_workspace = workspace + ws_size; | |||
| // zero out pointer | |||
| memset(dw_addr, 0, out_dw->Size()); | |||
| for (i = 0; i < batch; ++i) { | |||
| for (j = 0; j < groups; ++j) { | |||
| float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups); | |||
| float *mat_b = workspace; | |||
| float *mat_c = dw_addr + j * nweights / groups; | |||
| float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups); | |||
| im2row_hwc(im, mat_b, conv_param, false); | |||
| gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n); | |||
| for (int ci = 0; ci < m; ci += chunk) { | |||
| int real_chunk = MSMIN(m - ci, chunk); | |||
| float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch; | |||
| float *mat_b = workspace; | |||
| float *mat_c = dw_addr + j * nweights / groups; | |||
| float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups); | |||
| memset(mat_b, 0, n * real_chunk * sizeof(float)); | |||
| RollingIm2ColPackUnitFp32(im, conv_param, mat_b, real_chunk, ci); | |||
| GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b, n, 1, mat_c, n, mat_workspace); | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -34,6 +34,14 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| size_t ws_size = 0; | |||
| #ifdef ENABLE_ARM32 | |||
| const int chunk = C4NUM; | |||
| #else | |||
| const int chunk = C12NUM; | |||
| #endif | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -51,11 +51,14 @@ int ConvolutionGradInputCPUKernel::Init() { | |||
| conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; | |||
| conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; | |||
| ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; | |||
| size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * | |||
| conv_param->input_channel_ / conv_param->group_; | |||
| int n = conv_param->kernel_w_ * conv_param->kernel_h_ * conv_param->input_channel_ / conv_param->group_; | |||
| int k = conv_param->output_channel_ / conv_param->group_; | |||
| SetWorkspaceSize(ws_size * sizeof(float)); | |||
| size_t mat_alloc = MatSizeTotal(chunk, n, k, 0); | |||
| SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| @@ -88,16 +91,30 @@ int ConvolutionGradInputCPUKernel::Execute(int task_id) { | |||
| int n = k_w * k_h * in_ch / groups; | |||
| int k = out_ch / groups; | |||
| float *workspace = reinterpret_cast<float *>(GetWorkspace()); | |||
| float *mat_workspace = workspace + ws_size; | |||
| memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w); | |||
| for (i = 0; i < batch; ++i) { | |||
| for (j = 0; j < groups; ++j) { | |||
| float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups); | |||
| float *mat_b = w_addr + j * nweights / groups; | |||
| float *mat_c = workspace; | |||
| gemm(0, 0, m, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n); | |||
| col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param); | |||
| GemmCb gcb; | |||
| for (int ci = 0; ci < m; ci += chunk) { | |||
| float *mat_b; | |||
| if (ci == 0) { | |||
| mat_b = w_addr + j * nweights / groups; | |||
| gcb.ca = 0; | |||
| gcb.cb = 0; | |||
| gcb.bias = nullptr; | |||
| gcb.atype = ActType_No; | |||
| } else { | |||
| mat_b = gcb.mat_b; | |||
| gcb.cb = 1; | |||
| } | |||
| int real_chunk = MSMIN(m - ci, chunk); | |||
| float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch; | |||
| float *mat_c = workspace; | |||
| GemmMatmulPlus(0, 0, real_chunk, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n, mat_workspace, &gcb); | |||
| rolling_col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), | |||
| conv_param, real_chunk, ci); | |||
| } | |||
| } | |||
| } | |||
| @@ -33,6 +33,14 @@ class ConvolutionGradInputCPUKernel : public LiteKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| size_t ws_size = 0; | |||
| #ifdef ENABLE_ARM32 | |||
| const int chunk = C4NUM; | |||
| #else | |||
| const int chunk = C12NUM; | |||
| #endif | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -50,10 +50,14 @@ int DeConvolutionGradFilterCPUKernel::Init() { | |||
| conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; | |||
| conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; | |||
| int ws_size = conv_param->input_h_ * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * | |||
| conv_param->output_channel_ / conv_param->group_; | |||
| ws_size = chunk * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ / | |||
| conv_param->group_; | |||
| SetWorkspaceSize(ws_size * sizeof(float)); | |||
| int m = conv_param->input_channel_ / conv_param->group_; | |||
| int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ / conv_param->group_; | |||
| size_t mat_alloc = MatSizeTotal(n, m, chunk * conv_param->input_w_, conv_param->input_channel_); | |||
| SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| @@ -82,21 +86,25 @@ int DeConvolutionGradFilterCPUKernel::Execute(int task_id) { | |||
| int out_h = conv_param->output_h_; | |||
| int out_w = conv_param->output_w_; | |||
| int m = in_ch / groups; | |||
| int n = k_h * k_w * out_ch / groups; | |||
| int k = in_h * in_w; | |||
| const int m = in_ch / groups; | |||
| const int n = k_h * k_w * out_ch / groups; | |||
| float *workspace = reinterpret_cast<float *>(GetWorkspace()); | |||
| float *mat_workspace = workspace + ws_size; | |||
| // zero out pointer | |||
| memset(dw_addr, 0, out_dw->Size()); | |||
| for (i = 0; i < batch; ++i) { | |||
| for (j = 0; j < groups; ++j) { | |||
| float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)); | |||
| float *mat_b = workspace; | |||
| float *mat_c = dw_addr + j * m; | |||
| float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups)); | |||
| im2row_hwc(im, mat_b, conv_param, true); | |||
| gemm(0, 0, n, m, k, 1, mat_b, k, mat_a, in_ch, 1, mat_c, in_ch); | |||
| for (int ci = 0; ci < in_h; ci += chunk) { | |||
| int real_chunk = MSMIN(in_h - ci, chunk); | |||
| float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)) + ci * in_w * in_ch; | |||
| float *mat_b = workspace; | |||
| float *mat_c = dw_addr + j * m; | |||
| float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups)); | |||
| rolling_im2row_hwc(im, mat_b, conv_param, real_chunk, ci); | |||
| GemmMatmul(0, 0, n, m, real_chunk * in_w, 1, mat_b, real_chunk * in_w, mat_a, in_ch, 1, mat_c, in_ch, | |||
| mat_workspace); | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -33,6 +33,10 @@ class DeConvolutionGradFilterCPUKernel : public LiteKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| size_t ws_size = 0; | |||
| const int chunk = 1; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,131 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <random> | |||
| #include <algorithm> | |||
| #include "src/runtime/kernel/arm/fp32_grad/dropout.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/fp32_grad/dropout_parameter.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_NULL_PTR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Dropout; | |||
| namespace mindspore::kernel { | |||
| int DropoutCPUKernel::Init() { | |||
| auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) { | |||
| MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one"; | |||
| return RET_ERROR; | |||
| } | |||
| if (param->ratio_ >= 1.0f) { | |||
| scale_ = 1.0f; | |||
| } else { | |||
| scale_ = 1. / (1. - param->ratio_); | |||
| } | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int DropoutCPUKernel::ReSize() { return RET_OK; } | |||
| int DropoutCPUKernel::Execute(int task_id) { | |||
| auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); | |||
| auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| auto mask = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData()); | |||
| auto length = in_tensors_.at(kInputIndex)->ElementsNum(); | |||
| auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if (IsEval()) { | |||
| std::copy(input_ptr, input_ptr + length, output_ptr); | |||
| } else { | |||
| std::default_random_engine generator; | |||
| std::bernoulli_distribution distribution(param->ratio_); | |||
| for (int i = 0; i < length; i++) { | |||
| mask[i] = distribution(generator); | |||
| output_ptr[i] = input_ptr[i] * mask[i] * scale_; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int RunDropout(void *cdata, int task_id) { | |||
| auto dropout = reinterpret_cast<DropoutCPUKernel *>(cdata); | |||
| auto error_code = dropout->Execute(task_id); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Dropout Run error task_id[" << task_id << "] error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DropoutCPUKernel::Run() { | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropout, this, 1); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Dropout function error error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuDropoutFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Dropout opParameter nullptr."; | |||
| return nullptr; | |||
| } | |||
| if (desc.type != schema::PrimitiveType_Dropout) { | |||
| MS_LOG(ERROR) << "Dropout desc type should be " << schema::PrimitiveType_Dropout << " got " << desc.type; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) DropoutCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "Dropout new kernel failed."; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Dropout, CpuDropoutFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| namespace mindspore::kernel { | |||
| class DropoutCPUKernel : public LiteKernel { | |||
| public: | |||
| DropoutCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DropoutCPUKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| float scale_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ | |||
| @@ -0,0 +1,118 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <random> | |||
| #include "src/runtime/kernel/arm/fp32_grad/dropout_grad.h" | |||
| #include "nnacl/fp32_grad/dropout_grad.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/fp32_grad/dropout_parameter.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_NULL_PTR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DropoutGrad; | |||
| namespace mindspore::kernel { | |||
| int DropoutGradCPUKernel::Init() { | |||
| auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) { | |||
| MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one"; | |||
| return RET_ERROR; | |||
| } | |||
| if (param->ratio_ >= 1.0f) { | |||
| scale_ = 1.0f; | |||
| } else { | |||
| scale_ = 1. / (1. - param->ratio_); | |||
| } | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int DropoutGradCPUKernel::ReSize() { return RET_OK; } | |||
| int DropoutGradCPUKernel::Execute(int task_id) { | |||
| auto yt_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); | |||
| auto mask_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| auto length = in_tensors_.at(kInputIndex)->ElementsNum(); | |||
| DropoutGrad(yt_ptr, mask_ptr, output_ptr, length, scale_); | |||
| return RET_OK; | |||
| } | |||
| int RunDropoutGrad(void *cdata, int task_id) { | |||
| auto dropout = reinterpret_cast<DropoutGradCPUKernel *>(cdata); | |||
| auto error_code = dropout->Execute(task_id); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Dropout Grad Run error task_id[" << task_id << "] error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DropoutGradCPUKernel::Run() { | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropoutGrad, this, 1); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Dropout Grad function error error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuDropoutGradFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::InnerContext *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "DropoutGrad opParameter nullptr."; | |||
| return nullptr; | |||
| } | |||
| if (desc.type != schema::PrimitiveType_DropoutGrad) { | |||
| MS_LOG(ERROR) << "DropoutGrad desc type should be " << schema::PrimitiveType_DropoutGrad << " got " << desc.type; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) DropoutGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "DropoutGrad new kernel failed."; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DropoutGrad, CpuDropoutGradFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| namespace mindspore::kernel { | |||
| class DropoutGradCPUKernel : public LiteKernel { | |||
| public: | |||
| DropoutGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DropoutGradCPUKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Execute(int task_id); | |||
| private: | |||
| float scale_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ | |||
| @@ -41,8 +41,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab | |||
| float logit = | |||
| -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]); | |||
| grads[i * param_->number_of_classes_ + j] = | |||
| (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]) / | |||
| param_->batch_size_; | |||
| (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]); | |||
| total_loss += labels[i * param_->number_of_classes_ + j] * logit; | |||
| } | |||
| } | |||
| @@ -63,7 +62,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { | |||
| auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| float *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| float *grads = NULL; | |||
| if (is_train() && out_tensors_.size() > 1) { | |||
| if (IsTrain() && out_tensors_.size() > 1) { | |||
| grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData()); | |||
| } | |||
| size_t data_size = in_tensors_.at(0)->ElementsNum(); | |||
| @@ -86,7 +86,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { | |||
| auto labels = reinterpret_cast<int *>(in_tensors_.at(1)->data_c()); | |||
| float *out = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | |||
| float *grads = NULL; | |||
| if (is_train() && out_tensors_.size() > 1) { | |||
| if (IsTrain() && out_tensors_.size() > 1) { | |||
| grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData()); | |||
| } | |||
| size_t data_size = in_tensors_.at(0)->ElementsNum(); | |||
| @@ -99,7 +99,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { | |||
| std::fill(losses_, losses_ + data_size, 0.f); | |||
| std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f); | |||
| Softmax(ins, losses_, sum_data_, &sm_params_); | |||
| if (is_train()) { | |||
| if (IsTrain()) { | |||
| GradPostExecute(labels, losses_, grads, out); | |||
| } else { | |||
| ForwardPostExecute(labels, losses_, out); | |||
| @@ -36,6 +36,10 @@ | |||
| #include "src/ops/bn_grad.h" | |||
| #include "nnacl/fp32_grad/batch_norm.h" | |||
| #include "src/ops/adam.h" | |||
| #include "nnacl/fp32_grad/dropout_parameter.h" | |||
| #include "src/ops/dropout.h" | |||
| #include "src/ops/dropout_grad.h" | |||
| #include "src/ops/arithmetic.h" | |||
| #include "src/ops/oneslike.h" | |||
| #include "src/ops/binary_cross_entropy.h" | |||
| #include "src/ops/binary_cross_entropy_grad.h" | |||
| @@ -399,10 +403,66 @@ OpParameter *PopulateBNGradParameter(const mindspore::lite::PrimitiveC *primitiv | |||
| bnGrad_param->op_parameter_.type_ = primitive->Type(); | |||
| auto bngrad = reinterpret_cast<mindspore::lite::BNGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||
| bnGrad_param->epsilon_ = bngrad->GetEps(); | |||
| bnGrad_param->momentum_ = 0.1; | |||
| bnGrad_param->momentum_ = bngrad->GetMomentum(); | |||
| return reinterpret_cast<OpParameter *>(bnGrad_param); | |||
| } | |||
| OpParameter *PopulateDropoutParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| DropoutParameter *dropout_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter))); | |||
| if (dropout_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "malloc Dropout Parameter failed."; | |||
| return nullptr; | |||
| } | |||
| memset(dropout_parameter, 0, sizeof(DropoutParameter)); | |||
| dropout_parameter->op_parameter_.type_ = primitive->Type(); | |||
| auto param = reinterpret_cast<mindspore::lite::Dropout *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||
| dropout_parameter->ratio_ = param->GetRatio(); | |||
| if (dropout_parameter->ratio_ < 0.f || dropout_parameter->ratio_ > 1.f) { | |||
| MS_LOG(ERROR) << "Dropout ratio must be between 0 to 1, got " << dropout_parameter->ratio_; | |||
| free(dropout_parameter); | |||
| return nullptr; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(dropout_parameter); | |||
| } | |||
| OpParameter *PopulateDropoutGradParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| DropoutParameter *dropoutGrad_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter))); | |||
| if (dropoutGrad_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "malloc Dropout Grad Parameter failed."; | |||
| return nullptr; | |||
| } | |||
| memset(dropoutGrad_parameter, 0, sizeof(DropoutParameter)); | |||
| dropoutGrad_parameter->op_parameter_.type_ = primitive->Type(); | |||
| auto param = reinterpret_cast<mindspore::lite::DropoutGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||
| dropoutGrad_parameter->ratio_ = param->GetRatio(); | |||
| if (dropoutGrad_parameter->ratio_ < 0.f || dropoutGrad_parameter->ratio_ > 1.f) { | |||
| MS_LOG(ERROR) << "Dropout Grad ratio must be between 0 to 1, got " << dropoutGrad_parameter->ratio_; | |||
| free(dropoutGrad_parameter); | |||
| return nullptr; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(dropoutGrad_parameter); | |||
| } | |||
| OpParameter *PopulateArithmeticGradParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| ArithmeticParameter *arithmetic_param = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); | |||
| if (arithmetic_param == nullptr) { | |||
| MS_LOG(ERROR) << "malloc ArithmeticParameter failed."; | |||
| return nullptr; | |||
| } | |||
| memset(arithmetic_param, 0, sizeof(ArithmeticParameter)); | |||
| arithmetic_param->op_parameter_.type_ = primitive->Type(); | |||
| arithmetic_param->broadcasting_ = ((lite::Arithmetic *)primitive)->Broadcasting(); | |||
| arithmetic_param->ndim_ = ((lite::Arithmetic *)primitive)->NDims(); | |||
| auto tmp_shape = ((lite::Arithmetic *)primitive)->InShape0(); | |||
| memcpy(arithmetic_param->in_shape0_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); | |||
| tmp_shape = ((lite::Arithmetic *)primitive)->InShape1(); | |||
| memcpy(arithmetic_param->in_shape1_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); | |||
| tmp_shape = ((lite::Arithmetic *)primitive)->OutputShape(); | |||
| memcpy(arithmetic_param->out_shape_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); | |||
| return reinterpret_cast<OpParameter *>(arithmetic_param); | |||
| } | |||
| void PopulateTrainParameters() { | |||
| lite::Registry ApplyMomentumParameterRegistry(schema::PrimitiveType_ApplyMomentum, PopulateApplyMomentumParameter); | |||
| lite::Registry BiasGradParameterRegistry(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter); | |||
| @@ -430,6 +490,10 @@ void PopulateTrainParameters() { | |||
| lite::Registry OnesLikeParameterRegistry(schema::PrimitiveType_OnesLike, DefaultPopulateParameter); | |||
| lite::Registry UnsortedSegmentSumParameterRegistry(schema::PrimitiveType_UnsortedSegmentSum, | |||
| DefaultPopulateParameter); | |||
| lite::Registry DropoutParameterRegistry(schema::PrimitiveType_Dropout, PopulateDropoutParameter); | |||
| lite::Registry DropGradParameterRegistry(schema::PrimitiveType_DropoutGrad, PopulateDropoutGradParameter); | |||
| lite::Registry MaximumGradParameterRegistry(schema::PrimitiveType_MaximumGrad, PopulateArithmeticGradParameter); | |||
| lite::Registry MinimumGradParameterRegistry(schema::PrimitiveType_MinimumGrad, PopulateArithmeticGradParameter); | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -15,9 +15,12 @@ | |||
| */ | |||
| #include "src/train/train_session.h" | |||
| #include <sys/stat.h> | |||
| #include <algorithm> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <iostream> | |||
| #include <fstream> | |||
| #include "include/errorcode.h" | |||
| #include "include/train_model.h" | |||
| #include "src/common/utils.h" | |||
| @@ -98,6 +101,21 @@ int TrainSession::CompileTrainGraph(mindspore::lite::TrainModel *model) { | |||
| for (auto inTensor : inputs_) inTensor->MutableData(); | |||
| RestoreOps(restore); | |||
| AllocWorkSpace(); | |||
| MarkOptimizedKernels(); | |||
| CompileTrainKernels(); | |||
| if (train_mode_) { | |||
| auto ret1 = Train(); | |||
| if (ret1 != RET_OK) { | |||
| MS_LOG(ERROR) << "faild to initialize network in train mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| auto ret1 = Eval(); | |||
| if (ret1 != RET_OK) { | |||
| MS_LOG(ERROR) << "faild to initialize network in eval mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -110,34 +128,67 @@ void *TrainSession::ExportToBuf(char *buf, size_t *len) const { return model_->E | |||
| int TrainSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) { | |||
| this->outputs_.clear(); | |||
| for (auto ms_tensors : output_node_map_) | |||
| for (auto ms_tensor : ms_tensors.second) this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor))); | |||
| if (train_mode_) return lite::LiteSession::RunGraph(before, after); | |||
| // build out tensor | |||
| for (auto ms_tensors : output_node_map_) { | |||
| for (auto ms_tensor : ms_tensors.second) { | |||
| this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor))); | |||
| } | |||
| } | |||
| if (this->context_ == nullptr) { | |||
| MS_LOG(ERROR) << "context is null"; | |||
| return lite::RET_NULL_PTR; | |||
| } | |||
| auto run_kernel = (train_mode_) ? train_kernels_ : inference_kernels_; | |||
| lite::Executor executor; | |||
| if (before == nullptr && after == nullptr) { | |||
| return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get()); | |||
| return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get()); | |||
| } else { | |||
| return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get(), before, | |||
| after); | |||
| return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get(), before, after); | |||
| } | |||
| } | |||
| void TrainSession::Train() { | |||
| int TrainSession::SaveToFile(const std::string &filename) const { | |||
| size_t fb_size = 0; | |||
| auto *buf = reinterpret_cast<char *>(ExportToBuf(nullptr, &fb_size)); | |||
| if (buf == NULL) { | |||
| MS_LOG(ERROR) << "Could not Export Trained model"; | |||
| return lite::RET_NULL_PTR; | |||
| } | |||
| std::ofstream ofs(filename); | |||
| if ((true != ofs.good()) || (true != ofs.is_open())) { | |||
| MS_LOG(ERROR) << "Could not open file \"" << filename << "\" for writing"; | |||
| free(buf); | |||
| return RET_ERROR; | |||
| } | |||
| ofs.seekp(0, std::ios::beg); | |||
| ofs.write(buf, fb_size); | |||
| ofs.close(); | |||
| free(buf); | |||
| return chmod(filename.c_str(), S_IRUSR); | |||
| } | |||
| int TrainSession::Train() { | |||
| for (auto ori_kernel : kernels_) { | |||
| MS_ASSERT(nullptr != ori_kernel); | |||
| if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| ori_kernel->train(); | |||
| auto ret = ori_kernel->Train(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << ori_kernel->name() << " failed to set train mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel); | |||
| MS_ASSERT(nullptr != sub_graph); | |||
| for (auto kernel : sub_graph->nodes()) { | |||
| MS_ASSERT(nullptr != kernel); | |||
| kernel->train(); | |||
| auto ret = kernel->Train(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << kernel->name() << " failed to set train mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -157,6 +208,7 @@ void TrainSession::Train() { | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| void TrainSession::UpdateOutputMapByLossKernel(const kernel::LiteKernel *kernel) { | |||
| @@ -190,17 +242,25 @@ void TrainSession::UpdateOutputMapByInKernel(const kernel::LiteKernel *kernel) { | |||
| } | |||
| } | |||
| void TrainSession::Eval() { | |||
| int TrainSession::Eval() { | |||
| for (auto ori_kernel : kernels_) { | |||
| MS_ASSERT(nullptr != ori_kernel); | |||
| if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| ori_kernel->eval(); | |||
| auto ret = ori_kernel->Eval(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << ori_kernel->name() << " failed to set eval mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel); | |||
| MS_ASSERT(nullptr != sub_graph); | |||
| for (auto kernel : sub_graph->nodes()) { | |||
| MS_ASSERT(nullptr != kernel); | |||
| kernel->eval(); | |||
| auto ret = kernel->Eval(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << kernel->name() << " failed to set eval mode"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -221,6 +281,7 @@ void TrainSession::Eval() { | |||
| if (inference_kernels_.size() == 0) { | |||
| BuildInferenceKernelsMap(); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *v) { | |||
| @@ -234,24 +295,25 @@ void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, st | |||
| void TrainSession::BuildInferenceKernelsMap() { | |||
| std::vector<kernel::LiteKernel *> req_kernels; | |||
| for (auto ori_kernel : kernels_) { | |||
| if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| if (IsLossKernel(ori_kernel)) { // For each loss in the system add backward tree | |||
| for (auto in_node : ori_kernel->in_kernels()) { | |||
| for (auto kernel : this->kernels_) { | |||
| if (kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| if (IsLossKernel(kernel)) { // For each loss in the system add backward tree | |||
| for (auto in_node : kernel->in_kernels()) { | |||
| BuildInferenceKernelsRecursive(in_node, &req_kernels); | |||
| } | |||
| } | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel); | |||
| for (auto kernel : sub_graph->nodes()) { | |||
| if (IsLossKernel(kernel)) { // For each loss in the system add backward tree | |||
| for (auto in_node : kernel->in_kernels()) { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel); | |||
| for (auto sb_kernel : sub_graph->nodes()) { | |||
| if (IsLossKernel(sb_kernel)) { // For each loss in the system add backward tree | |||
| for (auto in_node : sb_kernel->in_kernels()) { | |||
| BuildInferenceKernelsRecursive(in_node, &req_kernels); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| inference_kernels_.clear(); | |||
| for (auto ori_kernel : kernels_) { | |||
| if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| @@ -272,10 +334,71 @@ void TrainSession::BuildInferenceKernelsMap() { | |||
| } | |||
| } | |||
| bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) { | |||
| void TrainSession::CompileTrainKernels() { | |||
| train_kernels_.clear(); | |||
| for (auto ori_kernel : kernels_) { | |||
| if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| train_kernels_.push_back(ori_kernel); | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(ori_kernel); | |||
| for (auto kernel : sub_graph->nodes()) { | |||
| train_kernels_.push_back(kernel); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void TrainSession::MarkOptimizedKernels() { | |||
| std::vector<lite::Tensor *> ot; | |||
| for (auto kernel : this->kernels_) { | |||
| if (kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| if (IsOptimizer(kernel)) { | |||
| std::copy(kernel->in_tensors().begin(), kernel->in_tensors().end(), std::back_inserter(ot)); | |||
| } | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel); | |||
| for (auto sb_kernel : sub_graph->nodes()) { | |||
| if (IsOptimizer(sb_kernel)) { | |||
| std::copy(sb_kernel->in_tensors().begin(), sb_kernel->in_tensors().end(), std::back_inserter(ot)); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| for (auto kernel : this->kernels_) { | |||
| if (kernel->subgraph_type() == kernel::kNotSubGraph) { | |||
| if (!IsOptimizer(kernel)) { | |||
| for (auto it : kernel->in_tensors()) { | |||
| if (std::find(ot.begin(), ot.end(), it) != ot.end()) { | |||
| kernel->SetTrainable(true); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel); | |||
| for (auto sb_kernel : sub_graph->nodes()) { | |||
| if (!IsOptimizer(sb_kernel)) { | |||
| for (auto it : sb_kernel->in_tensors()) { | |||
| if (std::find(ot.begin(), ot.end(), it) != ot.end()) { | |||
| sb_kernel->SetTrainable(true); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) const { | |||
| return (kernel->Type() == schema::PrimitiveType_SoftmaxCrossEntropy); | |||
| } | |||
| bool TrainSession::IsOptimizer(kernel::LiteKernel *kernel) const { | |||
| return ((kernel->Type() == schema::PrimitiveType_Adam) || (kernel->Type() == schema::PrimitiveType_Sgd) || | |||
| (kernel->Type() == schema::PrimitiveType_ApplyMomentum)); | |||
| } | |||
| } // namespace lite | |||
| session::TrainSession *session::TrainSession::CreateSession(lite::Context *context) { | |||
| @@ -55,9 +55,10 @@ class TrainSession : virtual public session::TrainSession, virtual public lite:: | |||
| int CompileTrainGraph(lite::TrainModel *model) override; | |||
| void *ExportToBuf(char *buf, size_t *len) const override; | |||
| int SaveToFile(const std::string &filename) const override; | |||
| void Train() override; | |||
| void Eval() override; | |||
| int Train() override; | |||
| int Eval() override; | |||
| void BindThread(bool if_bind) override { return lite::LiteSession::BindThread(if_bind); } | |||
| std::vector<tensor::MSTensor *> GetInputs() const override { return lite::LiteSession::GetInputs(); } | |||
| @@ -84,16 +85,19 @@ class TrainSession : virtual public session::TrainSession, virtual public lite:: | |||
| protected: | |||
| void AllocWorkSpace(); | |||
| bool IsLossKernel(const kernel::LiteKernel *kernel); | |||
| bool IsLossKernel(const kernel::LiteKernel *kernel) const; | |||
| bool IsOptimizer(kernel::LiteKernel *kernel) const; | |||
| virtual void MarkOptimizedKernels(); | |||
| virtual std::vector<CreatorOp> ReplaceOps(); | |||
| virtual void RestoreOps(const std::vector<CreatorOp> &restore); | |||
| virtual void BuildInferenceKernelsMap(); | |||
| virtual void BuildInferenceKernelsRecursive(kernel::LiteKernel *ker, std::vector<kernel::LiteKernel *> *req_kernels); | |||
| virtual void CompileTrainKernels(); | |||
| TrainModel *model_ = nullptr; | |||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> orig_output_map_; | |||
| std::unordered_map<std::string, mindspore::tensor::MSTensor *> orig_output_tensor_map_; | |||
| std::vector<kernel::LiteKernel *> inference_kernels_; | |||
| std::vector<kernel::LiteKernel *> train_kernels_; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,8 @@ | |||
| mini_alexnet | |||
| mobilenetv1 | |||
| mobilenetv2 | |||
| mobilenetv3 | |||
| lenet | |||
| effnet | |||
| effnet_tune | |||
| resnet | |||
| @@ -0,0 +1,394 @@ | |||
| #!/bin/bash | |||
| # Run Export on x86 platform and create output test files: | |||
function Run_Export(){
    cd $models_path || exit 1
    # The docker image needs CLOUD_MODEL_ZOO to locate the model sources.
    if [[ -z "${CLOUD_MODEL_ZOO}" ]]; then
        echo "CLOUD_MODEL_ZOO is not defined - exiting export models"
        exit 1
    fi
    # Export mindspore train models (one <name>_train_export.py per line;
    # lines starting with '#' are comments):
    while read line; do
        model_name=${line}
        if [[ $model_name == \#* ]]; then
            continue
        fi
        echo ${model_name}'_train_export.py' >> "${export_log_file}"
        echo 'exporting' ${model_name}
        echo 'docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python '${models_path}'/'${model_name}'_train_export.py' >> "${export_log_file}"
        if docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python ${models_path}'/'${model_name}_train_export.py; then
            export_result='export mindspore '${model_name}'_train_export pass'
        else
            export_result='export mindspore '${model_name}'_train_export failed'
        fi
        echo ${export_result} >> ${export_result_file}
    done < ${models_mindspore_train_config}
}
| # Run converter on x86 platform: | |||
function Run_Converter() {
    # Unzip x86 runtime and converter packages.
    cd ${x86_path} || exit 1
    tar -zxf mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train.tar.gz || exit 1
    tar -zxf mindspore-lite-${version}-converter-ubuntu-train.tar.gz || exit 1
    cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu-train || exit 1
    cp converter/converter_lite ./ || exit 1

    # Start from an empty output directory for the converted .ms models.
    rm -rf ${ms_models_path}
    mkdir -p ${ms_models_path}

    # Convert mindspore train models:
    while read line; do
        model_name=${line}
        if [[ $model_name == \#* ]]; then
            continue
        fi
        echo ${model_name}'_train' >> "${run_converter_log_file}"
        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}'_train.mindir --outputFile='${ms_models_path}'/'${model_name}'_train --trainModel=true' >> "${run_converter_log_file}"
        if LD_LIBRARY_PATH=./lib/:./third_party/protobuf/lib:./third_party/flatbuffers/lib:./third_party/glog/lib \
            ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name}_train.mindir \
            --outputFile=${ms_models_path}/${model_name}'_train' \
            --trainModel=true; then
            converter_result='converter mindspore '${model_name}'_train pass'
        else
            converter_result='converter mindspore '${model_name}'_train failed'
        fi
        echo ${converter_result} >> ${run_converter_result_file}
    done < ${models_mindspore_train_config}
}
| # Run on x86 platform: | |||
function Run_x86() {
    # Run mindspore converted train models:
    while read line; do
        model_name=${line}
        if [[ $model_name == \#* ]]; then
            continue
        fi
        echo ${model_name}'_train' >> "${run_x86_log_file}"
        echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86}-train >> "${run_x86_log_file}"
        cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train || return 1
        # Log the exact command executed below. Fix: input1 comes from
        # ${train_io_path}, not ${input_path} — the old log line did not match
        # the command actually run.
        echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./net_train/net_train --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}'/'${model_name}'_input1.bin,'${train_io_path}'/'${model_name}'_input2.bin --expectedDataFile='${train_io_path}'/'${model_name}'_outputs.bin --exportFile='${ms_models_path}'/'${model_name}'_train_exported.ms' >> "${run_x86_log_file}"
        echo '-------------------------------------------------------------------------------' >> "${run_x86_log_file}"
        LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib \
        ${run_valgrind}./net_train/net_train \
        --modelFile=${ms_models_path}/${model_name}_train.ms \
        --inDataFile=${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin \
        --expectedDataFile=${train_io_path}/${model_name}_outputs.bin \
        --exportFile=${ms_models_path}/${model_name}_train_exported.ms >> "${run_x86_log_file}"
        if [ $? = 0 ]; then
            run_result='x86: '${model_name}'_train pass'; echo ${run_result} >> ${run_net_train_result_file}
        else
            run_result='x86: '${model_name}'_train failed'; echo ${run_result} >> ${run_net_train_result_file}
        fi
    done < ${models_mindspore_train_config}
}
| # Run on arm platform: | |||
| # Gets a parameter - arm64/arm32 | |||
function Run_arm() {
    # Select per-architecture packages and log files. ld_library_path is the
    # value exported on the device before running net_train (arm32 also needs
    # /data/local/tmp/ itself on the path); hoisting it here removes the
    # duplicated arm64/arm32 command strings that only differed in this value.
    if [ "$1" == arm64 ]; then
        arm_path=${arm64_path}
        process_unit=${process_unit_arm64}
        version_arm=${version_arm64}
        run_arm_log_file=${run_arm64_log_file}
        adb_cmd_run_file=${adb_cmd_arm64_run_file}
        adb_push_log_file=${adb_push_arm64_log_file}
        adb_cmd_file=${adb_cmd_arm64_file}
        ld_library_path='/data/local/tmp/net_train_test'
    elif [ "$1" == arm32 ]; then
        arm_path=${arm32_path}
        process_unit=${process_unit_arm32}
        version_arm=${version_arm32}
        run_arm_log_file=${run_arm32_log_file}
        adb_cmd_run_file=${adb_cmd_arm32_run_file}
        adb_push_log_file=${adb_push_arm32_log_file}
        adb_cmd_file=${adb_cmd_arm32_file}
        ld_library_path='/data/local/tmp/:/data/local/tmp/net_train_test'
    else
        echo 'type ' $1 'is not supported'
        exit 1
    fi
    arm_type=$1

    # Unzip
    cd ${arm_path} || exit 1
    tar -zxf mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train.tar.gz || exit 1

    # If build with minddata, copy the minddata related libs
    cd ${net_train_test_path} || exit 1
    if [ -f ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ]; then
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libjpeg.so ${net_train_test_path}/libjpeg.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libturbojpeg.so ${net_train_test_path}/libturbojpeg.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_core.so ${net_train_test_path}/libopencv_core.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgcodecs.so ${net_train_test_path}/libopencv_imgcodecs.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgproc.so ${net_train_test_path}/libopencv_imgproc.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ${net_train_test_path}/libminddata-lite.so || exit 1
    fi
    cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite.so ${net_train_test_path}/libmindspore-lite.so || exit 1
    # arm64 builds additionally ship fp16 and optimized kernels.
    if [ "${arm_type}" == arm64 ]; then
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-fp16.so ${net_train_test_path}/libmindspore-lite-fp16.so || exit 1
        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-optimize.so ${net_train_test_path}/libmindspore-lite-optimize.so || exit 1
    fi
    cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/net_train/net_train ${net_train_test_path}/net_train || exit 1

    # adb push all needed files to the phone
    adb -s ${device_id} push ${net_train_test_path} /data/local/tmp/ > ${adb_push_log_file}

    # run adb ,run session ,check the result:
    echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_file}
    echo 'chmod 777 net_train' >> ${adb_cmd_file}
    adb -s ${device_id} shell < ${adb_cmd_file}

    # Run mindir converted train models:
    while read line; do
        model_name=${line}
        if [[ $model_name == \#* ]]; then
            continue
        fi
        # run net_train test without clib data
        echo ${model_name}'_train' >> "${run_arm_log_file}"
        adb -s ${device_id} push ${train_io_path}/${model_name}_input*.bin ${train_io_path}/${model_name}_outputs.bin /data/local/tmp/net_train_test >> ${adb_push_log_file}
        echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_run_file}
        # One command template for both architectures (only LD_LIBRARY_PATH differs).
        net_train_cmd='export LD_LIBRARY_PATH='${ld_library_path}';./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms'
        echo "${net_train_cmd}" >> "${run_arm_log_file}"
        echo "${net_train_cmd}" >> "${adb_cmd_run_file}"
        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
        if [ $? = 0 ]; then
            run_result=${arm_type}': '${model_name}'_train pass'; echo ${run_result} >> ${run_net_train_result_file}
        else
            run_result=${arm_type}': '${model_name}'_train failed'; echo ${run_result} >> ${run_net_train_result_file}
        fi
    done < ${models_mindspore_train_config}
}
| # Print start msg before run testcase | |||
# Print the table header (column titles + separator) shown before the
# per-testcase result rows.  Column spacing here must line up with the
# printf widths used in Print_Benchmark_Result.
function MS_PRINT_TESTCASE_START_MSG() {
echo ""
echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
echo -e "env Testcase Result "
echo -e "--- -------- ------ "
}
# Print the closing separator after test-case results have been listed
# Print the closing separator line that terminates a result table.
function MS_PRINT_TESTCASE_END_MSG() {
echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
}
# Print the result lines stored in file $1 as an aligned four-column table,
# framed by separator lines.
#
# Each line of $1 holds whitespace-separated fields, e.g.
#   <env>: <testcase> <status>
#
# Fix: the original did `arr=("${line}")`, which creates a ONE-element array
# (quoting suppresses word splitting); it only appeared to work because the
# later unquoted ${arr[0]} expansion re-split the whole line.  Split the
# line explicitly with `read -a` instead.
function Print_Result() {
    MS_PRINT_TESTCASE_END_MSG
    while read -r line; do
        read -r -a arr <<< "${line}"
        printf "%-15s %-20s %-90s %-7s\n" "${arr[0]}" "${arr[1]}" "${arr[2]}" "${arr[3]}"
    done < "$1"
    MS_PRINT_TESTCASE_END_MSG
}
# Remember where the script was launched from; the models config file is
# resolved relative to this directory.
basepath=$(pwd)
echo ${basepath}
# Example:run_net_train.sh -r /home/emir/Work/TestingEnv/release -m /home/emir/Work/TestingEnv/train_models -i /home/emir/Work/TestingEnv/train_io -d "8KE5T19620002408"
# For running on arm64, use -t to set platform tools path (for using adb commands)
# Command-line options:
#   -r <path>  release packages directory (tarballs per platform)
#   -m <path>  models directory
#   -i <path>  train input/expected-output data directory
#   -e <val>   enable the model-export step when set to 1
#   -d <id>    adb device id of the phone to test on
#   -v         run the x86 tests under valgrind
while getopts "r:m:d:i:e:v" opt; do
case ${opt} in
r)
release_path=${OPTARG}
echo "release_path is ${OPTARG}"
;;
m)
models_path=${OPTARG}
echo "models_path is ${OPTARG}"
;;
i)
train_io_path=${OPTARG}
echo "train_io_path is ${OPTARG}"
;;
d)
device_id=${OPTARG}
echo "device_id is ${OPTARG}"
;;
e)
enable_export=${OPTARG}
echo "enable_export = ${OPTARG}"
;;
v)
# run_valgrind is prepended to the x86 net_train command line elsewhere.
run_valgrind="valgrind "
echo "Run x86 with valgrind"
;;
?)
echo "unknown para"
exit 1;;
esac
done
# Derive version / process-unit strings from the release tarball file names.
# Package names look like:
#   mindspore-lite-<version>-runtime-<arch>-<process_unit>-train.tar.gz
# so after splitting on '-', index 2 is the version and index 5 the unit.
arm64_path=${release_path}/android_aarch64
file=$(ls ${arm64_path}/*runtime-arm64*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version_arm64=${file_name_array[2]}
process_unit_arm64=${file_name_array[5]}
# Same extraction for the arm32 package.
arm32_path=${release_path}/android_aarch32
file=$(ls ${arm32_path}/*runtime-arm32*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version_arm32=${file_name_array[2]}
process_unit_arm32=${file_name_array[5]}
# And for the x86 (Ubuntu) package.
x86_path=${release_path}/ubuntu_x86
file=$(ls ${x86_path}/*runtime-x86*train.tar.gz)
file_name="${file##*/}"
IFS="-" read -r -a file_name_array <<< "$file_name"
version=${file_name_array[2]}
process_unit_x86=${file_name_array[5]}
# Set models config filepath
models_mindspore_train_config=${basepath}/models_ms_train.cfg
ms_models_path=${models_path}/ms_models
# Recreate the logs directory from scratch so stale results never leak
# into this run.
logs_path=${models_path}/logs
rm -rf ${logs_path}
mkdir -p ${logs_path}
# Export model if enabled
if [[ $enable_export == 1 ]]; then
echo "Start Exporting models ..."
# Write export result to temp file
export_log_file=${logs_path}/export_log.txt
echo ' ' > ${export_log_file}
export_result_file=${logs_path}/export_result.txt
echo ' ' > ${export_result_file}
# Run export
Run_Export
Print_Result ${export_result_file}
fi
# Prepare converter log / result files (truncate any previous content).
run_converter_log_file=${logs_path}/run_converter_log.txt
echo ' ' > ${run_converter_log_file}
run_converter_result_file=${logs_path}/run_converter_result.txt
echo ' ' > ${run_converter_result_file}

# Start the overall wall-clock timer for the whole test run.
START=$(date +%s.%N)

# Run converter.
# Fix: the original called Run_Converter synchronously, so $! held a
# stale/unset PID, the subsequent `wait` did not track the converter at
# all, and Run_converter_status was the status of `wait`, not of the
# converter.  Launch it in the background (matching the Run_x86 pattern)
# so $!/wait/$? form a correct chain.
echo "start run converter ..."
Run_Converter &
Run_converter_PID=$!
sleep 1
wait ${Run_converter_PID}
Run_converter_status=$?

# Check converter result; abort the whole run on failure since there is
# nothing to benchmark without converted models.
if [[ ${Run_converter_status} = 0 ]];then
    echo "Run converter success"
    Print_Result ${run_converter_result_file}
else
    echo "Run converter failed"
    cat ${run_converter_log_file}
    Print_Result ${run_converter_result_file}
    exit 1
fi
# File collecting one pass/fail line per (platform, model) combination.
run_net_train_result_file=${logs_path}/run_net_train_result.txt
echo ' ' > ${run_net_train_result_file}

# Create per-platform log and adb command files.
run_x86_log_file=${logs_path}/run_x86_log.txt
echo 'run x86 logs: ' > ${run_x86_log_file}

run_arm64_log_file=${logs_path}/run_arm64_log.txt
echo 'run arm64 logs: ' > ${run_arm64_log_file}
adb_push_arm64_log_file=${logs_path}/adb_push_arm64_log.txt
adb_cmd_arm64_file=${logs_path}/adb_arm64_cmd.txt
adb_cmd_arm64_run_file=${logs_path}/adb_arm64_cmd_run.txt

run_arm32_log_file=${logs_path}/run_arm32_log.txt
# Fix: the arm32 header was written to ${run_arm64_log_file} (copy-paste
# bug), clobbering the arm64 header and leaving the arm32 log file
# uncreated until first use.
echo 'run arm32 logs: ' > ${run_arm32_log_file}
adb_push_arm32_log_file=${logs_path}/adb_push_arm32_log.txt
adb_cmd_arm32_file=${logs_path}/adb_arm32_cmd.txt
adb_cmd_arm32_run_file=${logs_path}/adb_arm32_cmd_run.txt
# Copy the MindSpore models:
echo "Push files to net_train_test folder and run net_train"
# Stage all converted .ms models into a fresh working directory that is
# later pushed to the device.
net_train_test_path=${models_path}/net_train_test
rm -rf ${net_train_test_path}
mkdir -p ${net_train_test_path}
cp -a ${ms_models_path}/*.ms ${net_train_test_path} || exit 1
# Run on x86
echo "start Run x86 ..."
# Launched in the background so $! captures its PID for the wait below.
Run_x86 &
Run_x86_PID=$!
sleep 1
# wait ${Run_x86_PID}
cat ${run_net_train_result_file}
wait ${Run_x86_PID}
Run_x86_status=$?
# exit 0
# Run on arm64 (synchronous; Run_arm writes its own result lines).
echo "start Run arm64 ..."
Run_arm arm64
Run_arm64_status=$?
sleep 3
# Run on arm32
echo "start Run arm32 ..."
Run_arm arm32
Run_arm32_status=$?
sleep 1
# Stop the wall-clock timer started before the converter step.
END=$(date +%s.%N)
DIFF=$(echo "$END - $START" | bc)
# Print the accumulated net_train results as an aligned three-column table
# (env, testcase, result), framed by the standard header/footer.
#
# Fix: the original did `arr=("${line}")`, which creates a ONE-element
# array (quoting suppresses word splitting) and only worked through the
# accidental re-splitting of the later unquoted expansions.  Split the
# line explicitly instead.
function Print_Benchmark_Result() {
    MS_PRINT_TESTCASE_START_MSG
    while read -r line; do
        read -r -a arr <<< "${line}"
        printf "%-20s %-100s %-7s\n" "${arr[0]}" "${arr[1]}" "${arr[2]}"
    done < "${run_net_train_result_file}"
    MS_PRINT_TESTCASE_END_MSG
}
# Check net_train result and return value.
# On failure, dump the corresponding platform log so CI output contains
# the details; note the script still exits 0 below regardless.
if [[ ${Run_x86_status} != 0 ]];then
echo "Run_x86 failed"
cat ${run_x86_log_file}
fi
if [[ ${Run_arm64_status} != 0 ]];then
echo "Run_arm64 failed"
cat ${run_arm64_log_file}
fi
if [[ ${Run_arm32_status} != 0 ]];then
echo "Run_arm32 failed"
cat ${run_arm32_log_file}
fi
echo "Test ended - Results:"
Print_Benchmark_Result
echo "Test run Time:" $DIFF
exit 0
| @@ -1,5 +1,5 @@ | |||
| #!/bin/bash | |||
| cd ./ut/src/runtime/kernel/arm || exit 1 | |||
| ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.efficient_net | |||
| ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer | |||
| ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet | |||
| # ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer | |||
| # ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet | |||
| @@ -42,11 +42,17 @@ TEST_F(TestActGradFp32, ReluGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/relu_y_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| std::string yt_path = "./test_data/activationGrad/relu_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| ReluGrad(yt_data, input_data, output_data_size, output_data); | |||
| @@ -90,10 +96,15 @@ TEST_F(TestActGradFp32, Relu6GradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/relu6_y_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/activationGrad/relu6_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| Relu6Grad(yt_data, input_data, 50, output_data); | |||
| @@ -136,10 +147,15 @@ TEST_F(TestActGradFp32, LReluGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/lrelu_y_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/activationGrad/lrelu_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| LReluGrad(yt_data, input_data, 50, output_data, 0.1); | |||
| @@ -182,10 +198,15 @@ TEST_F(TestActGradFp32, SigmoidGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/sigmoid_y_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/activationGrad/sigmoid_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SigmoidGrad(yt_data, input_data, 50, output_data); | |||
| @@ -229,10 +250,15 @@ TEST_F(TestActGradFp32, tanhGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/tanh_y_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/activationGrad/tanh_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| TanhGrad(yt_data, input_data, 50, output_data); | |||
| @@ -274,11 +300,17 @@ TEST_F(TestActGradFp32, hswishGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/hswish_x_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| std::string yt_path = "./test_data/activationGrad/hswish_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| HSwishGrad(yt_data, input_data, static_cast<int>(output_data_size), output_data); | |||
| @@ -311,4 +343,58 @@ TEST_F(TestActGradFp32, hswishGradFp32) { | |||
| delete[] yt_data; | |||
| MS_LOG(INFO) << "hswishGradFp32 passed"; | |||
| } | |||
| TEST_F(TestActGradFp32, hsigmoidGradFp32) { | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| const size_t output_data_size = 10; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/activationGrad/hsig_x_50.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| std::string yt_path = "./test_data/activationGrad/hsig_yt_50.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| EXPECT_EQ(input_size, output_data_size * sizeof(float)); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| HSigmoidGrad(yt_data, input_data, static_cast<int>(output_data_size), output_data); | |||
| } | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| HSigmoidGrad(yt_data, input_data, output_data_size, output_data); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| printf("==================output data=================\n"); | |||
| size_t min = (output_data_size < 20UL) ? output_data_size : 20UL; | |||
| for (size_t i = 0; i < min; i++) { | |||
| std::cout << output_data[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/activationGrad/hsig_out_50.bin"; | |||
| int res = CompareRelativeOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] output_data; | |||
| delete[] yt_data; | |||
| MS_LOG(INFO) << "hsigmoidGradFp32 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -24,9 +24,9 @@ | |||
| #include "src/kernel_registry.h" | |||
| #include "src/ops/arithmetic_grad.h" | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| namespace mindspore { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type, | |||
| std::vector<lite::Tensor *> inputs, | |||
| std::vector<lite::Tensor *> outputs) { | |||
| @@ -37,6 +37,12 @@ ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveTyp | |||
| } | |||
| arithmetic_param->op_parameter_.type_ = type; | |||
| schema::PrimitiveT *prim = new schema::PrimitiveT; | |||
| if (prim == nullptr) { | |||
| free(arithmetic_param); | |||
| MS_LOG(ERROR) << "new PrimitiveT failed."; | |||
| return nullptr; | |||
| } | |||
| prim->value.type = type; | |||
| auto agrad = mindspore::lite::ArithmeticGrad(prim); | |||
| agrad.InferShape(inputs, outputs); | |||
| @@ -55,6 +61,7 @@ class TestArithmeticGradFp32 : public mindspore::CommonTest { | |||
| std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id) { | |||
| size_t input_size; | |||
| std::vector<lite::Tensor *> ret_vector; | |||
| std::vector<int> large_dim({4, 6}); | |||
| std::vector<int> small_dim({6}); | |||
| int large_size = (4 * 6); | |||
| @@ -80,36 +87,127 @@ std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id | |||
| } | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(test, &input_size)); | |||
| if (dy_data == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| return ret_vector; | |||
| } | |||
| lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| if (dy_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| return ret_vector; | |||
| } | |||
| dy_tensor->set_data(dy_data); | |||
| auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size)); | |||
| if (x1_data == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| return ret_vector; | |||
| } | |||
| lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| if (x1_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| return ret_vector; | |||
| } | |||
| x1_tensor->set_data(x1_data); | |||
| auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size)); | |||
| if (x2_data == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| return ret_vector; | |||
| } | |||
| lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| if (x2_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| delete[] x2_data; | |||
| return ret_vector; | |||
| } | |||
| x2_tensor->set_data(x2_data); | |||
| auto dx1_data = new float[large_size]; | |||
| if (dx1_data == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| delete[] x2_data; | |||
| delete x2_tensor; | |||
| return ret_vector; | |||
| } | |||
| lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| if (dx1_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| delete[] x2_data; | |||
| delete x2_tensor; | |||
| delete[] dx1_data; | |||
| return ret_vector; | |||
| } | |||
| dx1_tensor->set_data(dx1_data); | |||
| auto dx2_data = new float[small_size]; | |||
| if (dx2_data == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| delete[] x2_data; | |||
| delete x2_tensor; | |||
| delete[] dx1_data; | |||
| delete dx1_tensor; | |||
| return ret_vector; | |||
| } | |||
| lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| if (dx2_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new operator failed"; | |||
| delete[] dy_data; | |||
| delete dy_tensor; | |||
| delete[] x1_data; | |||
| delete x1_tensor; | |||
| delete[] x2_data; | |||
| delete x2_tensor; | |||
| delete[] dx1_data; | |||
| delete dx1_tensor; | |||
| delete[] dx2_data; | |||
| return ret_vector; | |||
| } | |||
| dx2_tensor->set_data(dx2_data); | |||
| std::vector<lite::Tensor *> ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor}; | |||
| ret_vector.push_back(dy_tensor); | |||
| ret_vector.push_back(x1_tensor); | |||
| ret_vector.push_back(x2_tensor); | |||
| ret_vector.push_back(dx1_tensor); | |||
| ret_vector.push_back(dx2_tensor); | |||
| return ret_vector; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -117,7 +215,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -144,10 +244,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -155,7 +257,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -184,10 +288,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -195,7 +301,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -225,10 +333,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -236,7 +346,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -266,10 +378,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -277,7 +391,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -305,10 +421,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -316,8 +434,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| int loop_count = 1000; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| @@ -354,10 +473,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -365,7 +486,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -394,10 +517,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -405,7 +530,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -434,10 +561,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -445,7 +574,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -474,10 +605,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -485,7 +618,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -514,10 +649,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -525,7 +662,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| @@ -555,10 +694,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -566,7 +707,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -595,10 +738,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { | |||
| TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7); | |||
| ASSERT_NE(all_tensors.size(), 0); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| @@ -606,7 +751,9 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| @@ -630,6 +777,91 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { | |||
| MS_LOG(INFO) << "TestDivGrad2Fp32 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| TEST_F(TestArithmeticGradFp32, TestMaximumGradBroadcastFp32) { | |||
| std::vector<int> large_dim({4, 6}); | |||
| std::vector<int> small_dim({6}); | |||
| large_dim = std::vector<int>({1, 2, 3}); | |||
| small_dim = std::vector<int>({1, 3}); | |||
| int large_size = (2 * 3); | |||
| int small_size = 3; | |||
| size_t input_size; | |||
| char *dx1_file = const_cast<char *>("./test_data/operators/x1_maximum.bin"); | |||
| char *dx2_file = const_cast<char *>("./test_data/operators/x2_maximum.bin"); | |||
| std::string yt_path = "./test_data/operators/yt_maximum.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| EXPECT_EQ(input_size, large_size * sizeof(float)); | |||
| lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| ASSERT_NE(dy_tensor, nullptr); | |||
| dy_tensor->set_data(dy_data); | |||
| auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size)); | |||
| ASSERT_NE(x1_data, nullptr); | |||
| lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| ASSERT_NE(x1_tensor, nullptr); | |||
| x1_tensor->set_data(x1_data); | |||
| auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size)); | |||
| ASSERT_NE(x2_data, nullptr); | |||
| lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| ASSERT_NE(x2_tensor, nullptr); | |||
| x2_tensor->set_data(x2_data); | |||
| auto dx1_data = new float[small_size]; | |||
| ASSERT_NE(dx1_data, nullptr); | |||
| lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| ASSERT_NE(dx1_tensor, nullptr); | |||
| dx1_tensor->set_data(dx1_data); | |||
| auto dx2_data = new float[large_size]; | |||
| ASSERT_NE(dx2_data, nullptr); | |||
| lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| ASSERT_NE(dx2_tensor, nullptr); | |||
| dx2_tensor->set_data(dx2_data); | |||
| std::vector<lite::Tensor *> inputs = {x1_tensor, x2_tensor, dy_tensor}; | |||
| std::vector<lite::Tensor *> outputs = {dx1_tensor, dx2_tensor}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MaximumGrad, inputs, outputs); | |||
| ASSERT_NE(param, nullptr); | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| ASSERT_EQ(lite::RET_OK, ctx.Init()); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MaximumGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string dx1_path = "./test_data/operators/x1_grad_maximum.bin"; | |||
| EXPECT_EQ(0, CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path)); | |||
| std::string output_path = "./test_data/operators/x2_grad_maximum.bin"; | |||
| EXPECT_EQ(0, CompareRelativeOutput(output_ptr, output_path)); | |||
| for (auto tensor : inputs) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestMaximumGradBroadcastFp32 passed"; | |||
| } | |||
| #endif | |||
| } // namespace mindspore | |||
| @@ -31,15 +31,20 @@ class TestBiasGradFp32 : public mindspore::CommonTest { | |||
| TEST_F(TestBiasGradFp32, BiasGradFp32) { | |||
| // prepare stage | |||
| ArithmeticParameter *bias_param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); | |||
| ASSERT_NE(bias_param, nullptr); | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_dy({10, 28, 28, 7}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(input_data); | |||
| std::vector<lite::Tensor *> inputs = {&dy_tensor}; | |||
| auto output_data = new float[7]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| std::vector<int> dim_dw = {7}; | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(output_data); | |||
| @@ -51,8 +56,9 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| printf("==================output data=================\n"); | |||
| @@ -61,7 +67,57 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) { | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/biasgradfp32_1_db_7.bin"; | |||
| CompareOutput(output_data, 7, output_path); | |||
| auto res = CompareRelativeOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] output_data; | |||
| // delete bias_param; | |||
| dy_tensor.set_data(nullptr); | |||
| dw_tensor.set_data(nullptr); | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "BiasGradFp32 passed"; | |||
| } | |||
| TEST_F(TestBiasGradFp32, BiasGrad2DFp32) { | |||
| // prepare stage | |||
| ArithmeticParameter *bias_param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); | |||
| ASSERT_NE(bias_param, nullptr); | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/operators/fc_yt.f32"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::vector<int> dim_dy({2, 20}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(input_data); | |||
| std::vector<lite::Tensor *> inputs = {&dy_tensor}; | |||
| auto output_data = new float[20]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| std::vector<int> dim_dw = {20}; | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(output_data); | |||
| std::vector<lite::Tensor *> outputs = {&dw_tensor}; | |||
| lite::InnerContext ctx; | |||
| ctx.thread_num_ = 1; | |||
| ASSERT_EQ(lite::RET_OK, ctx.Init()); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << output_data[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/fc_b_grad.f32"; | |||
| auto res = CompareRelativeOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] output_data; | |||
| @@ -35,6 +35,10 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector< | |||
| size_t input_size = 0; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_name.c_str(), &input_size)); | |||
| auto tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, dim); | |||
| if (tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new tensor failed"; | |||
| return nullptr; | |||
| } | |||
| tensor->set_data(input_data); | |||
| EXPECT_EQ(input_size, tensor->Size()); | |||
| return tensor; | |||
| @@ -43,7 +47,9 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector< | |||
| TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| // prepare stage | |||
| auto bn_param = static_cast<BNGradParameter *>(malloc(sizeof(BNGradParameter))); | |||
| bn_param->epsilon_ = 0.00001; | |||
| ASSERT_NE(bn_param, nullptr); | |||
| bn_param->epsilon_ = 1e-2; | |||
| bn_param->momentum_ = 0.1; | |||
| const int batch = 2; | |||
| const int channels = 3; | |||
| @@ -51,10 +57,16 @@ TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| const int width = 5; | |||
| auto dy_tensor = CreateInTensor("./test_data/bngrad/dy_2_4_5_3.bin", {batch, height, width, channels}); | |||
| ASSERT_NE(dy_tensor, nullptr); | |||
| auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels}); | |||
| ASSERT_NE(x_tensor, nullptr); | |||
| auto scale_tensor = CreateInTensor("./test_data/bngrad/scale_3.bin", {1, 1, 1, channels}); | |||
| ASSERT_NE(scale_tensor, nullptr); | |||
| auto mean_tensor = CreateInTensor("./test_data/bngrad/save_mean_3.bin", {1, 1, 1, channels}); | |||
| ASSERT_NE(mean_tensor, nullptr); | |||
| auto var_tensor = CreateInTensor("././test_data/bngrad/save_var_3.bin", {1, 1, 1, channels}); | |||
| ASSERT_NE(var_tensor, nullptr); | |||
| // prepare output tensors | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels}); | |||
| ASSERT_EQ(dx_tensor.MallocData(), 0); | |||
| @@ -72,27 +84,18 @@ TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bn_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel_obj->Run(); | |||
| } | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel_obj->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| auto time_avg = cost / loop_count; | |||
| std::cout << "single thread running time : " << time_avg << "us\n"; | |||
| kernel_obj->Run(); | |||
| std::cout << "==========dx==========\n"; | |||
| auto dx = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| for (int i = 0; i < 7; i++) std::cout << dx[i] << " "; | |||
| std::cout << "\n"; | |||
| auto res = CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| std::cout << "\n=======dscale=======\n"; | |||
| auto dscale = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| for (int i = 0; i < channels; i++) std::cout << dscale[i] << " "; | |||
| @@ -104,7 +107,6 @@ TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| for (int i = 0; i < 3; i++) std::cout << dbias[i] << " "; | |||
| std::cout << "\n"; | |||
| res = CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| for (auto v : inputs) { | |||
| delete[] reinterpret_cast<float *>(v->MutableData()); | |||
| v->set_data(nullptr); | |||
| @@ -117,8 +119,10 @@ TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| TEST_F(TestBNGradFp32, BNTtrainFp32) { | |||
| auto bn_param = static_cast<BatchNormParameter *>(malloc(sizeof(BatchNormParameter))); | |||
| bn_param->epsilon_ = 0.00001; | |||
| bn_param->momentum_ = 0.; | |||
| ASSERT_NE(bn_param, nullptr); | |||
| bn_param->epsilon_ = 1e-2; | |||
| bn_param->momentum_ = 0.1; | |||
| const int batch = 2; | |||
| const int channels = 3; | |||
| const int height = 4; | |||
| @@ -173,27 +177,34 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) { | |||
| ASSERT_EQ(lite::RET_OK, context.Init()); | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bn_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); | |||
| float *save_mean = reinterpret_cast<float *>(save_mean_tensor.MutableData()); | |||
| float *save_var = reinterpret_cast<float *>(save_var_tensor.MutableData()); | |||
| std::fill(save_mean, save_mean + channels, 0.f); | |||
| std::fill(save_var, save_var + channels, 0.f); | |||
| for (int i = 0; i < channels; i++) { | |||
| save_var[i] = 1.f; | |||
| save_mean[i] = 0.f; | |||
| } | |||
| float *curr_mean = reinterpret_cast<float *>(mean_tensor.MutableData()); | |||
| float *curr_var = reinterpret_cast<float *>(var_tensor.MutableData()); | |||
| kernel_obj->train(); | |||
| kernel_obj->Train(); | |||
| kernel_obj->SetTrainable(true); | |||
| kernel_obj->Run(); | |||
| std::cout << "================save_mean==============================\n"; | |||
| for (int i = 0; i < channels; i++) std::cout << save_mean[i] << " "; | |||
| for (int i = 0; i < channels; i++) std::cout << curr_mean[i] << " "; | |||
| std::cout << "\n"; | |||
| std::cout << "===============save_var==============================\n"; | |||
| for (int i = 0; i < channels; i++) std::cout << save_var[i] << " "; | |||
| for (int i = 0; i < channels; i++) std::cout << curr_var[i] << " "; | |||
| std::cout << "\n"; | |||
| delete[] reinterpret_cast<float *>(x_tensor->MutableData()); | |||
| auto res = CompareRelativeOutput(save_mean, "./test_data/bngrad/running_mean_3.bin"); | |||
| auto res = CompareRelativeOutput(curr_mean, "./test_data/bngrad/running_mean_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| res = CompareRelativeOutput(save_var, "./test_data/bngrad/running_var_3.bin"); | |||
| res = CompareRelativeOutput(curr_var, "./test_data/bngrad/running_var_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| x_tensor->set_data(nullptr); | |||
| @@ -77,11 +77,13 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({1, 28, 28, 32}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| @@ -95,11 +97,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convfp32_x_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({1, 28, 28, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({32, 3, 3, 3}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -112,7 +116,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -149,8 +155,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| @@ -168,6 +175,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| size_t output_data_size = | |||
| conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; | |||
| auto dx_data = new float[output_data_size]; | |||
| ASSERT_NE(dx_data, nullptr); | |||
| std::vector<int> dim_dx({1, 28, 28, 3}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.set_data(dx_data); | |||
| @@ -185,7 +193,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -222,8 +232,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| @@ -245,6 +256,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({18, 3, 3, 1}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -257,7 +269,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -293,8 +307,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| @@ -312,6 +327,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| size_t output_data_size = | |||
| conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; | |||
| auto dx_data = new float[output_data_size]; | |||
| ASSERT_NE(dx_data, nullptr); | |||
| std::vector<int> dim_dx({1, 28, 28, 3}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.set_data(dx_data); | |||
| @@ -329,7 +345,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -365,9 +383,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| @@ -389,6 +407,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({18, 3, 3, 1}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -401,7 +420,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -437,8 +458,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| @@ -456,6 +478,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| size_t output_data_size = | |||
| conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; | |||
| auto dx_data = new float[output_data_size]; | |||
| ASSERT_NE(dx_data, nullptr); | |||
| std::vector<int> dim_dx({1, 28, 28, 3}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.set_data(dx_data); | |||
| @@ -473,7 +496,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| int loop_count = 100; | |||
| @@ -504,8 +529,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| size_t x_size; | |||
| std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin"; | |||
| auto x_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(x_path.c_str(), &x_size)); | |||
| @@ -523,6 +549,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { | |||
| size_t output_data_size = | |||
| conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_; | |||
| auto y_data = new float[output_data_size]; | |||
| ASSERT_NE(y_data, nullptr); | |||
| std::vector<int> dim_y({1, 26, 26, 18}); | |||
| lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| y_tensor.set_data(y_data); | |||
| @@ -540,11 +567,12 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { | |||
| auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast<OpParameter *>(conv_param), inputs, | |||
| outputs, &context, 0); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| kernel->train(); | |||
| EXPECT_EQ(kernel->is_train(), 1); | |||
| kernel->Train(); | |||
| EXPECT_EQ(kernel->IsTrain(), 1); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -580,6 +608,8 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -624,11 +654,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convfp32_input0_d2_g2_s2_2_4_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 4}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({12, 3, 3, 2}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -641,7 +673,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -679,6 +713,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -710,6 +746,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 15, 15, 12}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| @@ -717,6 +754,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { | |||
| size_t w_size; | |||
| std::string w_path = "./test_data/conv/convfp32_w_d2_g2_s2_12_2_3_3.bin"; | |||
| auto w_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); | |||
| ASSERT_NE(w_data, nullptr); | |||
| std::vector<int> dim_w({12, 3, 3, 2}); | |||
| lite::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w); | |||
| w_tensor.set_data(w_data); | |||
| @@ -724,6 +762,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { | |||
| size_t output_data_size = | |||
| conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; | |||
| auto dx_data = new float[output_data_size]; | |||
| ASSERT_NE(dx_data, nullptr); | |||
| std::vector<int> dim_dx({2, 32, 32, 4}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.set_data(dx_data); | |||
| @@ -741,7 +780,9 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -32,6 +32,8 @@ class TestDeConvolutionGradFp32 : public mindspore::CommonTest { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -63,24 +65,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_2_9_63_63.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 63, 63, 9}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| size_t output_data_size = | |||
| conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_2_3_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({3, 3, 3, 9}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -93,7 +95,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -101,6 +105,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { | |||
| kernel->Run(); | |||
| } | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| @@ -131,6 +138,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -162,24 +171,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_2_9_65_65.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 65, 65, 9}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| size_t output_data_size = | |||
| conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_2_3_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({9, 3, 3, 3}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -192,7 +201,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -200,6 +211,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { | |||
| kernel->Run(); | |||
| } | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| @@ -230,6 +244,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -261,6 +277,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 65, 65, 9}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| @@ -274,11 +291,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_2_3_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({3, 3, 3, 3}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -291,7 +310,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -329,6 +350,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -360,24 +383,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_s1_2_9_34_34.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 34, 34, 9}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| size_t output_data_size = | |||
| conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_s1_2_3_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({3, 3, 3, 3}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -390,7 +413,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -398,6 +423,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { | |||
| kernel->Run(); | |||
| } | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| @@ -428,6 +456,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -459,24 +489,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g2_s2_2_12_65_65.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 65, 65, 12}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| size_t output_data_size = | |||
| conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g2_s2_2_4_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 4}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({6, 3, 3, 4}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -489,7 +519,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -497,6 +529,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| kernel->Run(); | |||
| } | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| @@ -527,6 +562,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { | |||
| TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| ASSERT_NE(conv_param, nullptr); | |||
| conv_param->input_batch_ = 2; | |||
| conv_param->input_h_ = 32; | |||
| conv_param->input_w_ = 32; | |||
| @@ -558,6 +595,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { | |||
| size_t dy_size; | |||
| std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g12_s2_2_12_65_65.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::vector<int> dim_dy({2, 65, 65, 12}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(dy_data); | |||
| @@ -571,11 +609,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g12_s2_2_12_32_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_x({2, 32, 32, 12}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input_data); | |||
| auto dw_data = new float[output_data_size]; | |||
| ASSERT_NE(dw_data, nullptr); | |||
| std::vector<int> dim_dw({1, 3, 3, 12}); | |||
| lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| dw_tensor.set_data(dw_data); | |||
| @@ -588,7 +628,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); | |||
| // warm up loop | |||
| @@ -90,6 +90,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_Activation; | |||
| auto primitive = new schema::ActivationT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->type = schema::ActivationType_RELU; | |||
| node->primitive->value.value = primitive; | |||
| node->name = "ReLU"; | |||
| @@ -102,6 +103,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_MatMul; | |||
| auto primitive = new schema::MatMulT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->transposeA = false; | |||
| primitive->transposeB = true; | |||
| node->primitive->value.value = primitive; | |||
| @@ -115,6 +117,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_BiasAdd; | |||
| auto primitive = new schema::BiasAddT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->axis.push_back(0); | |||
| node->primitive->value.value = primitive; | |||
| node->name = "BiasAdd"; | |||
| @@ -127,6 +130,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_SoftmaxCrossEntropy; | |||
| auto primitive = new schema::SoftmaxCrossEntropyT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->axis.push_back(0); | |||
| node->primitive->value.value = primitive; | |||
| node->name = "SoftmaxCrossEntropy"; | |||
| @@ -139,6 +143,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_BiasGrad; | |||
| auto primitive = new schema::BiasGradT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->axis.push_back(0); | |||
| node->primitive->value.value = primitive; | |||
| node->name = "BiasGrad"; | |||
| @@ -151,6 +156,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_MatMul; | |||
| auto primitive = new schema::MatMulT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| primitive->transposeA = true; | |||
| primitive->transposeB = false; | |||
| node->primitive->value.value = primitive; | |||
| @@ -164,6 +170,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_ApplyMomentum; | |||
| auto primitive = new schema::ApplyMomentumT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| node->primitive->value.value = primitive; | |||
| node->name = "Momentum"; | |||
| meta_graph->nodes.emplace_back(std::move(node)); | |||
| @@ -175,6 +182,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_ApplyMomentum; | |||
| auto primitive = new schema::ApplyMomentumT; | |||
| ASSERT_NE(primitive, nullptr); | |||
| node->primitive->value.value = primitive; | |||
| node->name = "Momentum"; | |||
| meta_graph->nodes.emplace_back(std::move(node)); | |||
| @@ -450,9 +458,6 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| std::cout << std::endl; | |||
| error = RelativeOutputError(outData, output_path); | |||
| EXPECT_LT(error, 2e-3); | |||
| delete session; | |||
| MS_LOG(INFO) << "TuningLayer passed"; | |||
| } | |||
| int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path, | |||
| @@ -516,6 +521,7 @@ TEST_F(NetworkTest, efficient_net) { | |||
| auto model = lite::TrainModel::Import(buf, net_size); | |||
| delete[] buf; | |||
| auto context = new lite::Context; | |||
| ASSERT_NE(context, nullptr); | |||
| context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| @@ -533,48 +539,6 @@ TEST_F(NetworkTest, efficient_net) { | |||
| ASSERT_EQ(res, 0); | |||
| } | |||
| TEST_F(NetworkTest, lenetnet) { | |||
| char *buf = nullptr; | |||
| size_t net_size = 0; | |||
| std::string net = "./test_data/nets/lenet_train.ms"; | |||
| ReadFile(net.c_str(), &net_size, &buf); | |||
| auto model = lite::TrainModel::Import(buf, net_size); | |||
| delete[] buf; | |||
| auto context = new lite::Context; | |||
| context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| // check registration | |||
| mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance(); | |||
| mindspore::kernel::KernelKey desc1 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, | |||
| mindspore::schema::PrimitiveType_Conv2D}; | |||
| mindspore::kernel::KernelKey desc2 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, | |||
| mindspore::schema::PrimitiveType_DepthwiseConv2D}; | |||
| auto regb1 = reg->GetCreator(desc1); | |||
| auto regb2 = reg->GetCreator(desc2); | |||
| ASSERT_EQ(regb1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); | |||
| auto session = session::TrainSession::CreateSession(context); | |||
| ASSERT_NE(session, nullptr); | |||
| auto ret = session->CompileTrainGraph(model); | |||
| ASSERT_EQ(lite::RET_OK, ret); | |||
| auto rega1 = reg->GetCreator(desc1); | |||
| auto rega2 = reg->GetCreator(desc2); | |||
| ASSERT_EQ(regb1, rega1); | |||
| ASSERT_EQ(regb2, rega2); | |||
| ASSERT_EQ(rega1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); | |||
| // end of check registration | |||
| session->Eval(); | |||
| std::string in = "./test_data/nets/x_lenet.bin"; | |||
| std::string out = "./test_data/nets/y_lenet.bin"; | |||
| auto res = runNet(session, in, out, "24"); | |||
| delete session; | |||
| delete context; | |||
| ASSERT_EQ(res, 0); | |||
| } | |||
| TEST_F(NetworkTest, retina_net) { | |||
| char *buf = nullptr; | |||
| size_t net_size = 0; | |||
| @@ -585,6 +549,7 @@ TEST_F(NetworkTest, retina_net) { | |||
| auto model = lite::Model::Import(buf, net_size); | |||
| delete[] buf; | |||
| auto context = new lite::Context; | |||
| ASSERT_NE(context, nullptr); | |||
| context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| @@ -592,7 +557,7 @@ TEST_F(NetworkTest, retina_net) { | |||
| auto session = session::LiteSession::CreateSession(context); | |||
| ASSERT_NE(session, nullptr); | |||
| auto ret = session->CompileGraph(model); | |||
| ASSERT_EQ(lite::RET_OK, ret); | |||
| EXPECT_EQ(lite::RET_OK, ret); | |||
| // session->Eval(); | |||
| std::string in = "./test_data/nets/test1.hwc_normalized_f32"; | |||
| @@ -619,8 +584,9 @@ TEST_F(NetworkTest, retina_net) { | |||
| final_res |= res; | |||
| } | |||
| ASSERT_EQ(final_res, 0); | |||
| EXPECT_EQ(final_res, 0); | |||
| delete model; | |||
| delete session; | |||
| delete context; | |||
| } | |||
| @@ -635,6 +601,7 @@ TEST_F(NetworkTest, mobileface_net) { | |||
| auto model = lite::Model::Import(buf, net_size); | |||
| delete[] buf; | |||
| auto context = new lite::Context; | |||
| ASSERT_NE(context, nullptr); | |||
| context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| @@ -60,6 +60,8 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(pooling_param, nullptr); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -73,8 +75,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| AvgPoolingGrad(input_data, output_data, pooling_param, 1); | |||
| @@ -108,6 +112,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(pooling_param, nullptr); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -121,12 +127,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_dy({1, 28, 28, 3}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(input_data); | |||
| std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin"; | |||
| auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); | |||
| ASSERT_NE(input1_data, nullptr); | |||
| std::vector<int> dim_x({1, 28, 28, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input1_data); | |||
| @@ -134,6 +142,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| std::vector<lite::Tensor *> inputs = {&dy_tensor, &x_tensor}; | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| std::vector<int> dim_dx({1, 28, 28, 3}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.set_data(output_data); | |||
| @@ -145,7 +154,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| @@ -172,8 +183,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pooling_param); | |||
| ASSERT_NE(pooling_param, nullptr); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->input_batch_ = 3; | |||
| pooling_param->output_batch_ = 3; | |||
| @@ -185,12 +197,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_dy({3, 28, 28, 3}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.set_data(input_data); | |||
| std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin"; | |||
| auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); | |||
| ASSERT_NE(input1_data, nullptr); | |||
| std::vector<int> dim_x({3, 28, 28, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(input1_data); | |||
| @@ -209,7 +223,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| kernel_obj->Run(); | |||
| @@ -236,6 +252,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(pool, nullptr); | |||
| InitPoolingParamFP32(pool); | |||
| pool->output_channel_ = 3; | |||
| pool->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -250,12 +268,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(x_data, nullptr); | |||
| std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(x_data); | |||
| auto yt_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.set_data(yt_data); | |||
| @@ -271,7 +291,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); | |||
| ASSERT_NE(pool_creator, nullptr); | |||
| auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| @@ -295,6 +317,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(pool, nullptr); | |||
| InitPoolingParamFP32(pool); | |||
| pool->output_channel_ = 3; | |||
| pool->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -309,12 +333,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(x_data, nullptr); | |||
| std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(x_data); | |||
| auto yt_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.set_data(yt_data); | |||
| @@ -332,7 +358,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); | |||
| ASSERT_NE(pool_creator, nullptr); | |||
| auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| @@ -356,6 +384,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(pooling_param, nullptr); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_MaxPool; | |||
| @@ -368,14 +398,18 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { | |||
| size_t input_size; | |||
| std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_x_1_28_28_3.bin"; | |||
| auto in_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(i_path.c_str(), &input_size)); | |||
| ASSERT_NE(in_data, nullptr); | |||
| std::string dy_path = "./test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin"; | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &input_size)); | |||
| ASSERT_NE(dy_data, nullptr); | |||
| std::string dx_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin"; | |||
| auto dx_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx_path.c_str(), &input_size)); | |||
| ASSERT_NE(dx_data, nullptr); | |||
| auto output_data = new float[output_data_size]; | |||
| ASSERT_NE(output_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1); | |||
| @@ -412,6 +446,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(maxpool, nullptr); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->pool_mode_ = PoolMode_MaxPool; | |||
| @@ -422,18 +458,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(x_data, nullptr); | |||
| std::vector<int> dim_x({3, 28, 28, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(x_data); | |||
| auto y_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dx_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(y_data, nullptr); | |||
| std::vector<int> dim_y({3, 28, 28, 3}); | |||
| lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| y_tensor.set_data(y_data); | |||
| auto yt_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dy_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.set_data(yt_data); | |||
| @@ -449,8 +488,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); | |||
| ASSERT_NE(maxpool_creator, nullptr); | |||
| auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, | |||
| maxpool_desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| @@ -477,6 +518,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(maxpool, nullptr); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->input_channel_ = 3; | |||
| @@ -492,18 +535,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(x_data, nullptr); | |||
| std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(x_data); | |||
| auto y_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dx_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(y_data, nullptr); | |||
| std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_}); | |||
| lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| y_tensor.set_data(y_data); | |||
| auto yt_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.set_data(yt_data); | |||
| @@ -520,8 +566,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); | |||
| ASSERT_NE(maxpool_creator, nullptr); | |||
| auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, | |||
| maxpool_desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| @@ -548,6 +596,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| ASSERT_NE(maxpool, nullptr); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->input_channel_ = 3; | |||
| @@ -563,18 +613,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(x_data, nullptr); | |||
| std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.set_data(x_data); | |||
| auto y_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dx_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(y_data, nullptr); | |||
| std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_}); | |||
| lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| y_tensor.set_data(y_data); | |||
| auto yt_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.set_data(yt_data); | |||
| @@ -591,11 +644,12 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); | |||
| ASSERT_NE(maxpool_creator, nullptr); | |||
| auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, | |||
| maxpool_desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| kernel->Init(); | |||
| kernel->Run(); | |||
| std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin"; | |||
| @@ -31,17 +31,21 @@ class TestSoftmaxCrossEntropyFp32 : public mindspore::CommonTest { | |||
| TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { | |||
| // prepare stage | |||
| auto sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(malloc(sizeof(SoftmaxCrossEntropyParameter))); | |||
| ASSERT_NE(sce_param, nullptr); | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/operators/sce_fp32_1_y_6_4.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::vector<int> dim_y({6, 4}); | |||
| lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| y_tensor.set_data(input_data); | |||
| std::string label_path = "./test_data/operators/sce_fp32_1_l_6.bin"; | |||
| auto ll_labels = reinterpret_cast<int64_t *>(mindspore::lite::ReadFile(label_path.c_str(), &input_size)); | |||
| ASSERT_NE(ll_labels, nullptr); | |||
| auto labels = new float[6 * 4]; | |||
| ASSERT_NE(labels, nullptr); | |||
| std::fill(labels, labels + 6 * 4, 0.f); | |||
| for (int i = 0; i < 6; i++) labels[i * 4 + ll_labels[i]] = 1.0; | |||
| @@ -52,10 +56,12 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { | |||
| std::vector<lite::Tensor *> inputs = {&y_tensor, &l_tensor}; | |||
| auto loss = new float[1]; | |||
| ASSERT_NE(loss, nullptr); | |||
| std::vector<int> dim_dw({1}); | |||
| lite::Tensor loss_tensor(TypeId::kNumberTypeFloat32, dim_dw); | |||
| loss_tensor.set_data(loss); | |||
| auto grad = new float[24]; | |||
| ASSERT_NE(grad, nullptr); | |||
| lite::Tensor grad_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| grad_tensor.set_data(grad); | |||
| std::vector<lite::Tensor *> outputs = {&loss_tensor, &grad_tensor}; | |||
| @@ -66,7 +72,9 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftmaxCrossEntropy}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(sce_param), &context, desc, nullptr); | |||
| ASSERT_NE(kernel_obj, nullptr); | |||
| mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); | |||
| kernel_obj->Run(); | |||
| @@ -78,16 +86,20 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { | |||
| std::string output_path = "./test_data/operators/sce_fp32_1_loss_1.bin"; | |||
| CompareOutput(loss, 1, output_path); | |||
| ((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->train(); | |||
| ((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->Train(); | |||
| kernel_obj->Run(); | |||
| // normalize by batch size the result | |||
| for (int i = 0; i < 24; i++) { | |||
| grad[i] /= 6; | |||
| } | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 12; i++) { | |||
| std::cout << grad[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string grad_path = "./test_data/operators/sce_fp32_1_dy_6_4.bin"; | |||
| CompareOutput(grad, 24, grad_path); | |||
| auto res = CompareRelativeOutput(grad, grad_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] ll_labels; | |||
| delete[] labels; | |||
| @@ -55,6 +55,7 @@ void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, i | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| ASSERT_NE(softmax_param, nullptr); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 0); | |||
| @@ -64,21 +65,23 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| ASSERT_NE(sum_data, nullptr); | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| ASSERT_NE(sum_mul, nullptr); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| ASSERT_NE(out_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| @@ -112,6 +115,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| ASSERT_NE(softmax_param, nullptr); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 1); | |||
| @@ -121,21 +125,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| ASSERT_NE(sum_data, nullptr); | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| ASSERT_NE(sum_mul, nullptr); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| ASSERT_NE(out_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -171,6 +180,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| ASSERT_NE(softmax_param, nullptr); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 2); | |||
| @@ -180,21 +190,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| ASSERT_NE(sum_data, nullptr); | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| ASSERT_NE(sum_mul, nullptr); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| ASSERT_NE(out_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -230,6 +245,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| ASSERT_NE(softmax_param, nullptr); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 3); | |||
| @@ -239,21 +255,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| ASSERT_NE(sum_data, nullptr); | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| ASSERT_NE(sum_mul, nullptr); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| ASSERT_NE(out_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -289,6 +309,8 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| ASSERT_NE(softmax_param, nullptr); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, -1); | |||
| @@ -298,21 +320,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| ASSERT_NE(sum_data, nullptr); | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| ASSERT_NE(sum_mul, nullptr); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| ASSERT_NE(input_data, nullptr); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| ASSERT_NE(yt_data, nullptr); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| ASSERT_NE(out_data, nullptr); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| @@ -1,2 +1,2 @@ | |||
| V_‚?�K¿ŒÏ§¿øÓà¿…�î>J?/=Å"m?LÒu¿œÂj@!U$?f=ˆ?e¥[?W·Ú¾òÎ ½m©?æ e¾O™·?}4¿¬žˆ?˜B<–ÚK¿íÕÀše›¾¶ÆÚ½Ýø¿p~|>½?ƒ¿Š7ì½ç§ø½E :?JͿ̬>— ? Ä~?Ï«¾óN1¿?>HV¿ú|ʾ¨œØ=€{‰¿IðU?x›¿©v¾°W>úÉ[¾$˜î? ú•¿]›¿4Bu¿û ¾ç4@Ç׿¦+?Ÿ…z>ušB?Åä™=|e >MÚê>¢Ÿ>í¶ß?}Þ0?¹ö§¾©¿ëœ>�û†¾ ù @<ç€?Âv�?våZ?zäÅ¿@±í¾è.ο•8B?ðîo½ Œ¿ªâÔ¾q"m¿n¯‰?”k>=ì"ê>:©¿³»@ÇÉ<>+R¿ | |||
| b±¿6.Œ?“i?¶›v?`j6¿R~]?çJU¼6„s¿GöŒ?M·—¿% ž?Äh”>£È¿½ÇÍ ¿¯ÚG¼Â½?„¦>³Ó“¾«'6?Æ÷@¿�¥ð¿2¿ƒ/V¾K5è¿TÆ>X]?„[Ý?v_Ø¿¥ü¤¾”j?pý˜?€\l?ã.l=°äÀb©? | |||
| ûCE¾Â€‡¾&sò¾ÁðľÎpÑ¿ ·¾fe'ÀÇäº?æ | |||
| ©¿Ï«?uëP>¦(†?¡ç/>�¨Â¿[óQ¾ŠÈR>™YÞ;Ãê¿o…v¿´1ù?rUt?jb?úWM>æóÁ¿´ç,¾;4%À\4¿|Ä¿°_k¿!Ȉ¿ÈTŸ½éÚ¾ f¿h^Ï>µ×&?Ó%V¿¥}¾';�½#Öf>ND¿m´\>³X¹>¦1Ï?ž³[>cË@‹B@3 À¯HÍ?H´¿K�6?Bäî¼{{¾ä— ¿z?*>0ç¦?�1*ÀÉjŠ? ßq?/!"¾I5�>HA?Pc¿<�¿O@P?+Ú³¿vå>ƒçð?Ñëú¿›“(@nŸ?¤±T¾=™j¾¥?…f:?MJC>çÑH¿›D¿ë¼µ=¢—Å?õ‚¾¦Æ†¿!J°¿Ã\¿U§? Uµ¾Füõ¿�lÀ·ø>¡‘%½’÷¿ˆ?øµ¿D¶Þ¿cÊK>¿?YQd¿äi0>ûyÏ?ê_¬?b¡*¿Áƒ ?f<‘¿dÉϾ6Ë¿˜?"¾¹/¾£¾à.�¿‹¦q>„23?‹Â5>iUÂ>J^(?“8–=’º¿è~\¾¢K>e¥ž?›¾=KK~= | |||
| @@ -1 +1 @@ | |||
| ���B���B���B | |||
| r��B���B]��B | |||
| @@ -1 +1 @@ | |||
| �-�<���<���= | |||
| 8�<"��>��� | |||
| @@ -1 +1 @@ | |||
| cה¶?‘ ƒ?;…[? | |||
| ”€�?`י‚?ֱז‚? | |||