| @@ -27,7 +27,6 @@ struct Model; | |||
| } | |||
| namespace session { | |||
| class TrainSession : public lite::LiteSession { | |||
| public: | |||
| TrainSession(); | |||
| @@ -20,8 +20,8 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/errorcode.h" | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, | |||
| BatchNormParameter *param, int task_id, void *output) { | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| @@ -31,7 +31,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_); | |||
| ((float *)output)[cur_offset + c] = | |||
| (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt; | |||
| (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt; | |||
| } | |||
| cur_offset += param->channel_; | |||
| } | |||
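The task slicing above is the usual nnacl pattern: UP_DIV gives each thread a fixed block of units and MSMIN clamps the last block to whatever remains. A minimal stand-alone sketch of that arithmetic with the macros expanded by hand (unit_count and thread_num are made-up example values, not taken from this patch):

#include <stdio.h>

/* Sketch only: how unit_count units of work are split across thread_num tasks,
 * mirroring the units_per_thread / completed_units / cur_unit computation above. */
int main(void) {
  const int unit_count = 10, thread_num = 4;
  const int units_per_thread = (unit_count + thread_num - 1) / thread_num; /* UP_DIV(10, 4) == 3 */
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    const int completed_units = task_id * units_per_thread;
    int cur_unit = unit_count - completed_units;
    if (cur_unit > units_per_thread) cur_unit = units_per_thread; /* MSMIN */
    if (cur_unit < 0) cur_unit = 0;                               /* trailing tasks may get no work */
    printf("task %d: units [%d, %d)\n", task_id, completed_units, completed_units + cur_unit);
  }
  return 0;
}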
| @@ -53,3 +53,22 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset | |||
| cur_offset += param->channel_; | |||
| } | |||
| } | |||
| void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var, | |||
| BatchNormParameter *param, float *save_mean, float *save_inv_var) { | |||
| float N = (float)param->unit_;  // number of batch*spatial samples per channel | |||
| for (int i = 0; i < param->unit_; i++) { | |||
| for (int f = 0; f < param->channel_; f++) { | |||
| int idx = i * param->channel_ + f; | |||
| run_mean[f] += input[idx]; | |||
| run_var[f] += input[idx] * input[idx]; | |||
| } | |||
| } | |||
| for (int f = 0; f < param->channel_; f++) { | |||
| run_mean[f] = run_mean[f] / N; | |||
| run_var[f] = run_var[f] / N - run_mean[f] * run_mean[f]; | |||
| save_mean[f] = momentum * save_mean[f] + (1 - momentum) * run_mean[f]; | |||
| float inv_var = 1.f / sqrt(run_var[f] + param->epsilon_); | |||
| save_inv_var[f] = momentum * save_inv_var[f] + (1 - momentum) * inv_var; | |||
| } | |||
| } | |||
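For reference, a sketch of the statistics the new helper computes per channel c, writing m for momentum, N for param->unit_ (the number of batch-spatial positions per channel) and eps for epsilon_:

\mu_c = \frac{1}{N}\sum_i x_{i,c}, \qquad \sigma_c^2 = \frac{1}{N}\sum_i x_{i,c}^2 - \mu_c^2

\text{save\_mean}_c \leftarrow m\,\text{save\_mean}_c + (1-m)\,\mu_c, \qquad \text{save\_inv\_var}_c \leftarrow m\,\text{save\_inv\_var}_c + (1-m)\,\frac{1}{\sqrt{\sigma_c^2 + eps}}

So run_mean / run_var end up holding the current batch's mean and (biased) variance, while save_mean / save_inv_var carry the exponential moving averages.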
| @@ -28,6 +28,8 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba | |||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output); | |||
| void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var, | |||
| BatchNormParameter *param, float *save_mean, float *save_inv_var); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -27,41 +27,8 @@ void sumSpatialBatch(const float *in, int size, int ch, float *out) { | |||
| } | |||
| } | |||
| void scaleBias(const float *scales, int batch, int n, int size, float *output) { | |||
| for (int i = 0; i < batch * size; i++) | |||
| for (int c = 0; c < n; c++) output[i * n + c] *= scales[c]; | |||
| } | |||
| void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial, | |||
| float *out) { | |||
| int b, f, i; | |||
| for (b = 0; b < batch; ++b) { | |||
| for (i = 0; i < spatial; ++i) { | |||
| for (f = 0; f < filters; ++f) { | |||
| int index = b * filters * spatial + i * filters + f; | |||
| out[index] = (x[index] - mean[f]) * invar[f]; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, | |||
| int n, int size, float *scale_updates) { | |||
| int i, b, f; | |||
| memset(scale_updates, 0, n * sizeof(float)); | |||
| for (b = 0; b < batch; ++b) { | |||
| for (i = 0; i < size; ++i) { | |||
| for (f = 0; f < n; ++f) { | |||
| int index = (b * size + i) * n + f; | |||
| float x_norm = (x[index] - mean[f]) * invar[f]; | |||
| scale_updates[f] += delta[index] * x_norm; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void meanVar(const float *in, int batch, int spatial, int ch, float eps, float *mean, float *invar) { | |||
| float N = batch * spatial; | |||
| static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) { | |||
| float N = (float)size; | |||
| sumSpatialBatch(in, N, ch, mean); | |||
| for (int f = 0; f < ch; ++f) { | |||
| mean[f] /= N; | |||
| @@ -76,49 +43,40 @@ void meanVar(const float *in, int batch, int spatial, int ch, float eps, float * | |||
| } | |||
| } | |||
| void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta) { | |||
| sumSpatialBatch(yt, size, ch, mean_delta); | |||
| for (int i = 0; i < ch; i++) mean_delta[i] *= -invar[i]; | |||
| } | |||
| void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial, | |||
| float *mean_add, float *mean_delta) { | |||
| int i, k; | |||
| memset(mean_add, 0, filters * sizeof(float)); | |||
| for (k = 0; k < spatial * batch; ++k) { | |||
| for (i = 0; i < filters; ++i) { | |||
| int index = k * filters + i; | |||
| mean_add[i] += x[index] - mean[i]; | |||
| void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, | |||
| float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) { | |||
| meanVar(in, size, channels, eps, mean, invar); | |||
| for (int i = 0; i < size; i++) { | |||
| for (int f = 0; f < channels; f++) { | |||
| int ix = i*channels + f; | |||
| float x_hat = (in[ix] - mean[f]) * invar[f]; | |||
| float dxhat = dout[ix] * scale[f]; | |||
| dxhat_sum[f] += dxhat; | |||
| dxhathat_sum[f] += dxhat * x_hat; | |||
| } | |||
| } | |||
| for (i = 0; i < filters; ++i) { | |||
| mean_add[i] *= variance_delta[i] * (-2.f / (spatial * batch)); | |||
| mean_delta[i] += mean_add[i]; | |||
| } | |||
| } | |||
| void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int filters, | |||
| int spatial, float *variance_delta) { | |||
| int i, k; | |||
| memset(variance_delta, 0, filters * sizeof(float)); | |||
| for (k = 0; k < batch * spatial; k++) { | |||
| for (i = 0; i < filters; i++) { | |||
| int index = k * filters + i; | |||
| variance_delta[i] += delta[index] * (x[index] - mean[i]); | |||
| for (int i = 0; i < size; i++) { | |||
| for (int f = 0; f < channels; f++) { | |||
| int ix = i*channels + f; | |||
| float x_hat = (in[ix] - mean[f]) * invar[f]; | |||
| float dxhat = dout[ix] * scale[f]; | |||
| out[ix] = 1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]); | |||
| } | |||
| } | |||
| for (i = 0; i < filters; i++) variance_delta[i] *= -.5 * 1.0f/(invar[i]*invar[i]*invar[i]); | |||
| } | |||
| void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta, | |||
| const float *variance_delta, int batch, int filters, int spatial, float *delta) { | |||
| int f, k; | |||
| for (k = 0; k < batch * spatial; k++) { | |||
| for (f = 0; f < filters; f++) { | |||
| int index = k * filters + f; | |||
| delta[index] = delta[index] * invar[f] + | |||
| variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + | |||
| mean_delta[f] / (spatial * batch); | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, | |||
| int n, int size, float *scale_updates) { | |||
| int i, b, f; | |||
| memset(scale_updates, 0, n * sizeof(float)); | |||
| for (b = 0; b < batch; ++b) { | |||
| for (i = 0; i < size; ++i) { | |||
| for (f = 0; f < n; ++f) { | |||
| int index = (b * size + i) * n + f; | |||
| float x_norm = (x[index] - mean[f]) * invar[f]; | |||
| scale_updates[f] += delta[index] * x_norm; | |||
| } | |||
| } | |||
| } | |||
| } | |||
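backwardX above fuses the statistics recomputation (meanVar) with the standard batch-norm input gradient; note that dxhat_sum and dxhathat_sum are only accumulated into, so the caller is expected to zero them beforehand. Per channel f, with N = size, g the incoming gradient dout and gamma the per-channel scale, the two passes compute

\hat{x}_i = (x_i - \mu)\,\text{invar}, \qquad d\hat{x}_i = g_i\,\gamma

dx_i = \frac{\text{invar}}{N}\Big(N\,d\hat{x}_i - \sum_j d\hat{x}_j - \hat{x}_i \sum_j d\hat{x}_j\,\hat{x}_j\Big), \qquad d\gamma = \sum_i g_i\,\hat{x}_i \quad (\text{backwardScale})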
| @@ -30,18 +30,11 @@ extern "C" { | |||
| #endif | |||
| void sumSpatialBatch(const float *in, int size, int ch, float *out); | |||
| void scaleBias(const float *scales, int batch, int n, int size, float *output); | |||
| void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial, float *out); | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, | |||
| int size, float *scale_updates); | |||
| void meanVar(const float *in, int batch, int size, int ch, float eps, float *mean, float *invar); | |||
| void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta); | |||
| void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int ch, | |||
| int spatial, float *variance_delta); | |||
| void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial, | |||
| float *mean_add, float *mean_delta); | |||
| void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta, | |||
| const float *variance_delta, int batch, int filters, int spatial, float *delta); | |||
| void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, | |||
| float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out); | |||
| void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, | |||
| int n, int size, float *scale_updates); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32_grad/softmax_grad.h" | |||
| #include <string.h> | |||
| #include "nnacl/fp32_grad/gemm.h" | |||
| void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul, | |||
| SoftmaxParameter *parameter) { | |||
| int32_t axis = parameter->axis_; | |||
| int n_dim = parameter->n_dim_; | |||
| int ele_size = parameter->element_size_; | |||
| int *input_shape = parameter->input_shape_; | |||
| int dim = 1; | |||
| int inner_size = 1, outer_size = 1; | |||
| for (int i = 0; i < axis; i++) { | |||
| outer_size *= input_shape[i]; | |||
| } | |||
| for (int i = axis + 1; i < n_dim; i++) { | |||
| inner_size *= input_shape[i]; | |||
| } | |||
| for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0f; | |||
| for (int i = 0; i < n_dim; i++) dim *= input_shape[i]; | |||
| dim /= outer_size; | |||
| memcpy(output_ptr, yt_ptr, ele_size * sizeof(float)); | |||
| int M = input_shape[axis]; | |||
| int N = inner_size; | |||
| int K = 1; | |||
| for (int i = 0; i < outer_size; i++) { | |||
| int outer_offset = i * dim; | |||
| memset(sum_data, 0, inner_size * sizeof(float)); | |||
| for (int k = 0; k < inner_size; k++) { | |||
| int inner_offset = outer_offset + k; | |||
| for (int j = 0; j < input_shape[axis]; j++) { | |||
| int offset = inner_offset + j * inner_size; | |||
| sum_data[k] += output_ptr[offset] * input_ptr[offset]; | |||
| } | |||
| } | |||
| gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outer_offset], N); | |||
| } | |||
| for (int i = 0; i < ele_size; i++) { | |||
| output_ptr[i] *= input_ptr[i]; | |||
| } | |||
| } | |||
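The loop plus the gemm call with alpha = -1 implement the usual softmax backward, one outer slice at a time: input_ptr holds the softmax output y, yt_ptr the incoming gradient g, sum_data the per-inner-position dot product, and the all-ones sum_mul vector lets gemm broadcast that dot product back across the axis before the final element-wise multiply by y. Per element along the axis this is

dx_k = y_k\Big(g_k - \sum_j g_j\,y_j\Big)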
| @@ -14,10 +14,15 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/fp32/softmax.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| typedef struct SoftmaxCrossEntropyParameter { | |||
| OpParameter op_parameter_; | |||
| @@ -26,4 +31,11 @@ typedef struct SoftmaxCrossEntropyParameter { | |||
| int n_dim_; | |||
| int input_shape_[5]; | |||
| } SoftmaxCrossEntropyParameter; | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ | |||
| void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, | |||
| float *sum_mul, SoftmaxParameter *parameter); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| @@ -21,7 +21,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| static int CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { | |||
| static float CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { | |||
| float error = 0; | |||
| // relative error | |||
| @@ -35,6 +35,16 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in | |||
| diffSum += diff; | |||
| } | |||
| error = diffSum / sum; | |||
| return error; | |||
| } | |||
| int CompareRelativeOutput(float *output_data, std::string file_path) { | |||
| size_t output_size; | |||
| auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size)); | |||
| size_t output_num = output_size / sizeof(float); | |||
| // std::cout << "output num : " << output_num << "\n"; | |||
| float error = CompareOutputRelativeData(output_data, ground_truth, output_num); | |||
| delete[] ground_truth; | |||
| if (error > 1e-4) { | |||
| std::cout << "has accuracy error: " << error << "\n"; | |||
| return 1; | |||
| @@ -42,14 +52,15 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in | |||
| return 0; | |||
| } | |||
| int CompareRelativeOutput(float *output_data, std::string file_path) { | |||
| float RelativeOutputError(float *output_data, std::string file_path) { | |||
| size_t output_size; | |||
| auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size)); | |||
| size_t output_num = output_size / sizeof(float); | |||
| std::cout << "output num : " << output_num << "\n"; | |||
| int res = CompareOutputRelativeData(output_data, ground_truth, output_num); | |||
| delete[] ground_truth; | |||
| return res; | |||
| float error = CompareOutputRelativeData(output_data, ground_truth, output_num); | |||
| delete[] ground_truth; | |||
| return error; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
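With this split, CompareRelativeOutput keeps the hard pass/fail behaviour (1e-4 threshold) while RelativeOutputError exposes the raw metric to callers. A hypothetical usage sketch; the include path, the CheckOutput wrapper and the "conv_out.bin" file name are invented for illustration:

#include <iostream>
#include <string>
#include <vector>
#include "src/common/file_utils_ext.h"  // assumed location of the two declarations above

// Hypothetical test helper: gate on the threshold, but also log the raw error.
void CheckOutput(std::vector<float> *output) {
  int ret = mindspore::lite::CompareRelativeOutput(output->data(), "conv_out.bin");
  float err = mindspore::lite::RelativeOutputError(output->data(), "conv_out.bin");
  std::cout << "pass=" << (ret == 0) << " relative_error=" << err << std::endl;
}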
| @@ -21,6 +21,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| int CompareRelativeOutput(float *output_data, std::string file_path); | |||
| float RelativeOutputError(float *output_data, std::string file_path); | |||
| } | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_ | |||
| @@ -32,13 +32,16 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| static std::vector<schema::PrimitiveType> packed_op = { | |||
| schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, schema::PrimitiveType_DepthwiseConv2D, | |||
| schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_MatMul}; | |||
| // this method will not check whether tensor_idx is a weight tensor index, caller should ensure this. | |||
| static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor_idx) { | |||
| #ifdef SUPPORT_TRAIN | |||
| return false; | |||
| #endif | |||
| MS_ASSERT(model != nullptr); | |||
| auto post_node_idxes = GetLinkedPostNodeIdx(model, tensor_idx); | |||
| return std::none_of(post_node_idxes.begin(), post_node_idxes.end(), [&](const size_t &post_node_idx) { | |||
| @@ -267,7 +270,9 @@ int LiteSession::CompileGraph(Model *model) { | |||
| } | |||
| executor->Prepare(this->kernels_); | |||
| #ifndef SUPPORT_TRAIN | |||
| model->Free(); | |||
| #endif | |||
| return RET_OK; | |||
| } | |||
| @@ -42,9 +42,11 @@ bool ConvertNodes(const schema::MetaGraph *meta_graph, Model *model) { | |||
| for (uint32_t j = 0; j < count; ++j) { | |||
| node->input_indices_.push_back(size_t(c_node->inputIndex()->GetAs<uint32_t>(j))); | |||
| } | |||
| count = c_node->outputIndex()->size(); | |||
| for (uint32_t j = 0; j < count; ++j) { | |||
| node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs<uint32_t>(j))); | |||
| if (c_node->outputIndex() != nullptr) { | |||
| count = c_node->outputIndex()->size(); | |||
| for (uint32_t j = 0; j < count; ++j) { | |||
| node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs<uint32_t>(j))); | |||
| } | |||
| } | |||
| model->nodes_.push_back(node); | |||
| } | |||
| @@ -46,6 +46,8 @@ int ActivationGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodeP | |||
| } else if (prim.name() == "ReLU6") { | |||
| attr->type = schema::ActivationType_RELU6; | |||
| } | |||
| // auto alpha = GetValue<float>(prim.GetAttr("alpha")); | |||
| attr->alpha = 0; // alpha; | |||
| this->primitive_->value.value = attr.release(); | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| @@ -16,7 +16,6 @@ | |||
| #include "src/ops/apply_momentum.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| @@ -31,11 +30,17 @@ int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePt | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| auto attr = std::make_unique<schema::ApplyMomentumT>(); | |||
| this->primitive_->value.value = attr.release(); | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| auto attr = std::make_unique<schema::ApplyMomentumT>(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.value = attr.release(); | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -49,13 +54,13 @@ int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatb | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateApplyMomentum(*fbb); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ActivationGrad, val_offset.o); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ApplyMomentum, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| int ApplyMomentum::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) { | |||
| int ApplyMomentum::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) { | |||
| if (5 != inputs.size()) { | |||
| MS_LOG(ERROR) << "ApplyMomentum should have at 5 input tensors"; | |||
| return RET_ERROR; | |||
| @@ -48,6 +48,9 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector< | |||
| if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) { | |||
| ndim_ = outShape.size(); | |||
| x1_shape_.resize(ndim_); | |||
| x2_shape_.resize(ndim_); | |||
| dy_shape_.resize(ndim_); | |||
| auto fillDimNum0 = outShape.size() - inShape0.size(); | |||
| auto fillDimNum1 = outShape.size() - inShape1.size(); | |||
| int j0 = 0; | |||
| @@ -61,6 +64,9 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector< | |||
| // if (inShape0.size() < inShape1.size()) | |||
| if (dx1->ElementsNum() < dx2->ElementsNum()) { | |||
| ndim_ = inShape1.size(); | |||
| x1_shape_.resize(ndim_); | |||
| x2_shape_.resize(ndim_); | |||
| dy_shape_.resize(ndim_); | |||
| auto fillDimNum = inShape1.size() - inShape0.size(); // This will not work for batch! | |||
| int j = 0; | |||
| for (unsigned int i = 0; i < inShape1.size(); i++) { | |||
| @@ -74,8 +80,10 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector< | |||
| } | |||
| } else if (dx2->ElementsNum() < dx1->ElementsNum()) { // if (inShape0.size() > inShape1.size()) | |||
| ndim_ = inShape0.size(); | |||
| x1_shape_.resize(ndim_); | |||
| x2_shape_.resize(ndim_); | |||
| dy_shape_.resize(ndim_); | |||
| broadcasting_ = true; | |||
| ndim_ = inShape0.size(); | |||
| int j = 0; | |||
| auto fillDimNum = inShape0.size() - inShape1.size(); | |||
| for (unsigned int i = 0; i < inShape0.size(); i++) { | |||
| @@ -32,7 +32,7 @@ class ArithmeticGrad : public PrimitiveC { | |||
| ArithmeticGrad() = default; | |||
| explicit ArithmeticGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} | |||
| #else | |||
| // explicit Arithmetic(schema::Primitive *primitive) : PrimitiveC(primitive) {} | |||
| // explicit ArithmeticGrad(const schema::Primitive &primitive) : PrimitiveC(primitive) {} | |||
| ArithmeticGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override { | |||
| return RET_ERROR; | |||
| @@ -41,6 +41,7 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->axis = {0}; // GetValue<std::vector<int>>(prim.GetAttr("axis")); | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| @@ -73,6 +74,7 @@ std::vector<int> BiasGrad::GetAxis() const { | |||
| auto fb_vector = this->primitive_->value_as_BiasGrad()->axis(); | |||
| return std::vector<int>(fb_vector->begin(), fb_vector->end()); | |||
| } | |||
| #endif | |||
| int BiasGrad::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) { | |||
| if (1 != inputs.size()) { | |||
| @@ -99,6 +101,5 @@ int BiasGrad::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> out | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -38,8 +38,8 @@ class BiasGrad : public PrimitiveC { | |||
| BiasGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| int InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) override; | |||
| #endif | |||
| int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) override; | |||
| std::vector<int> GetAxis() const; | |||
| }; | |||
| } // namespace lite | |||
| @@ -67,9 +67,31 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers: | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); } | |||
| float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); } | |||
| #endif | |||
| int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) { | |||
| if (5 != inputs.size()) { | |||
| MS_LOG(ERROR) << "BNGrad should have five inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| if (3 != outputs.size()) { | |||
| MS_LOG(ERROR) << "BNGrad should have three outputs"; | |||
| return RET_ERROR; | |||
| } | |||
| auto in = inputs[1]; | |||
| auto scale = inputs[2]; | |||
| outputs[0]->set_shape(in->shape()); | |||
| outputs[1]->set_shape(scale->shape()); | |||
| outputs[2]->set_shape(scale->shape()); | |||
| outputs[0]->set_data_type(in->data_type()); | |||
| outputs[1]->set_data_type(scale->data_type()); | |||
| outputs[2]->set_data_type(scale->data_type()); | |||
| outputs[0]->SetFormat(in->GetFormat()); | |||
| outputs[1]->SetFormat(scale->GetFormat()); | |||
| outputs[2]->SetFormat(scale->GetFormat()); | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -38,6 +38,8 @@ class BNGrad : public PrimitiveC { | |||
| BNGrad() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| int InferShape(std::vector<lite::Tensor *> inputs_, | |||
| std::vector<lite::Tensor *> outputs_) override; | |||
| float GetEps() const; | |||
| float GetMomentum() const; | |||
| }; | |||
| @@ -1,75 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/ops/bn_grad_input.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| float BNGradInput::GetEps() const { return this->primitive_->value.AsBNGradInput()->eps; } | |||
| float BNGradInput::GetMomentum() const { return this->primitive_->value.AsBNGradInput()->momentum; } | |||
| void BNGradInput::SetEps(float eps) { this->primitive_->value.AsBNGradInput()->eps = eps; } | |||
| void BNGradInput::SetMomentum(float momentum) { this->primitive_->value.AsBNGradInput()->momentum = momentum; } | |||
| int BNGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| if (this->primitive_ == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT failed"; | |||
| return RET_ERROR; | |||
| } | |||
| this->primitive_->value.type = schema::PrimitiveType_BNGradInput; | |||
| } | |||
| if (this->primitive_->value.type != schema::PrimitiveType_BNGradInput) { | |||
| MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::BNGradInputT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->eps = GetValue<float>(prim.GetAttr("eps")); | |||
| attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int BNGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| MS_ASSERT(nullptr != fbb); | |||
| auto attr = primitive->value_as_BNGradInput(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "value_as_BNGradInputInput return nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto val_offset = schema::CreateBNGradInput(*fbb, attr->eps(), attr->momentum()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BNGradInput, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| float BNGradInput::GetEps() const { return this->primitive_->value_as_BNGradInput()->eps(); } | |||
| float BNGradInput::GetMomentum() const { return this->primitive_->value_as_BNGradInput()->momentum(); } | |||
| #endif | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -1,47 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ | |||
| #define LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ | |||
| #include <vector> | |||
| #include <set> | |||
| #include <cmath> | |||
| #include "ir/dtype/type_id.h" | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class BNGradInput : public PrimitiveC { | |||
| public: | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(BNGradInput, PrimitiveC); | |||
| BNGradInput() = default; | |||
| explicit BNGradInput(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} | |||
| void SetEps(float eps); | |||
| void SetMomentum(float momentum); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| BNGradInput() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| float GetEps() const; | |||
| float GetMomentum() const; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ | |||
| @@ -66,108 +66,7 @@ void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsCon | |||
| void Conv2DGradFilter::SetActivationType(int activation_type) { | |||
| this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type; | |||
| } | |||
| void Conv2DGradFilter::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, | |||
| const std::vector<AnfNodePtr> &inputs) { | |||
| auto attr = std::make_unique<schema::DepthwiseConv2DT>(); | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[2]; | |||
| attr->strideW = stride[3]; | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| int channel_mutiplier = 1; | |||
| if (prim.GetAttr("channel_mutiplier") != nullptr) { | |||
| channel_mutiplier = GetValue<int>(prim.GetAttr("channel_multiplier")); | |||
| } | |||
| attr->channelMultiplier = channel_mutiplier; | |||
| primitive->value.value = attr.release(); | |||
| } | |||
| void Conv2DGradFilter::PopulaterConv2DSingleGroup(const Primitive &prim, | |||
| schema::PrimitiveT *primitive, const int &group) { | |||
| auto attr = std::make_unique<schema::Conv2DT>(); | |||
| attr->group = group; | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[2]; | |||
| attr->strideW = stride[3]; | |||
| attr->channelOut = GetValue<int>(prim.GetAttr("out_channel")); | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| primitive->value.value = attr.release(); | |||
| } | |||
| int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| @@ -181,11 +80,62 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNod | |||
| MS_LOG(ERROR) << "primitive_ type is error:" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| int group = GetValue<int>(prim.GetAttr("group")); | |||
| if (group > 1) { | |||
| PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs); | |||
| } else { | |||
| PopulaterConv2DSingleGroup(prim, this->primitive_, group); | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::Conv2DGradFilterT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->group = GetValue<int>(prim.GetAttr("group")); | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[0]; | |||
| attr->strideW = stride[1]; | |||
| attr->channelOut = GetValue<int>(prim.GetAttr("out_channel")); | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -268,6 +218,5 @@ int Conv2DGradFilter::InferShape(std::vector<Tensor *> inputs, std::vector<Tenso | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -51,9 +51,6 @@ class Conv2DGradFilter : public PrimitiveC { | |||
| void SetHasBias(bool has_bias); | |||
| void SetActivationType(int activation_type); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, | |||
| const std::vector<AnfNodePtr> &inputs); | |||
| void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group); | |||
| #else | |||
| Conv2DGradFilter() = default; | |||
| @@ -64,108 +64,7 @@ void Conv2DGradInput::SetHasBias(bool has_bias) { this->primitive_->value.AsConv | |||
| void Conv2DGradInput::SetActivationType(int activation_type) { | |||
| this->primitive_->value.AsConv2DGradInput()->activationType = (schema::ActivationType)activation_type; | |||
| } | |||
| void Conv2DGradInput::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, | |||
| const std::vector<AnfNodePtr> &inputs) { | |||
| auto attr = std::make_unique<schema::DepthwiseConv2DT>(); | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[2]; | |||
| attr->strideW = stride[3]; | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| int channel_mutiplier = 1; | |||
| if (prim.GetAttr("channel_mutiplier") != nullptr) { | |||
| channel_mutiplier = GetValue<int>(prim.GetAttr("channel_multiplier")); | |||
| } | |||
| attr->channelMultiplier = channel_mutiplier; | |||
| primitive->value.value = attr.release(); | |||
| } | |||
| void Conv2DGradInput::PopulaterConv2DSingleGroup(const Primitive &prim, | |||
| schema::PrimitiveT *primitive, const int &group) { | |||
| auto attr = std::make_unique<schema::Conv2DT>(); | |||
| attr->group = group; | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[2]; | |||
| attr->strideW = stride[3]; | |||
| attr->channelOut = GetValue<int>(prim.GetAttr("out_channel")); | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| primitive->value.value = attr.release(); | |||
| } | |||
| int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| @@ -179,11 +78,63 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNode | |||
| MS_LOG(ERROR) << "primitive_ type is error:" << this->primitive_->value.type; | |||
| return RET_ERROR; | |||
| } | |||
| int group = GetValue<int>(prim.GetAttr("group")); | |||
| if (group > 1) { | |||
| PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs); | |||
| } else { | |||
| PopulaterConv2DSingleGroup(prim, this->primitive_, group); | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::Conv2DGradInputT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->group = GetValue<int>(prim.GetAttr("group")); | |||
| auto format = GetValue<std::string>(prim.GetAttr("data_format")); | |||
| if (format == "NCHW") { | |||
| attr->format = schema::Format_NCHW; | |||
| } else if (format == "NHWC") { | |||
| attr->format = schema::Format_NHWC; | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list")); | |||
| attr->padUp = pad_list[0]; | |||
| attr->padDown = pad_list[1]; | |||
| attr->padLeft = pad_list[2]; | |||
| attr->padRight = pad_list[3]; | |||
| auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation")); | |||
| attr->dilateH = dilation[0]; | |||
| attr->dilateW = dilation[1]; | |||
| auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size")); | |||
| attr->kernelH = kernel_size[0]; | |||
| attr->kernelW = kernel_size[1]; | |||
| auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride")); | |||
| attr->strideH = stride[0]; | |||
| attr->strideW = stride[1]; | |||
| attr->channelOut = GetValue<int>(prim.GetAttr("out_channel")); | |||
| auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode")); | |||
| if (pad_mode == "valid") { | |||
| attr->padMode = schema::PadMode_VALID; | |||
| } else if (pad_mode == "same") { | |||
| attr->padMode = schema::PadMode_SAME; | |||
| } else { | |||
| attr->padMode = schema::PadMode_NOTSET; | |||
| } | |||
| if (prim.GetAttr("activation_name") != nullptr) { | |||
| std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name")); | |||
| attr->activationType = kActivationTypeMap[activate_name]; | |||
| } else { | |||
| attr->activationType = schema::ActivationType_NO_ACTIVATION; | |||
| } | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -265,6 +216,5 @@ int Conv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -51,9 +51,6 @@ class Conv2DGradInput : public PrimitiveC { | |||
| void SetHasBias(bool has_bias); | |||
| void SetActivationType(int activation_type); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, | |||
| const std::vector<AnfNodePtr> &inputs); | |||
| void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group); | |||
| #else | |||
| Conv2DGradInput() = default; | |||
| @@ -47,6 +47,15 @@ int Depend::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #else | |||
| int Depend::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { | |||
| MS_ASSERT(nullptr != primitive); | |||
| MS_ASSERT(nullptr != fbb); | |||
| auto val_offset = schema::CreateDepend(*fbb); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Depend, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -31,9 +31,10 @@ class Depend : public PrimitiveC { | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| Depend() = default; | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // LITE_MINDSPORE_LITE_SRC_OPS_Depend_H_ | |||
| #endif // LITE_MINDSPORE_LITE_SRC_OPS_DEPEND_H_ | |||
| @@ -66,7 +66,6 @@ class DetectionPostProcess : public PrimitiveC { | |||
| bool GetUseRegularNms() const; | |||
| bool GetOutQuantized() const; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -80,9 +80,9 @@ int PoolingGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> | |||
| } else { | |||
| attr->format = schema::Format_NUM_OF_FORMAT; | |||
| } | |||
| if (prim.instance_name() == "MaxPool") { | |||
| if (prim.instance_name() == "MaxPoolGrad") { | |||
| attr->poolingMode = schema::PoolMode_MAX_POOLING; | |||
| } else if (prim.instance_name() == "MeanPool") { | |||
| } else if (prim.instance_name() == "MeanPoolGrad") { | |||
| attr->poolingMode = schema::PoolMode_MEAN_POOLING; | |||
| } | |||
| @@ -189,6 +189,5 @@ int PoolingGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> | |||
| grad_output->SetFormat(input->GetFormat()); | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -139,7 +139,6 @@ | |||
| #include "src/ops/power_grad.h" | |||
| #include "src/ops/softmax_cross_entropy.h" | |||
| #include "src/ops/bn_grad.h" | |||
| #include "src/ops/bn_grad_input.h" | |||
| #include "src/ops/arithmetic_grad.h" | |||
| #include "src/ops/depend.h" | |||
| #include "src/ops/flatten_grad.h" | |||
| @@ -392,49 +391,42 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std: | |||
| return NewPrimitiveC<Elu>(prim, inputs, quantType); | |||
| } else if (op_type == "Log") { | |||
| return NewPrimitiveC<Log>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DBackpropInput") { | |||
| } else if (op_type == "DeConv2D") { | |||
| return NewPrimitiveC<DeConv2D>(prim, inputs, quantType); | |||
| } else if (op_type == "tuple_getitem") { | |||
| return NewPrimitiveC<TupleGetItem>(prim, inputs, quantType); | |||
| } else if (op_type == "Softmax") { | |||
| return NewPrimitiveC<SoftMax>(prim, inputs, quantType); | |||
| #ifdef SUPPORT_TRAIN0 | |||
| #ifdef SUPPORT_TRAIN | |||
| } else if (op_type == "SoftmaxCrossEntropyWithLogits") { | |||
| return NewPrimitiveC<SoftmaxCrossEntropy>(prim, inputs, quantType); | |||
| } else if (op_type == "BiasAddGrad") { | |||
| return NewPrimitiveC<BiasGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "ApplyMomentum") { | |||
| return NewPrimitiveC<ApplyMomentum>(prim, inputs, quantType); | |||
| } else if (op_type == "Depend") { | |||
| return NewPrimitiveC<Depend>(prim, inputs, quantType); | |||
| } else if ((op_type == "ReluGrad" || op_type == "Relu6Grad" || op_type == "SigmoidGrad")) { | |||
| return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType); | |||
| } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) { | |||
| return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DBackpropFilter") { | |||
| return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType); | |||
| } else if (op_type == "BiasAddGrad") { | |||
| return NewPrimitiveC<BiasGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "ApplyMomentum") { | |||
| return NewPrimitiveC<ApplyMomentum>(prim, inputs, quantType); | |||
| } else if (op_type == "NegGrad") { | |||
| return NewPrimitiveC<NegGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "LogGrad") { | |||
| return NewPrimitiveC<LogGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DBackpropInput") { | |||
| return NewPrimitiveC<Conv2DGradInput>(prim, inputs, quantType); | |||
| } else if (op_type == "BatchNormGrad") { | |||
| return NewPrimitiveC<BNGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DGradInput") { | |||
| return NewPrimitiveC<Conv2DGradInput>(prim, inputs, quantType); | |||
| } else if (op_type == "Conv2DGradFilter") { | |||
| return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType); | |||
| } else if (op_type == "BiasGrad") { | |||
| return NewPrimitiveC<BiasGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "ActivationGrad") { | |||
| return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "PoolingGrad") { | |||
| return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "BNGradInput") { | |||
| return NewPrimitiveC<BNGradInput>(prim, inputs, quantType); | |||
| } else if (op_type == "PowerGrad") { | |||
| return NewPrimitiveC<PowerGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "SoftmaxCrossEntropyWithLogits") { | |||
| return NewPrimitiveC<SoftmaxCrossEntropy>(prim, inputs, quantType); | |||
| } else if (op_type == "Depend") { | |||
| return NewPrimitiveC<Depend>(prim, inputs, quantType); | |||
| } else if (op_type == "FlattenGrad") { | |||
| return NewPrimitiveC<FlattenGrad>(prim, inputs, quantType); | |||
| #endif | |||
| #ifdef SUPPORT_TRAIN0 | |||
| } else if (op_type == "PowerGrad") { | |||
| return NewPrimitiveC<PowerGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "NegGrad") { | |||
| return NewPrimitiveC<NegGrad>(prim, inputs, quantType); | |||
| } else if (op_type == "LogGrad") { | |||
| return NewPrimitiveC<LogGrad>(prim, inputs, quantType); | |||
| #endif | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported primitive type in Create : " << op_type; | |||
| @@ -677,12 +669,10 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { | |||
| return new ArithmeticGrad(primitive); | |||
| case schema::PrimitiveType_DivGrad: | |||
| return new ArithmeticGrad(primitive); | |||
| case schema::PrimitiveType_PowerGrad: | |||
| return new PowerGrad(primitive); | |||
| case schema::PrimitiveType_BNGradInput: | |||
| return new BNGradInput(primitive); | |||
| case schema::PrimitiveType_SoftmaxCrossEntropy: | |||
| return new SoftmaxCrossEntropy(primitive); | |||
| case schema::PrimitiveType_PowerGrad: | |||
| return new PowerGrad(primitive); | |||
| case schema::PrimitiveType_Depend: | |||
| return new Depend(primitive); | |||
| case schema::PrimitiveType_FlattenGrad: | |||
| @@ -934,7 +924,9 @@ PrimitiveC *PrimitiveC::Create(const schema::Primitive *primitive) { | |||
| case schema::PrimitiveType_MulGrad: | |||
| return NewPrimitiveC<ArithmeticGrad>(primitive); | |||
| case schema::PrimitiveType_DivGrad: | |||
| return NewPrimitiveC<ArithmeticGrad>(primitive); | |||
| return NewPrimitiveC<ArithmeticGrad>(primitive); | |||
| case schema::PrimitiveType_SoftmaxCrossEntropy: | |||
| return NewPrimitiveC<SoftmaxCrossEntropy>(primitive); | |||
| case schema::PrimitiveType_NegGrad: | |||
| return NewPrimitiveC<NegGrad>(primitive); | |||
| case schema::PrimitiveType_LogGrad: | |||
| @@ -43,6 +43,8 @@ int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<Anf | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->axis = {0}; | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| MS_LOG(ERROR) << "primitive value is nullptr"; | |||
| @@ -102,6 +104,5 @@ int SoftmaxCrossEntropy::InferShape(std::vector<Tensor *> inputs, std::vector<Te | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -50,7 +50,6 @@ int ConvolutionDepthwiseCPUKernel::InitWeightBias() { | |||
| } | |||
| PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), channel); | |||
| auto bias_tensor = in_tensors_[kBiasIndex]; | |||
| bias_data_ = reinterpret_cast<float *>(malloc(channel * sizeof(float))); | |||
| if (bias_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| @@ -59,6 +58,7 @@ int ConvolutionDepthwiseCPUKernel::InitWeightBias() { | |||
| memset(bias_data_, 0, channel * sizeof(float)); | |||
| if (in_tensors_.size() == kInputSize2) { | |||
| auto bias_tensor = in_tensors_[kBiasIndex]; | |||
| auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData()); | |||
| memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); | |||
| } | |||
| @@ -63,6 +63,30 @@ int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; | |||
| return ret; | |||
| } | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| if (is_train()) { | |||
| float *in = static_cast<float *>(in_tensors_[0]->MutableData()); | |||
| float *run_mean = static_cast<float *>(out_tensors_[1]->MutableData()); | |||
| float *run_var = static_cast<float *>(out_tensors_[2]->MutableData()); | |||
| float *save_mean = static_cast<float *>(out_tensors_[3]->MutableData()); | |||
| float *save_inv_var = static_cast<float *>(out_tensors_[4]->MutableData()); | |||
| std::fill(run_mean, run_mean + param->channel_, 0.f); | |||
| std::fill(run_var, run_var + param->channel_, 0.f); | |||
| FusedBatchNormFp32MeanVar(in, 0.9f, run_mean, run_var, param, save_mean, save_inv_var); | |||
| } | |||
| ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| return ret; | |||
| } | |||
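// Hedged note, not part of the patch: in the training branch above, out_tensors_[1] and [2] receive the
// per-batch running mean and variance (zeroed first), while out_tensors_[3] and [4] hold the saved mean and
// saved inverse variance that the inference path reads; 0.9f is the hard-coded momentum used to blend the
// fresh batch statistics into the saved ones, in the usual exponential-moving-average form
//   saved_stat = momentum * saved_stat + (1 - momentum) * batch_stat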
| int FusedBatchnormCPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id, | |||
| @@ -30,7 +30,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { | |||
| ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor() override; | |||
| int DoExecute(int task_id) override; | |||
| @@ -186,10 +186,10 @@ int MatmulCPUKernel::Run() { | |||
| auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c()); | |||
| auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c()); | |||
| if (params_->a_const_ == false) { | |||
| if (params_->a_const_ == false || is_train()) { | |||
| InitMatrixA(a_src, a_c12_ptr_); | |||
| } | |||
| if (params_->b_const_ == false) { | |||
| if (params_->b_const_ == false || is_train()) { | |||
| InitMatrixB(b_src, b_r8_ptr_); | |||
| } | |||
| @@ -201,4 +201,16 @@ int MatmulCPUKernel::Run() { | |||
| } | |||
| return RET_OK; | |||
| } | |||
| void MatmulCPUKernel::eval() { | |||
| // Copy weights after training | |||
| LiteKernel::eval(); | |||
| if (params_->a_const_ == true) { | |||
| InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_c12_ptr_); | |||
| } | |||
| if (params_->b_const_ == true) { | |||
| InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_r8_ptr_); | |||
| } | |||
| } | |||
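// Hedged note (rationale inferred from the patch, not stated in it): while is_train() is set, Run() repacks
// A and B on every call even when they are const, because training updates the weight tensors in place;
// eval() then repacks the const operands one last time so that subsequent inference Run() calls can keep
// reusing the cached a_c12_ptr_ / b_r8_ptr_ buffers without re-reading the tensors.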
| } // namespace mindspore::kernel | |||
| @@ -34,6 +34,8 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| void eval() override; | |||
| private: | |||
| void InitMatrixA(float *src_ptr, float *dst_ptr); | |||
| @@ -28,7 +28,7 @@ class ActivationGradCPUKernel : public LiteKernel { | |||
| explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(param, inputs, outputs, ctx, primitive) { | |||
| : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||
| param_act_grad_ = reinterpret_cast<ActivationParameter *>(param); | |||
| } | |||
| ~ActivationGradCPUKernel() override = default; | |||
| @@ -76,7 +76,7 @@ kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::Tensor | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad); | |||
| auto *kernel = | |||
| new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive); | |||
| new (std::nothrow) BiasGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BiasGradCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -56,7 +56,7 @@ OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) { | |||
| int BNGradCPUKernel::Init() { | |||
| auto *input_x = in_tensors_.at(1); | |||
| int channels = input_x->shape().at(kNHWC_C); | |||
| workspace_size = 5 * channels; | |||
| workspace_size = 4 * channels; | |||
| workspace = new (std::nothrow) float[workspace_size]; | |||
| if (workspace == nullptr) { | |||
| MS_LOG(ERROR) << "new workspace fail!"; | |||
| @@ -89,9 +89,8 @@ int BNGradCPUKernel::Run() { | |||
| std::fill(workspace, workspace + workspace_size, 0.f); | |||
| float *mean = workspace; | |||
| float *invar = mean + channels; | |||
| float *mean_delta = invar + channels; | |||
| float *variance_delta = mean_delta + channels; | |||
| float *mean_add_delta = variance_delta + channels; | |||
| float *dxhat_sum = invar + channels; | |||
| float *dxhathat_sum = dxhat_sum + channels; | |||
| float *x = reinterpret_cast<float *>(input_x->MutableData()); | |||
| float *yt = reinterpret_cast<float *>(input_yt->MutableData()); | |||
| @@ -100,13 +99,7 @@ int BNGradCPUKernel::Run() { | |||
| float *dscale = reinterpret_cast<float *>(output_scale->MutableData()); | |||
| float *dbias = reinterpret_cast<float *>(output_bias->MutableData()); | |||
| std::copy(yt, yt + batch * channels * spatial, dx); | |||
| meanVar(x, batch, spatial, channels, eps, mean, invar); | |||
| scaleBias(scale, batch, channels, spatial, dx); | |||
| meanDelta(dx, spatial, channels, invar, mean_delta); | |||
| varianceDelta(x, dx, mean, invar, batch, channels, spatial, variance_delta); | |||
| meanAdd(x, mean, variance_delta, batch, channels, spatial, mean_add_delta, mean_delta); | |||
| NormalizeDelta(x, mean, invar, mean_delta, variance_delta, batch, channels, spatial, dx); | |||
| backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx); | |||
| // dbias | |||
| sumSpatialBatch(yt, batch * spatial, channels, dbias); | |||
| // dscale | |||
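// Hedged sketch of what the fused backwardX call is assumed to compute (the standard batch-norm backward,
// with N = batch * spatial, x_hat = (x - mean) * invar and dxhat = dy * scale):
//   dx = invar * (dxhat - mean(dxhat) - x_hat * mean(dxhat * x_hat))
// dxhat_sum and dxhathat_sum appear to be the two per-channel accumulators that replace the previous
// mean_delta / variance_delta / mean_add_delta buffers, which is why workspace_size drops to 4 * channels.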
| @@ -29,7 +29,7 @@ class BNGradCPUKernel : public LiteKernel { | |||
| const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~BNGradCPUKernel() override { delete workspace; } | |||
| ~BNGradCPUKernel() override { delete[] workspace; } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -39,8 +39,5 @@ class BNGradCPUKernel : public LiteKernel { | |||
| float *workspace; | |||
| int workspace_size; | |||
| }; | |||
| // OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive); | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_ | |||
| @@ -41,10 +41,12 @@ int ConvolutionTrainCPUKernel::Init() { | |||
| conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H); | |||
| conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W); | |||
| conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_; | |||
| int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * | |||
| conv_param_->input_channel_ / conv_param_->group_; | |||
| workspace = new float[ws_size]; | |||
| workspace = new (std::nothrow) float[ws_size]; | |||
| if (workspace == nullptr) { | |||
| MS_LOG(ERROR) << "new workspace fail!"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -103,7 +105,7 @@ kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor | |||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||
| const lite::PrimitiveC *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D || desc.type == schema::PrimitiveType_DepthwiseConv2D); | |||
| auto *kernel = new (std::nothrow) ConvolutionTrainCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| @@ -0,0 +1,186 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <math.h> | |||
| #include "src/kernel_registry.h" | |||
| #include "nnacl/softmax_parameter.h" | |||
| #include "nnacl/fp32/softmax.h" | |||
| #include "src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_SoftmaxCrossEntropy; | |||
| namespace mindspore::kernel { | |||
| int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; } | |||
| void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits, | |||
| float *grads, float *output2) const { | |||
| float eps = 1e-6; | |||
| float total_loss = 0.0; | |||
| if (grads != nullptr) { | |||
| for (int i = 0; i < param_->batch_size_; ++i) { | |||
| for (size_t j = 0; j < param_->number_of_classes_; ++j) { | |||
| float logit = | |||
| -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]); | |||
| grads[i * param_->number_of_classes_ + j] = | |||
| (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j])/param_->batch_size_; | |||
| total_loss += labels[i * param_->number_of_classes_ + j] * logit; | |||
| } | |||
| } | |||
| } else { | |||
| for (int i = 0; i < param_->batch_size_; ++i) { | |||
| for (size_t j = 0; j < param_->number_of_classes_; ++j) { | |||
| float logit = | |||
| -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]); | |||
| total_loss += labels[i * param_->number_of_classes_ + j] * logit; | |||
| } | |||
| } | |||
| } | |||
| output2[0] = total_loss / param_->batch_size_; | |||
| } | |||
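// Hedged reference sketch of the math ForwardPostExecute implements above: the `logits` argument already
// holds softmax probabilities p (Run() applies Softmax before calling in), so with one-hot labels y the loss
// is -(1/B) * sum_i sum_j y[i][j] * log(p[i][j]) and the gradient w.r.t. the pre-softmax logits is (p - y)/B.
// A minimal standalone version (names are illustrative only, not from the patch):
// float SoftmaxCrossEntropy(const float *p, const float *y, float *dlogits, int B, int C) {
//   float loss = 0.f;
//   const float eps = 1e-6f;
//   for (int i = 0; i < B; ++i) {
//     for (int j = 0; j < C; ++j) {
//       int k = i * C + j;
//       loss += y[k] * -logf(p[k] <= 0.f ? eps : p[k]);                // clamp to avoid log(0)
//       if (dlogits != nullptr) dlogits[k] = (p[k] - y[k]) / B;        // gradient averaged over the batch
//     }
//   }
//   return loss / B;
// }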
| #if 0 | |||
| void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses, | |||
| float *output) const { | |||
| float total_loss = 0; | |||
| for (int i = 0; i < param_->batch_size_; ++i) { | |||
| if (labels[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "label value must >= 0"; | |||
| } | |||
| size_t label = labels[i]; | |||
| if (label > param->number_of_classes_) { | |||
| MS_LOG(EXCEPTION) << "error label input!"; | |||
| } else { | |||
| total_loss -= logf(losses[i * param->number_of_classes_ + label]); | |||
| } | |||
| } | |||
| output[0] = total_loss / param->batch_size_; | |||
| } | |||
| void SoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads, | |||
| float *output) const { | |||
| size_t row_start = 0; | |||
| float total_loss = 0; | |||
| for (int i = 0; i < param->batch_size_; ++i) { | |||
| if (labels[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "label value must >= 0"; | |||
| } | |||
| size_t label = labels[i]; | |||
| if (label > param->number_of_classes_) { | |||
| MS_LOG(EXCEPTION) << "error label input!"; | |||
| } else { | |||
| total_loss -= logf(losses[i * param->number_of_classes_ + label]); | |||
| for (size_t j = 0; j < param->number_of_classes_; ++j) { | |||
| size_t index = row_start + j; | |||
| if (j == label) { | |||
| grads[index] = (losses[index] - 1) / param->batch_size_; | |||
| } else { | |||
| grads[index] = losses[index] / param->batch_size_; | |||
| } | |||
| } | |||
| } | |||
| row_start += param->number_of_classes_; | |||
| } | |||
| output[0] = total_loss / param->batch_size_; | |||
| } | |||
| #endif | |||
| int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| auto ins = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| float *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| float *grads = nullptr; | |||
| if (is_train() && out_tensors_.size() > 1) { | |||
| grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData()); | |||
| } | |||
| size_t data_size = in_tensors_.at(0)->ElementsNum(); | |||
| if (losses_ == nullptr || sum_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "losses_ or sum_data_ is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_ASSERT(out != nullptr); | |||
| MS_ASSERT(labels != nullptr); | |||
| MS_ASSERT(ins != nullptr); | |||
| std::fill(losses_, losses_ + data_size, 0.f); | |||
| std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f); | |||
| Softmax(ins, losses_, sum_data_, &sm_params_); | |||
| ForwardPostExecute(labels, losses_, grads, out); | |||
| return RET_OK; | |||
| } | |||
| int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() { | |||
| auto dims = in_tensors_[0]->shape(); | |||
| param_->n_dim_ = 2; | |||
| param_->number_of_classes_ = dims[1]; | |||
| param_->batch_size_ = dims[0]; | |||
| for (size_t i = 0; i < dims.size(); i++) param_->input_shape_[i] = dims[i]; | |||
| if (2 != this->in_tensors_.size()) { | |||
| MS_LOG(ERROR) << "softmax entropy loss should have two inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *in0 = in_tensors_.front(); | |||
| if (in0 == nullptr) { | |||
| MS_LOG(ERROR) << "softmax etropy loss in0 have no data"; | |||
| return RET_ERROR; | |||
| } | |||
| size_t data_size = in_tensors_.at(0)->ElementsNum(); | |||
| losses_ = new (std::nothrow) float[data_size]; | |||
| sum_data_ = new (std::nothrow) float[dims[0]]; | |||
| MS_ASSERT(losses_ != nullptr); | |||
| MS_ASSERT(sum_data_ != nullptr); | |||
| sm_params_.n_dim_ = 2; | |||
| sm_params_.element_size_ = data_size; | |||
| sm_params_.axis_ = 1; | |||
| for (size_t i = 0; i < dims.size(); i++) sm_params_.input_shape_[i] = dims[i]; | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy); | |||
| auto *kernel = | |||
| new (std::nothrow) SoftmaxCrossEntropyWithLogitsCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SoftmaxCrossEntropyWithLogitsCPUKernel fail!"; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| // REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,62 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ | |||
| #include <vector> | |||
| #include "src/train/loss_kernel.h" | |||
| #include "ir/anf.h" | |||
| #include "nnacl/fp32_grad/softmax_grad.h" | |||
| #include "nnacl/fp32/arithmetic.h" | |||
| #include "nnacl/softmax_parameter.h" | |||
| namespace mindspore::kernel { | |||
| class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel { | |||
| public: | |||
| explicit SoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter, | |||
| const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const lite::Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LossKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter); | |||
| } | |||
| ~SoftmaxCrossEntropyWithLogitsCPUKernel() override { | |||
| delete[] losses_; | |||
| delete[] sum_data_; | |||
| } | |||
| void ForwardPostExecute(const float *labels, const float *logits, | |||
| float *grads, float *output2) const; | |||
| // void ForwardPostExecute(const int *labels, const float *losses, float *output) const; | |||
| // void GradPostExecute(const int *labels, const float *losses, float* grads, float *output) const; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| private: | |||
| SoftmaxCrossEntropyParameter *param_; | |||
| SoftmaxParameter sm_params_; | |||
| float *losses_ = nullptr; | |||
| float *sum_data_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ | |||
| @@ -0,0 +1,100 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32_grad/softmax_grad.h" | |||
| #include <string.h> | |||
| #include <vector> | |||
| #include "nnacl/fp32_grad/softmax_grad.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| // using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| // using mindspore::schema::PrimitiveType_SoftMaxGrad; | |||
| namespace mindspore::kernel { | |||
| int SoftmaxGradCPUKernel::Init() { | |||
| // auto input_tensor =in_tensors_.at(0); | |||
| param = reinterpret_cast<SoftmaxParameter *>(op_parameter_); | |||
| auto in_shape = in_tensors_.at(0)->shape(); | |||
| auto in_dims = in_shape.size(); | |||
| int ele_size = 1; | |||
| param->n_dim_ = in_dims; | |||
| for (size_t i = 0; i < in_dims; i++) { | |||
| param->input_shape_[i] = in_shape[i]; | |||
| ele_size *= in_shape[i]; | |||
| } | |||
| param->element_size_ = ele_size; | |||
| // malloc tmp buffer | |||
| auto axis = param->axis_; | |||
| if ((axis < -1) || (axis > param->n_dim_)) { | |||
| MS_LOG(ERROR) << "SoftmaxGrad axis is invalid!"; | |||
| return RET_ERROR; | |||
| } else if (axis == -1) { | |||
| axis = param->axis_ = (in_dims - 1); | |||
| } | |||
| int inner_size = 1; | |||
| for (size_t i = axis + 1; i < in_dims; i++) { | |||
| inner_size *= in_shape[i]; | |||
| } | |||
| sum_data_ = new (std::nothrow) float[inner_size]; | |||
| MS_ASSERT(sum_data_ != nullptr); | |||
| sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]]; | |||
| MS_ASSERT(sum_mul_ != nullptr); | |||
| return RET_OK; | |||
| } | |||
| int SoftmaxGradCPUKernel::ReSize() { return RET_OK; } | |||
| int SoftmaxGradCPUKernel::Run() { | |||
| auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); | |||
| auto yt_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| SoftmaxGrad(input_ptr, yt_ptr, output_ptr, sum_data_, sum_mul_, reinterpret_cast<SoftmaxParameter *>(op_parameter_)); | |||
| return RET_OK; | |||
| } | |||
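// Hedged note on the math this kernel is assumed to delegate to SoftmaxGrad (standard softmax backward):
// with y = softmax(x) and upstream gradient dy, the result along the softmax axis is
//   dx_i = y_i * (dy_i - sum_j dy_j * y_j)
// sum_data_ and sum_mul_, sized in Init(), presumably hold the per-slice reduction sum_j dy_j * y_j and the
// intermediate products needed to form it.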
| kernel::LiteKernel *CpuSoftmaxGradFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| // MS_ASSERT(desc.type == schema::PrimitiveType_SoftMaxGrad); | |||
| auto *kernel = new (std::nothrow) SoftmaxGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SoftmaxGradCPUKernel fail!"; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| // REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftMaxGrad, CpuSoftmaxGradFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/softmax_parameter.h" | |||
| namespace mindspore::kernel { | |||
| class SoftmaxGradCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| param = reinterpret_cast<SoftmaxParameter *>(parameter); | |||
| } | |||
| ~SoftmaxGradCPUKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| private: | |||
| SoftmaxParameter *param; | |||
| float *sum_data_ = nullptr; | |||
| float *sum_mul_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ | |||
| @@ -143,7 +143,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| @@ -163,5 +163,5 @@ kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<li | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSparseSoftmaxCrossEntropyFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -57,6 +57,7 @@ int Tensor::CopyTensor(const Tensor &srcTensor, bool copyData) { | |||
| this->data_type_ = srcTensor.data_type_; | |||
| this->shape_ = srcTensor.shape_; | |||
| this->category_ = srcTensor.category_; | |||
| this->format_ = srcTensor.format_; | |||
| if (copyData) { | |||
| auto ret = CopyTensorData(srcTensor); | |||
| if (0 != ret) { | |||
| @@ -27,6 +27,8 @@ | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "src/ops/power_grad.h" | |||
| #include "nnacl/power_parameter.h" | |||
| #include "src/ops/bias_grad.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| namespace mindspore::kernel { | |||
| @@ -36,7 +38,7 @@ OpParameter *DefaultPopulateParameter(const mindspore::lite::PrimitiveC *primiti | |||
| return nullptr; | |||
| } | |||
| OpParameter *param = new (std::nothrow) OpParameter(); | |||
| OpParameter *param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "new Param for primitive failed."; | |||
| return nullptr; | |||
| @@ -51,7 +53,8 @@ OpParameter *PopulateSoftmaxCrossEntropyParameter(const mindspore::lite::Primiti | |||
| MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; | |||
| return nullptr; | |||
| } | |||
| SoftmaxCrossEntropyParameter *sce_param = new (std::nothrow) SoftmaxCrossEntropyParameter(); | |||
| SoftmaxCrossEntropyParameter *sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *> | |||
| (malloc(sizeof(SoftmaxCrossEntropyParameter))); | |||
| if (sce_param == nullptr) { | |||
| MS_LOG(ERROR) << "new SoftmaxCrossEntropyParameter failed."; | |||
| return nullptr; | |||
| @@ -65,7 +68,7 @@ OpParameter *PopulatePoolingGradParameter(const mindspore::lite::PrimitiveC *pri | |||
| MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; | |||
| return nullptr; | |||
| } | |||
| PoolingParameter *pooling_param = new (std::nothrow) PoolingParameter(); | |||
| PoolingParameter *pooling_param = reinterpret_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| if (pooling_param == nullptr) { | |||
| MS_LOG(ERROR) << "new PoolingParameter failed."; | |||
| return nullptr; | |||
| @@ -118,7 +121,7 @@ OpParameter *PopulateActivationGradParameter(const mindspore::lite::PrimitiveC * | |||
| return nullptr; | |||
| } | |||
| ActivationParameter *act_param = new (std::nothrow) ActivationParameter(); | |||
| ActivationParameter *act_param = reinterpret_cast<ActivationParameter *>(malloc(sizeof(ActivationParameter))); | |||
| if (act_param == nullptr) { | |||
| MS_LOG(ERROR) << "new ActivationParameter failed."; | |||
| return nullptr; | |||
| @@ -137,7 +140,7 @@ OpParameter *PopulateConvolutionGradFilterParameter(const mindspore::lite::Primi | |||
| return nullptr; | |||
| } | |||
| ConvParameter *param = new (std::nothrow) ConvParameter(); | |||
| ConvParameter *param = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "new Param for conv grad filter failed."; | |||
| return nullptr; | |||
| @@ -178,7 +181,7 @@ OpParameter *PopulateConvolutionGradInputParameter(const mindspore::lite::Primit | |||
| return nullptr; | |||
| } | |||
| ConvParameter *param = new (std::nothrow) ConvParameter(); | |||
| ConvParameter *param = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "new Param for conv grad filter failed."; | |||
| return nullptr; | |||
| @@ -219,7 +222,7 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primi | |||
| return nullptr; | |||
| } | |||
| PowerParameter *power_param = new (std::nothrow) PowerParameter(); | |||
| PowerParameter *power_param = reinterpret_cast<PowerParameter *>(malloc(sizeof(PowerParameter))); | |||
| if (power_param == nullptr) { | |||
| MS_LOG(ERROR) << "new PowerParameter failed."; | |||
| return nullptr; | |||
| @@ -232,10 +235,25 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primi | |||
| return reinterpret_cast<OpParameter *>(power_param); | |||
| } | |||
| OpParameter *PopulateBiasGradParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| if (primitive == nullptr) { | |||
| MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; | |||
| return nullptr; | |||
| } | |||
| ArithmeticParameter *arithmetic_param = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); | |||
| if (arithmetic_param == nullptr) { | |||
| MS_LOG(ERROR) << "new ArithmeticParameter failed."; | |||
| return nullptr; | |||
| } | |||
| arithmetic_param->op_parameter_.type_ = primitive->Type(); | |||
| return reinterpret_cast<OpParameter *>(arithmetic_param); | |||
| } | |||
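// Hedged note on the allocation change running through this file: the Populate* helpers switch from
// `new (std::nothrow)` to malloc so the returned OpParameter blocks can be released uniformly with free()
// by the runtime (an inference from the pattern, not stated in the patch). Since malloc neither zeroes nor
// constructs the struct, every field read later must be assigned explicitly; a defensive variant
// (illustrative only) would memset first:
//   auto *p = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
//   if (p == nullptr) return nullptr;
//   memset(p, 0, sizeof(ArithmeticParameter));
//   p->op_parameter_.type_ = primitive->Type();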
| void PopulateTrainParameters() { | |||
| auto ppr = PopulateParameterRegistry::GetInstance(); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, DefaultPopulateParameter); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateArithmetic); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_SoftmaxCrossEntropy, PopulateSoftmaxCrossEntropyParameter); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_ActivationGrad, PopulateActivationGradParameter); | |||
| ppr->AddPopulateParameterFunc(schema::PrimitiveType_TupleGetItem, DefaultPopulateParameter); | |||
| @@ -35,6 +35,10 @@ void TrainSession::ReplaceOps() { | |||
| mindspore::lite::KernelRegistrar tmp(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, | |||
| mindspore::schema::PrimitiveType_Conv2D, | |||
| mindspore::kernel::CpuConvTrainFp32KernelCreator); | |||
| mindspore::lite::KernelRegistrar tmp0(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, | |||
| mindspore::schema::PrimitiveType_DepthwiseConv2D, | |||
| mindspore::kernel::CpuConvTrainFp32KernelCreator); | |||
| } | |||
| int TrainSession::CompileGraph(lite::Model *model) { | |||
| @@ -124,5 +128,4 @@ std::vector<tensor::MSTensor *> TrainSession::GetOutputsByName(const std::string | |||
| } | |||
| return ret->second; | |||
| } | |||
| } // namespace mindspore::session | |||
| @@ -0,0 +1,584 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/file_utils_ext.h" | |||
| #include "nnacl/fp32/reduce.h" | |||
| #include "src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/ops/arithmetic_grad.h" | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| namespace mindspore { | |||
| ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type, | |||
| std::vector<lite::Tensor *> inputs, | |||
| std::vector<lite::Tensor *> outputs) { | |||
| ArithmeticParameter *arithmetic_param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); | |||
| if (arithmetic_param == nullptr) { | |||
| MS_LOG(ERROR) << "new ArithmeticParameter failed."; | |||
| return nullptr; | |||
| } | |||
| arithmetic_param->op_parameter_.type_ = type; | |||
| schema::PrimitiveT *prim = new schema::PrimitiveT; | |||
| prim->value.type = type; | |||
| auto agrad = mindspore::lite::ArithmeticGrad(prim); | |||
| agrad.InferShape(inputs, outputs); | |||
| arithmetic_param->ndim_ = agrad.NDims(); | |||
| for (size_t i = 0; i < agrad.dyShape().size(); i++) arithmetic_param->out_shape_[i] = (agrad.dyShape())[i]; | |||
| for (size_t i = 0; i < agrad.x1Shape().size(); i++) arithmetic_param->in_shape0_[i] = (agrad.x1Shape())[i]; | |||
| for (size_t i = 0; i < agrad.x2Shape().size(); i++) arithmetic_param->in_shape1_[i] = (agrad.x2Shape())[i]; | |||
| return arithmetic_param; | |||
| } | |||
| class TestArithmeticGradFp32 : public mindspore::CommonTest { | |||
| public: | |||
| TestArithmeticGradFp32() {} | |||
| }; | |||
| std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id) { | |||
| size_t input_size; | |||
| std::vector<int> large_dim({4, 6}); | |||
| std::vector<int> small_dim({6}); | |||
| int large_size = (4 * 6); | |||
| int small_size = (1 * 6); | |||
| char *dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_1_x1_4_6.bin"); | |||
| char *dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_1_x2_1_6.bin"); | |||
| if (test_id == 7) { | |||
| large_dim = std::vector<int>({4, 5, 6}); | |||
| small_dim = std::vector<int>({6}); | |||
| large_size = (4 * 5 * 6); | |||
| small_size = (6); | |||
| dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin"); | |||
| dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin"); | |||
| } | |||
| if (test_id >= 8) { | |||
| large_dim = std::vector<int>({5, 4, 6}); | |||
| small_dim = std::vector<int>({5, 1, 6}); | |||
| large_size = (4 * 5 * 6); | |||
| small_size = (5 * 6); | |||
| dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin"); | |||
| dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin"); | |||
| } | |||
| auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(test, &input_size)); | |||
| lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| dy_tensor->SetData(dy_data); | |||
| auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size)); | |||
| lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| x1_tensor->SetData(x1_data); | |||
| auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size)); | |||
| lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| x2_tensor->SetData(x2_data); | |||
| auto dx1_data = new float[large_size]; | |||
| lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); | |||
| dx1_tensor->SetData(dx1_data); | |||
| auto dx2_data = new float[small_size]; | |||
| lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); | |||
| dx2_tensor->SetData(dx2_data); | |||
| std::vector<lite::Tensor *> ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor}; | |||
| return ret_vector; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestAddGradFp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; //TODO tensor data is unique pointer | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestAddGrad2Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestAddGrad3Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_2_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_2_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestSubGradFp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_3_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_3_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestSubGrad2Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| int loop_count = 1000; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel_obj->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| printf("total cost (for %d loops): %lu us\n", loop_count, cost); | |||
| // auto time_avg = cost / loop_count; | |||
| // printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete kernel_obj; | |||
| // delete param; | |||
| MS_LOG(INFO) << "TestMulGradFp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestMulGrad2Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestMulGrad3Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestMulGrad4Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_5_dx1_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path)); | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_5_dx2_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| delete kernel_obj; | |||
| // delete param; | |||
| MS_LOG(INFO) << "TestDivGradFp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string dx2_path = "./test_data/operators/arithmetic_fp32_6_dx2_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), dx2_path)); | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_6_dx1_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestDivGrad2Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string dx1_path = "./test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path)); | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| // for (int i = 0; i < 5; i++) delete all_tensors[i]; | |||
| // delete param; | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "TestDivGrad3Fp32 passed"; | |||
| } | |||
| TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { | |||
| std::vector<lite::Tensor *> all_tensors = | |||
| GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7); | |||
| std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; | |||
| std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]}; | |||
| auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), NULL, desc, nullptr); | |||
| kernel_obj->Run(); | |||
| float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 6; i++) { | |||
| std::cout << output_ptr[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string dx1_path = "./test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path)); | |||
| std::string output_path = "./test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin"; | |||
| EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); | |||
| for (auto tensor : all_tensors) { | |||
| delete[] reinterpret_cast<float *>(tensor->MutableData()); | |||
| tensor->SetData(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "Test3DDivGrad2Fp32 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -30,8 +30,7 @@ class TestBiasGradFp32 : public mindspore::CommonTest { | |||
| TEST_F(TestBiasGradFp32, BiasGradFp32) { | |||
| // prepare stage | |||
| auto bias_param = new ArithmeticParameter(); | |||
| ArithmeticParameter* bias_param = static_cast<ArithmeticParameter*>(malloc(sizeof(ArithmeticParameter))); | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
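These tests consistently switch OpParameter-derived structs from `new` to `malloc` (and, further down, from `delete` to `free`), presumably because the parameters are plain C structs that the runtime releases with `free()` rather than `delete`. A minimal sketch of the pattern; the zero-initialization step and the helper name are additions here, not part of the change:

#include <cstdlib>
#include <cstring>

// Hedged sketch of the allocation pattern used throughout these tests.
template <typename ParamT>
ParamT *MakeOpParameter() {
  auto *param = static_cast<ParamT *>(malloc(sizeof(ParamT)));
  if (param != nullptr) {
    memset(param, 0, sizeof(ParamT));  // assumption: callers fill in the fields they need afterwards
  }
  return param;
}
// Usage: auto *bias_param = MakeOpParameter<ArithmeticParameter>(); ... free(bias_param);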
| @@ -43,7 +43,7 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector< | |||
| TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| // prepare stage | |||
| auto bn_param = new BNGradParameter(); | |||
| auto bn_param = static_cast<BNGradParameter*>(malloc(sizeof(BNGradParameter))); | |||
| bn_param->epsilon_ = 0.00001; | |||
| bn_param->momentum_ = 0.1; | |||
| const int batch = 2; | |||
| @@ -88,22 +88,24 @@ TEST_F(TestBNGradFp32, BNGradFp32) { | |||
| std::cout << "==========dx==========\n"; | |||
| auto dx = reinterpret_cast<float *>(outputs[0]->MutableData()); | |||
| for (int i = 0; i < 7; i++) std::cout << dx[i] << " "; | |||
| std::cout << "\n"; | |||
| auto res = mindspore::lite::CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| std::cout << "\n=======dscale=======\n"; | |||
| auto dscale = reinterpret_cast<float *>(outputs[1]->MutableData()); | |||
| for (int i = 0; i < channels; i++) std::cout << dscale[i] << " "; | |||
| std::cout << "\n"; | |||
| int res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); | |||
| res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| std::cout << "==========dbias==========\n"; | |||
| auto dbias = reinterpret_cast<float *>(outputs[2]->MutableData()); | |||
| for (int i = 0; i < 3; i++) std::cout << dbias[i] << " "; | |||
| std::cout << "\n"; | |||
| res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); | |||
| res = mindspore::lite::CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin"); | |||
| EXPECT_EQ(res, 0); | |||
| for (auto v : inputs) { | |||
| delete[] reinterpret_cast<float *>(v->MutableData()); | |||
| v->SetData(nullptr); | |||
| // delete v; | |||
| delete v; | |||
| } | |||
| delete kernel_obj; | |||
| MS_LOG(INFO) << "BNGradFp32 passed"; | |||
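For the two reduced outputs checked above, the standard batch-norm backward definitions are dbias_c = sum over n,h,w of dy and dscale_c = sum over n,h,w of dy * x_hat, with x_hat = (x - mean_c) / sqrt(var_c + eps); the reference .bin files are assumed to come from those definitions. A hedged reference sketch for an NHWC layout (not the kernel implementation):

// count = batch * height * width; x_hat is assumed to be precomputed.
void BNGradScaleBiasRef(const float *dy, const float *x_hat, int count, int channels,
                        float *dscale, float *dbias) {
  for (int c = 0; c < channels; ++c) {
    dscale[c] = 0.f;
    dbias[c] = 0.f;
  }
  for (int i = 0; i < count; ++i) {
    for (int c = 0; c < channels; ++c) {
      dbias[c] += dy[i * channels + c];                            // -> outputs[2]
      dscale[c] += dy[i * channels + c] * x_hat[i * channels + c];  // -> outputs[1]
    }
  }
}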
| @@ -77,7 +77,7 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| size_t dy_size; | |||
| @@ -144,7 +144,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup1FP32(conv_param); | |||
| size_t dy_size; | |||
| @@ -211,7 +211,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| size_t dy_size; | |||
| @@ -277,7 +277,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3FP32(conv_param); | |||
| size_t dy_size; | |||
| @@ -344,7 +344,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| @@ -410,7 +410,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| size_t dy_size; | |||
| @@ -476,7 +476,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { | |||
| TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { | |||
| // prepare stage | |||
| auto conv_param = new ConvParameter(); | |||
| auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter))); | |||
| InitConvParamGroup3Dilation2FP32(conv_param); | |||
| size_t x_size; | |||
| @@ -73,7 +73,7 @@ class NetworkTest : public mindspore::CommonTest { | |||
| // +-------------+ | | |||
| // V dw(9) | | |||
| // +-----------Update-----+ | |||
| #if 0 | |||
| TEST_F(NetworkTest, tuning_layer) { | |||
| const int BATCH_SIZE = 32; | |||
| const int NUM_CLASSES = 10; | |||
| @@ -177,7 +177,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| node->name = "Momentum"; | |||
| meta_graph->nodes.emplace_back(std::move(node)); | |||
| } | |||
| meta_graph->inputIndex = {6, 0}; // XXX TODO why is it reverse? | |||
| meta_graph->inputIndex = {0, 6}; | |||
| meta_graph->outputIndex = {5, 14}; | |||
| auto input0 = std::make_unique<schema::TensorT>(); | |||
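Changing inputIndex from {6, 0} to {0, 6} makes the graph's declared input order match the order the test consumes the tensors in, which removes the old "why is it reverse?" workaround. A small sketch of the ordering the test appears to rely on afterwards (the data/label assignment is an inference from how the tensors are used below):

auto inputs = session->GetInputs();
auto inTensor = inputs.at(0);     // assumed: the data tensor, filled from the input .bin file
auto labelTensor = inputs.at(1);  // assumed: the label tensor with BATCH_SIZE elements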
| @@ -209,6 +209,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| weight->data.resize(weight_size); | |||
| std::copy(buf, buf + weight_size, weight->data.data()); | |||
| meta_graph->allTensors.emplace_back(std::move(weight)); | |||
| delete [] buf; | |||
| // tensor 3 - matmul | |||
| auto input3 = std::make_unique<schema::TensorT>(); | |||
| input3->nodeType = schema::NodeType::NodeType_Parameter; | |||
| @@ -231,6 +232,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| bias->data.resize(bias_size); | |||
| std::copy(buf, buf + bias_size, bias->data.data()); | |||
| meta_graph->allTensors.emplace_back(std::move(bias)); | |||
| delete [] buf; | |||
| // tensor 5 - bias_add | |||
| auto input5 = std::make_unique<schema::TensorT>(); | |||
| @@ -366,13 +368,13 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| ASSERT_NE(nullptr, model); | |||
| meta_graph.reset(); | |||
| content = nullptr; | |||
| auto context = new lite::Context; | |||
| context->device_type_ = lite::DT_CPU; | |||
| context->cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| lite::Context context; | |||
| context.device_type_ = lite::DT_CPU; | |||
| context.cpu_bind_mode_ = lite::NO_BIND; | |||
| context.thread_num_ = 1; | |||
| auto session = new session::TrainSession(); | |||
| ASSERT_NE(nullptr, session); | |||
| session->Init(context); | |||
| session->Init(&context); | |||
| auto ret = session->CompileGraph(model); | |||
| ASSERT_EQ(lite::RET_OK, ret); | |||
| session->train(); | |||
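Moving the Context from the heap to the stack avoids leaking it at the end of the test; the pattern assumes the session only reads the settings during Init and does not take ownership of the pointer. A minimal sketch of the new usage:

lite::Context context;             // stack-allocated; nothing to delete at the end of the test
context.device_type_ = lite::DT_CPU;
context.cpu_bind_mode_ = lite::NO_BIND;
context.thread_num_ = 1;
session->Init(&context);           // assumption: Init copies or only reads the configuration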
| @@ -392,7 +394,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| //=================================================== | |||
| ASSERT_EQ(input_size, inTensor->Size()); | |||
| memcpy(data, input_data, input_size); | |||
| delete [] buf; | |||
| auto labelTensor = inputs.at(1); | |||
| ASSERT_NE(nullptr, labelTensor); | |||
| ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum()); | |||
| @@ -408,7 +410,7 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type()); | |||
| auto *outData = reinterpret_cast<float *>(outTensor->MutableData()); | |||
| ASSERT_NE(nullptr, outData); | |||
| std::cout << "========================dW=====================" << std::endl; | |||
| std::cout << "==============Initial=Scores===================" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << outData[i] << ", "; | |||
| } | |||
| @@ -422,27 +424,19 @@ TEST_F(NetworkTest, tuning_layer) { | |||
| ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type()); | |||
| outData = reinterpret_cast<float *>(outTensor->MutableData()); | |||
| ASSERT_NE(nullptr, outData); | |||
| std::cout << "========================dW=====================" << std::endl; | |||
| std::cout << "==============Scores=after-single=train========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << outData[i] << ", "; | |||
| } | |||
| //=================================================== | |||
| #if 0 | |||
| size_t output_size; | |||
| std::string output_path = "./convfp32_out_1_28_28_32.bin"; | |||
| buf = mindspore::lite::ReadFile(output_path.c_str(), &output_size); | |||
| ASSERT_NE(nullptr, buf); | |||
| auto output_data = reinterpret_cast<float *>(buf); | |||
| ASSERT_NE(nullptr, output_data); | |||
| //=================================================== | |||
| ASSERT_EQ(output_size, runOutput->Size()); | |||
| for (size_t i = 0; i < runOutput->ElementsNum(); i++) { | |||
| ASSERT_EQ(output_data[i], outData[i]); | |||
| } | |||
| #endif | |||
| MS_LOG(INFO) << "Passed"; | |||
| } | |||
| std::string output_path = "./test_data/train/train_output_32_10.bin"; | |||
| auto error = lite::RelativeOutputError(outData, output_path); | |||
| EXPECT_LT(error, 2e-3); | |||
| MS_LOG(INFO) << "TuningLayer passed"; | |||
| delete model; | |||
| delete session; | |||
| } | |||
| #endif | |||
| int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path, | |||
| std::function<int32_t(mindspore::session::TrainSession *session, const std::string &)> cb) { | |||
| int32_t res = 0; | |||
| @@ -459,7 +453,7 @@ int32_t fileIterator(mindspore::session::TrainSession *session, const std::strin | |||
| } | |||
| void replaceExt(const std::string &src, std::string *dst) { *dst = src.substr(0, src.find_last_of('.')) + ".emb"; } | |||
| int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &in, const std::string &out) { | |||
| int32_t runEffNet(mindspore::lite::LiteSession *session, const std::string &in, const std::string &out) { | |||
| // setup input | |||
| auto inputs = session->GetInputs(); | |||
| // ASSERT_EQ(inputs.size(), 1); | |||
| @@ -473,14 +467,15 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string & | |||
| auto input_data = reinterpret_cast<float *>(in_buf); | |||
| // ASSERT_EQ(input_size, inTensor->Size()); | |||
| std::copy(input_data, input_data + inTensor->ElementsNum(), data); | |||
| delete [] in_buf; | |||
| // execute network | |||
| session->RunGraph(); | |||
| // compare outputs | |||
| auto outputs = session->GetOutputMap(); | |||
| auto outputs = session->GetOutputs(); | |||
| auto output = ((outputs.begin())->second); | |||
| float *output_data = reinterpret_cast<float *>(output.at(0)->MutableData()); | |||
| float *output_data = reinterpret_cast<float *>(output->MutableData()); | |||
| return mindspore::lite::CompareRelativeOutput(output_data, out.c_str()); | |||
| } | |||
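The helper now takes the base LiteSession and reads results through GetOutputs(), whose entries appear to map a name directly to a tensor pointer, whereas the old GetOutputMap() path returned a per-node collection and needed the extra .at(0) indirection. A hedged usage sketch of the new retrieval (expected_path stands in for the reference-file argument):

auto outputs = session->GetOutputs();        // assumption: name -> lite::Tensor * map
auto *out_tensor = outputs.begin()->second;
auto *out_data = reinterpret_cast<float *>(out_tensor->MutableData());
int res = mindspore::lite::CompareRelativeOutput(out_data, expected_path.c_str());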
| @@ -488,15 +483,19 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string & | |||
| TEST_F(NetworkTest, efficient_net) { | |||
| char *buf = nullptr; | |||
| size_t net_size = 0; | |||
| std::string net = "./test_data/nets/efficientnet_b0_f.ms"; | |||
| // std::string net = "./test_data/nets/efficientnet_b0_f.ms"; | |||
| std::string net = "./test_data/nets/effnetb0_fwd_nofuse.ms"; | |||
| ReadFile(net.c_str(), &net_size, &buf); | |||
| auto model = lite::Model::Import(buf, net_size); | |||
| delete [] buf; | |||
| auto context = new lite::Context; | |||
| context->device_type_ = lite::DT_CPU; | |||
| context->cpu_bind_mode_ = lite::NO_BIND; | |||
| context->thread_num_ = 1; | |||
| auto session = new mindspore::session::TrainSession(); | |||
| // auto session = new mindspore::lite::LiteSession(); | |||
| ASSERT_NE(session, nullptr); | |||
| auto ret = session->Init(context); | |||
| ASSERT_EQ(lite::RET_OK, ret); | |||
| @@ -506,7 +505,7 @@ TEST_F(NetworkTest, efficient_net) { | |||
| #if 0 | |||
| std::string path = "/opt/share/MiniBinEmbDataset/"; | |||
| auto res = fileIterator(session, path, [](mindspore::session::TrainSession *session, const std::string &in) { | |||
| auto res = fileIterator(session, path, [](mindspore::lite::LiteSession *session, const std::string &in) { | |||
| int32_t res = 0; | |||
| if (in.find(".bin") != std::string::npos) { | |||
| std::string out; | |||
| @@ -549,6 +548,9 @@ TEST_F(NetworkTest, efficient_net) { | |||
| // float* output_data = reinterpret_cast<float *>(output.at(0)->MutableData()); | |||
| // int res = lite::CompareRelativeOutput(output_data, output_path); | |||
| ASSERT_EQ(res, 0); | |||
| delete model; | |||
| delete session; | |||
| delete context; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -54,11 +54,12 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) { | |||
| pooling_param->pad_l_ = 1; | |||
| pooling_param->pad_r_ = 1; | |||
| pooling_param->thread_num_ = 1; | |||
| pooling_param->global_ = false; | |||
| } | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = new PoolingParameter(); | |||
| auto pooling_param = static_cast<PoolingParameter*>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -95,20 +96,21 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; | |||
| lite::CompareOutput(output_data, output_path); | |||
| auto res = lite::CompareOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] output_data; | |||
| delete pooling_param; | |||
| free(pooling_param); | |||
| MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed"; | |||
| } | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = new PoolingParameter(); | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_AvgPool; | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| @@ -150,7 +152,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; | |||
| lite::CompareOutput(output_data, output_path); | |||
| auto res = lite::CompareOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] input1_data; | |||
| @@ -165,38 +168,36 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = new PoolingParameter(); | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->input_batch_ = 3; | |||
| pooling_param->output_batch_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_AvgPool; | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| // uint64_t time_avg = 0; | |||
| size_t output_data_size = | |||
| pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->input_h_ * pooling_param->input_w_; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::vector<int> dim_dy({1, 28, 28, 3}); | |||
| std::vector<int> dim_dy({3, 28, 28, 3}); | |||
| lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); | |||
| dy_tensor.SetData(input_data); | |||
| std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin"; | |||
| auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); | |||
| std::vector<int> dim_x({1, 28, 28, 3}); | |||
| std::vector<int> dim_x({3, 28, 28, 3}); | |||
| lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| x_tensor.SetData(input1_data); | |||
| std::vector<lite::Tensor *> inputs = {&dy_tensor, &x_tensor}; | |||
| auto output_data = new float[output_data_size]; | |||
| std::vector<int> dim_dx({1, 28, 28, 3}); | |||
| std::vector<int> dim_dx({3, 28, 28, 3}); | |||
| lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); | |||
| dx_tensor.SetData(output_data); | |||
| dx_tensor.MallocData(); | |||
| auto output_data = reinterpret_cast<float *>(dx_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> outputs = {&dx_tensor}; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; | |||
| @@ -212,12 +213,11 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_3_28_28_3.bin"; | |||
| lite::CompareOutput(output_data, output_path); | |||
| auto res = lite::CompareOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] input1_data; | |||
| delete[] output_data; | |||
| dx_tensor.SetData(nullptr); | |||
| x_tensor.SetData(nullptr); | |||
| dy_tensor.SetData(nullptr); | |||
| // delete pooling_param; | |||
| @@ -228,7 +228,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto pool = new PoolingParameter(); | |||
| auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pool); | |||
| pool->output_channel_ = 3; | |||
| pool->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -240,7 +240,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| pool->stride_w_ = 2; | |||
| size_t input_size; | |||
| size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_; | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); | |||
| @@ -253,11 +252,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.SetData(yt_data); | |||
| auto out_data = new float[y_data_size]; | |||
| lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| out_tensor.SetData(out_data); | |||
| out_tensor.MallocData(); | |||
| float *out_data = static_cast<float *>(out_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> inputs = {&yt_tensor, &x_tensor}; | |||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||
| // ---------------------------------------- | |||
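Rather than allocating the output buffer with new[] and attaching it via SetData (which then requires delete[] and SetData(nullptr) during cleanup), the tensor now allocates and owns its buffer through MallocData(), and the test only borrows the pointer via MutableData(). A minimal sketch of the pattern, assuming the tensor frees its own buffer on destruction:

lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.MallocData();                                          // tensor owns the buffer
auto *out_data = static_cast<float *>(out_tensor.MutableData());
// ... run the kernel and compare out_data against the reference file ...
// no delete[] out_data and no out_tensor.SetData(nullptr) needed afterwards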
| @@ -274,7 +271,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin"; | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] x_data; | |||
| @@ -283,7 +279,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| // delete conv_param; | |||
| x_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| MS_LOG(INFO) << "AvgPoolGradStride2Fp32 Filter Grad passed"; | |||
| } | |||
| @@ -291,7 +286,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { | |||
| TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto pool = new PoolingParameter(); | |||
| auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pool); | |||
| pool->output_channel_ = 3; | |||
| pool->pool_mode_ = PoolMode_AvgPool; | |||
| @@ -303,7 +298,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| pool->stride_w_ = 3; | |||
| size_t input_size; | |||
| size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_; | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); | |||
| @@ -317,9 +311,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.SetData(yt_data); | |||
| auto out_data = new float[y_data_size]; | |||
| lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| out_tensor.SetData(out_data); | |||
| out_tensor.MallocData(); | |||
| auto out_data = static_cast<float *>(out_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> inputs = {&yt_tensor, &x_tensor}; | |||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||
| @@ -346,14 +340,13 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { | |||
| // delete conv_param; | |||
| x_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| MS_LOG(INFO) << "AvgPoolGradStride3Fp32 Filter Grad passed"; | |||
| } | |||
| TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { | |||
| // prepare stage | |||
| auto pooling_param = new PoolingParameter(); | |||
| auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(pooling_param); | |||
| pooling_param->output_channel_ = 3; | |||
| pooling_param->pool_mode_ = PoolMode_MaxPool; | |||
| @@ -395,10 +388,11 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_1_28_28_3.bin"; | |||
| lite::CompareOutput(output_data, output_path); | |||
| auto res = lite::CompareOutput(output_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| free(pooling_param); | |||
| delete[] in_data; | |||
| delete pooling_param; | |||
| delete[] dy_data; | |||
| delete[] dx_data; | |||
| delete[] output_data; | |||
| @@ -526,7 +520,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) { | |||
| TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = new PoolingParameter(); | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->pool_mode_ = PoolMode_MaxPool; | |||
| @@ -534,7 +528,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| maxpool->output_batch_ = 3; | |||
| size_t input_size; | |||
| size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size)); | |||
| @@ -553,10 +546,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.SetData(yt_data); | |||
| auto out_data = new float[y_data_size]; | |||
| lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| out_tensor.SetData(out_data); | |||
| out_tensor.MallocData(); | |||
| auto out_data = static_cast<float *>(out_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; | |||
| std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor}; | |||
| // ---------------------------------------- | |||
| @@ -585,7 +577,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| x_tensor.SetData(nullptr); | |||
| y_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| MS_LOG(INFO) << "MaxPoolGradBatchFp32 Filter Grad passed"; | |||
| } | |||
| @@ -593,7 +584,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { | |||
| TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = new PoolingParameter(); | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->input_channel_ = 3; | |||
| @@ -606,7 +597,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| maxpool->stride_w_ = 2; | |||
| size_t input_size; | |||
| size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); | |||
| @@ -625,9 +615,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.SetData(yt_data); | |||
| auto out_data = new float[y_data_size]; | |||
| lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| out_tensor.SetData(out_data); | |||
| out_tensor.MallocData(); | |||
| auto out_data = static_cast<float *>(out_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; | |||
| std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor}; | |||
| @@ -657,7 +647,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| x_tensor.SetData(nullptr); | |||
| y_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| MS_LOG(INFO) << "MaxPoolGradStride2Fp32 Filter Grad passed"; | |||
| } | |||
| @@ -665,7 +654,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { | |||
| TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| // prepare stage | |||
| // input size will be equal to the original size of x, output size will be the output size as in forward | |||
| auto maxpool = new PoolingParameter(); | |||
| auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter))); | |||
| InitPoolingParamFP32(maxpool); | |||
| maxpool->output_channel_ = 3; | |||
| maxpool->input_channel_ = 3; | |||
| @@ -678,7 +667,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| maxpool->stride_w_ = 3; | |||
| size_t input_size; | |||
| size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; | |||
| auto x_data = reinterpret_cast<float *>( | |||
| mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); | |||
| @@ -697,9 +685,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); | |||
| yt_tensor.SetData(yt_data); | |||
| auto out_data = new float[y_data_size]; | |||
| lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); | |||
| out_tensor.SetData(out_data); | |||
| out_tensor.MallocData(); | |||
| auto out_data = static_cast<float *>(out_tensor.MutableData()); | |||
| std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; | |||
| std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor}; | |||
| @@ -729,7 +717,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { | |||
| x_tensor.SetData(nullptr); | |||
| y_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| MS_LOG(INFO) << "MaxPoolGradStride3Fp32 Filter Grad passed"; | |||
| } | |||
| @@ -0,0 +1,696 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| // #include "mindspore/lite/src/ir/tensor.h" | |||
| // #include "mindspore/lite/src/lite_kernel.h" | |||
| #include "mindspore/lite/include/context.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "src/common/utils.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/file_utils_ext.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h" | |||
| #include "mindspore/lite/nnacl/fp32_grad/softmax_grad.h" | |||
| namespace mindspore { | |||
| class TestSoftmaxGradFp32 : public mindspore::CommonTest { | |||
| public: | |||
| TestSoftmaxGradFp32() {} | |||
| }; | |||
| void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis) { | |||
| softmax_param->axis_ = axis; | |||
| softmax_param->element_size_ = 1188; | |||
| softmax_param->n_dim_ = 4; | |||
| softmax_param->input_shape_[0] = 1; | |||
| softmax_param->input_shape_[1] = 9; | |||
| softmax_param->input_shape_[2] = 11; | |||
| softmax_param->input_shape_[3] = 12; | |||
| } | |||
| void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, int h, int w) { | |||
| softmax_param->axis_ = axis; | |||
| softmax_param->element_size_ = n * c * h * w; | |||
| softmax_param->n_dim_ = 4; | |||
| softmax_param->input_shape_[0] = n; | |||
| softmax_param->input_shape_[1] = c; | |||
| softmax_param->input_shape_[2] = h; | |||
| softmax_param->input_shape_[3] = w; | |||
| } | |||
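The tests below size their scratch buffers from these parameters: inner_size is the product of the dimensions after the softmax axis, and sum_mul additionally spans the axis dimension itself. A small worked example for the default 1x9x11x12 shape:

// Worked example for InitSoftMaxParam(param, 1) with input_shape {1, 9, 11, 12}:
//   axis_      = 1
//   inner_size = 11 * 12 = 132            (product of dims after the axis)
//   sum_data   = new float[132]
//   sum_mul    = new float[132 * 9] = new float[1188]   (inner_size * input_shape_[axis_])
// For axis_ = -1 the tests first remap it to n_dim_ - 1 = 3, giving inner_size = 1.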
| #if 0 // kernel testing | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis0) { | |||
| auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter))); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 0); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| input_tensor.SetData(input_data); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| yt_tensor.SetData(yt_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| out_tensor.SetData(out_data); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| // float sum_data[6]; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), NULL, desc, nullptr); | |||
| kernel->Init(); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel->Run(); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| input_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| // delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradKernelAxis0 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis1) { | |||
| auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter))); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 1); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| input_tensor.SetData(input_data); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| yt_tensor.SetData(yt_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| out_tensor.SetData(out_data); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| // float sum_data[6]; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), NULL, desc, nullptr); | |||
| kernel->Init(); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel->Run(); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| input_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| // delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradKernelAxis1 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis2) { | |||
| auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter))); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 2); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| input_tensor.SetData(input_data); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| yt_tensor.SetData(yt_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| out_tensor.SetData(out_data); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| // float sum_data[6]; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), NULL, desc, nullptr); | |||
| kernel->Init(); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel->Run(); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| input_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| // delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradKernelAxis2 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis3) { | |||
| auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter))); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 3); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| input_tensor.SetData(input_data); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| yt_tensor.SetData(yt_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| out_tensor.SetData(out_data); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| // float sum_data[6]; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), NULL, desc, nullptr); | |||
| kernel->Init(); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel->Run(); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| input_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| // delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradKernelAxis3 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxisMinus1) { | |||
| auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter))); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, -1); | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| input_tensor.SetData(input_data); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| yt_tensor.SetData(yt_data); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); | |||
| out_tensor.SetData(out_data); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| // float sum_data[6]; | |||
| kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), NULL, desc, nullptr); | |||
| kernel->Init(); | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| kernel->Run(); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| kernel->Run(); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| input_tensor.SetData(nullptr); | |||
| yt_tensor.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| delete kernel; | |||
| // delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradKernelAxisMinus1 passed"; | |||
| } | |||
| #endif // kernel testing | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 0); | |||
| int inner_size = 1; | |||
| if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; | |||
| for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_out.bin"; | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| delete[] sum_data; | |||
| delete[] sum_mul; | |||
| delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradAxis0 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 1); | |||
| int inner_size = 1; | |||
| if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; | |||
| for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| delete[] sum_data; | |||
| delete[] sum_mul; | |||
| delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradAxis1 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 2); | |||
| int inner_size = 1; | |||
| if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; | |||
| for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| delete[] sum_data; | |||
| delete[] sum_mul; | |||
| delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradAxis2 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, 3); | |||
| int inner_size = 1; | |||
| if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; | |||
| for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| delete[] sum_data; | |||
| delete[] sum_mul; | |||
| delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradAxis3 passed"; | |||
| } | |||
| TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { | |||
| auto softmax_param = new SoftmaxParameter(); | |||
| // set parameters | |||
| InitSoftMaxParam(softmax_param, -1); | |||
| int inner_size = 1; | |||
| if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; | |||
| for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { | |||
| inner_size *= softmax_param->input_shape_[i]; | |||
| } | |||
| float *sum_data = new (std::nothrow) float[inner_size]; | |||
| float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; | |||
| std::vector<int> shape = {1, 9, 11, 12}; | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; | |||
| auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| auto out_data = new float[softmax_param->element_size_]; | |||
| // warm up loop | |||
| for (int i = 0; i < 3; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| int loop_count = 3; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin"; | |||
| // auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto res = lite::CompareRelativeOutput(out_data, output_path); | |||
| EXPECT_EQ(res, 0); | |||
| delete[] input_data; | |||
| delete[] yt_data; | |||
| delete[] out_data; | |||
| delete[] sum_data; | |||
| delete[] sum_mul; | |||
| delete softmax_param; | |||
| MS_LOG(INFO) << "SoftmaxGradAxisMinus1 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -56,6 +56,36 @@ void AnfExporter::RemoveIfMakeTuple(const CNodePtr &cnode) { | |||
| } | |||
| } | |||
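| // Splice out Depend nodes: any Depend input of cnode is replaced by that Depend node's own CNode inputs | |||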
| void AnfExporter::RemoveIfDepend(const CNodePtr &cnode) { | |||
| bool hasDepend = false; | |||
| std::vector<AnfNodePtr> inputs; | |||
| inputs.clear(); | |||
| inputs.emplace_back(cnode->input(0)); | |||
| for (size_t i = 1; i < cnode->inputs().size(); ++i) { | |||
| AnfNodePtr inputNode = cnode->input(i); | |||
| if (!inputNode->isa<CNode>()) { | |||
| inputs.emplace_back(cnode->input(i)); | |||
| continue; | |||
| } | |||
| auto dependNode = utils::cast<CNodePtr>(inputNode); | |||
| if (IsPrimitiveCNode(dependNode, schema::PrimitiveType_Depend)) { | |||
| hasDepend = true; | |||
| for (size_t j = 1; j < dependNode->inputs().size(); ++j) { | |||
| AnfNodePtr dependInputNode = dependNode->input(j); | |||
| if (dependInputNode->isa<CNode>()) { | |||
| inputs.emplace_back(dependInputNode); | |||
| } | |||
| } | |||
| } else { | |||
| inputs.emplace_back(cnode->input(i)); | |||
| } | |||
| } | |||
| if (hasDepend) { | |||
| cnode->set_inputs(inputs); | |||
| } | |||
| } | |||
| int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph, | |||
| const std::shared_ptr<PrimitiveC> primitive, | |||
| const std::unique_ptr<schema::CNodeT> &dst_node) { | |||
| @@ -175,10 +205,12 @@ schema::MetaGraphT *AnfExporter::Export(const FuncGraphPtr &func_graph, bool kee | |||
| return nullptr; | |||
| } | |||
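| // TupleGetItem, MakeTuple and Depend nodes are not exported as operators; their inputs are spliced into other nodes below | |||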
| if (primitive_c->Type() == schema::PrimitiveType_TupleGetItem || | |||
| primitive_c->Type() == schema::PrimitiveType_MakeTuple) { | |||
| primitive_c->Type() == schema::PrimitiveType_MakeTuple || | |||
| primitive_c->Type() == schema::PrimitiveType_Depend) { | |||
| continue; | |||
| } | |||
| RemoveIfMakeTuple(cnode); | |||
| RemoveIfDepend(cnode); | |||
| auto primT = primitive_c->GetPrimitiveT(); | |||
| auto node = std::make_unique<schema::CNodeT>(); | |||
| @@ -336,9 +368,49 @@ int AnfExporter::ConvertInputValueNode(std::shared_ptr<AnfNode> input_anode, | |||
| output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); | |||
| meta_graphT->allTensors.emplace_back(std::move(paramTensor)); | |||
| } else if (value->isa<mindspore::ValueSequeue>()) { | |||
| MS_LOG(DEBUG) << "Value type is ValueSequence."; | |||
| return RET_OK; | |||
| } else { | |||
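| // Inspect the value node's abstract: a tuple of Int32Imm elements (e.g. a shape) is exported as an int32 tensor | |||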
| auto valueAbstract = valueNode->abstract(); | |||
| auto abstractSequence = utils::cast<abstract::AbstractSequeuePtr>(valueAbstract); | |||
| if (abstractSequence->isa<abstract::AbstractTuple>()) { | |||
| auto abstractTuple = utils::cast<abstract::AbstractTuplePtr>(valueAbstract); | |||
| auto x_shape_data = abstractTuple->elements(); | |||
| std::vector<int32_t> shape; | |||
| for (std::size_t i = 0; i < abstractTuple->size(); ++i) { | |||
| auto value_track = x_shape_data[i]->GetValueTrack(); | |||
| MS_EXCEPTION_IF_NULL(value_track); | |||
| if (value_track->isa<Int32Imm>()) { | |||
| shape.push_back((GetValue<int>(value_track))); | |||
| } else { | |||
| MS_LOG(ERROR) << "Value type is ValueSequence is not integer, it is " | |||
| << value_track->ToString() << "."; | |||
| } | |||
| } | |||
| if (shape.size()) { | |||
| auto typePtr = abstractTuple->elements()[0]->GetTypeTrack(); // abstractTuple->GetTypeTrack(); | |||
| paramTensor->dataType = typePtr->type_id(); | |||
| paramTensor->dims = {static_cast<int32_t>(shape.size())}; | |||
| paramTensor->nodeType = schema::NodeType_ValueNode; | |||
| paramTensor->data.resize(shape.size() * sizeof(int)); | |||
| memcpy(paramTensor->data.data(), shape.data(), shape.size() * sizeof(int)); | |||
| node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size(); | |||
| output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); | |||
| meta_graphT->allTensors.emplace_back(std::move(paramTensor)); | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Value type is ValueSequence not supported - " << valueAbstract->type_name() << "."; | |||
| } | |||
| } else if (value->isa<mindspore::BoolImm>()) { | |||
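| // a scalar bool value is exported as a single-element tensor | |||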
| auto valueAbstract = valueNode->abstract(); | |||
| auto abstractScalar = utils::cast<abstract::AbstractScalarPtr>(valueAbstract); | |||
| auto typePtr = abstractScalar->GetTypeTrack(); | |||
| paramTensor->dataType = typePtr->type_id(); | |||
| paramTensor->dims = {1}; | |||
| paramTensor->nodeType = schema::NodeType_ValueNode; | |||
| auto data = value->cast<mindspore::BoolImmPtr>(); | |||
| paramTensor->data.emplace_back(data->value()); | |||
| node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size(); | |||
| output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); | |||
| meta_graphT->allTensors.emplace_back(std::move(paramTensor)); | |||
| } else { | |||
| MS_LOG(ERROR) << "Not support value type , need add support."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -36,6 +36,7 @@ class AnfExporter { | |||
| int SetOpInputNode(const CNodePtr &cnode, const std::unique_ptr<schema::MetaGraphT> &meta_graphT, | |||
| schema::CNodeT *fb_node); | |||
| void RemoveIfMakeTuple(const CNodePtr &cnode); | |||
| void RemoveIfDepend(const CNodePtr &cnode); | |||
| protected: | |||
| int ConvertInputCNode(const std::shared_ptr<AnfNode> input_anode, schema::CNodeT *output_cnode); | |||
| @@ -30,6 +30,7 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = { | |||
| schema::PrimitiveType_Conv2DGradInput, | |||
| schema::PrimitiveType_PoolingGrad, | |||
| schema::PrimitiveType_BiasGrad, | |||
| schema::PrimitiveType_BNGrad, | |||
| #endif | |||
| schema::PrimitiveType_Conv2D, | |||
| schema::PrimitiveType_DeConv2D, | |||
| @@ -39,7 +40,20 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = { | |||
| schema::PrimitiveType_Resize, | |||
| schema::PrimitiveType_BatchNorm, | |||
| schema::PrimitiveType_FusedBatchNorm, | |||
| schema::PrimitiveType_PReLU}; | |||
| schema::PrimitiveType_PReLU, | |||
| schema::PrimitiveType_BiasAdd}; | |||
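| // ops for which FormatTransPass inserts a format-transpose before the first two inputs | |||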
| static const std::vector<schema::PrimitiveType> nhwcOpDualInputList = { | |||
| #ifdef SUPPORT_TRAIN | |||
| schema::PrimitiveType_Conv2DGradFilter | |||
| #endif | |||
| }; | |||
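| // ops for which FormatTransPass inserts a format-transpose before every input | |||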
| static const std::vector<schema::PrimitiveType> nhwcOpAllInputList = { | |||
| #ifdef SUPPORT_TRAIN | |||
| schema::PrimitiveType_PoolingGrad | |||
| #endif | |||
| }; | |||
| static const std::vector<schema::PrimitiveType> fp32FullOpList = { | |||
| schema::PrimitiveType_Concat, schema::PrimitiveType_Add, | |||
| @@ -73,6 +87,10 @@ std::vector<schema::PrimitiveType> Getfp32FullOpList() { return fp32FullOpList; | |||
| std::vector<schema::PrimitiveType> GetNhwcOpList() { return nhwcOpList; } | |||
| std::vector<schema::PrimitiveType> GetNhwcDualInputOpList() { return nhwcOpDualInputList; } | |||
| std::vector<schema::PrimitiveType> GetNhwcAllInputOpList() { return nhwcOpAllInputList; } | |||
| std::vector<schema::PrimitiveType> GetUint8NhwcOpList() { return int8NeedNhwcOpList; } | |||
| std::vector<schema::PrimitiveType> GetUint8OpList() { return int8OpList; } | |||
| @@ -36,6 +36,8 @@ std::vector<schema::PrimitiveType> GetNhwcOpList(); | |||
| std::vector<schema::PrimitiveType> GetNhwcDualInputOpList(); | |||
| std::vector<schema::PrimitiveType> GetNhwcAllInputOpList(); | |||
| std::vector<schema::PrimitiveType> Getfp32FullOpList(); | |||
| std::vector<schema::PrimitiveType> GetUint8NhwcOpList(); | |||
| @@ -40,17 +40,24 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver | |||
| // fusion const_fold | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>("anf fusion pass manager", false); | |||
| pm->AddPass(std::make_shared<opt::ConvBiasaddFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvBatchNormFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvScaleFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu", schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU)); | |||
| pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu6", schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU6)); | |||
| pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>( | |||
| true, "conv_tuple_relu", schema::PrimitiveType_Activation, schema::ActivationType_RELU)); | |||
| pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>( | |||
| true, "conv_tuple_relu6", schema::PrimitiveType_Activation, schema::ActivationType_RELU6)); | |||
| // for now - training does not support fused operations | |||
| if (config != nullptr && config->trainModel == false) { | |||
| pm->AddPass(std::make_shared<opt::ConvBiasaddFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvBatchNormFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvScaleFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu", schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU)); | |||
| pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu6", schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU6)); | |||
| pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(true, "conv_tuple_relu", | |||
| schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU)); | |||
| pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(true, "conv_tuple_relu6", | |||
| schema::PrimitiveType_Activation, | |||
| schema::ActivationType_RELU6)); | |||
| } | |||
| pm->AddPass(std::make_shared<opt::ConstFoldPass>()); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(old_graph); | |||
| @@ -41,6 +41,8 @@ Flags::Flags() { | |||
| "16"); | |||
| AddFlag(&Flags::configFile, "config_file", "Configuration for post-training.", ""); | |||
| AddFlag(&Flags::formatTrans, "formatTrans", "whether transform format. true | false", "true"); | |||
| AddFlag(&Flags::trainModelIn, "trainModel", "whether the model is going to be trained on device." | |||
| " true | false", "false"); | |||
| } | |||
| int Flags::Init(int argc, const char **argv) { | |||
| @@ -128,6 +130,15 @@ int Flags::Init(int argc, const char **argv) { | |||
| return 1; | |||
| } | |||
| if (this->trainModelIn == "true") { | |||
| this->trainModel = true; | |||
| } else if (this->trainModelIn == "false") { | |||
| this->trainModel = false; | |||
| } else { | |||
| std::cerr << "INPUT ILLEGAL: trainModel must be true|false "; | |||
| return 1; | |||
| } | |||
| return 0; | |||
| } | |||
| } // namespace converter | |||
| @@ -68,6 +68,8 @@ class Flags : public virtual mindspore::lite::FlagParser { | |||
| std::string configFile; | |||
| bool formatTrans = true; | |||
| std::string convWeightQuantChannelThreshold; | |||
| std::string trainModelIn; | |||
| bool trainModel = false; | |||
| }; | |||
| } // namespace converter | |||
| } // namespace lite | |||
| @@ -146,11 +146,29 @@ STATUS FormatTransPass::DoNodeInoutFormatTrans(schema::MetaGraphT *graph) { | |||
| MS_LOG(ERROR) << "InsertNhwc2NchwNode before " << nodeName << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed"; | |||
| return RET_ERROR; | |||
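| // ops in the all-input / dual-input NHWC lists get a transpose inserted before every / the first two input(s); other ops get a single transpose inserted after the node | |||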
| if (IsContain(GetNhwcAllInputOpList(), GetCNodeTType(**iter))) { | |||
| int idx_num = node->inputIndex.size(); | |||
| for (int i = 0; i < idx_num; i++) { | |||
| iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } else if (IsContain(GetNhwcDualInputOpList(), GetCNodeTType(**iter))) { | |||
| for (int i = 0; i < 2; i++) { | |||
| iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } else { | |||
| iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||