diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
index 83b4d40838..04da736190 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
@@ -60,32 +60,36 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   for (size_t i = 0; i < perms_num; i++) {
     ShapePush(perm, &perm_size, perm_data[i]);
   }
-  int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
-  if (input->shape_size_ != 4 && perms_num == 4) {
-    for (size_t i = 0; i < input->shape_size_; ++i) {
-      out_shape[i] = input->shape_[i];
-    }
-    SetShapeArray(output, out_shape, input->shape_size_);
-    return NNACL_OK;
-  }
   const int nchw2nhwc[4] = {0, 2, 3, 1};
   const int nhwc2nchw[4] = {0, 3, 1, 2};
+  const int trans3d[3] = {0, 2, 1};
   if (perms_num == 4) {
     if (input->format_ == Format_NCHW && CheckPermTransFormat(perm, nchw2nhwc, perms_num)) {
       output->format_ = Format_NHWC;
     } else if (input->format_ == Format_NHWC && CheckPermTransFormat(perm, nhwc2nchw, perms_num)) {
       output->format_ = Format_NCHW;
     }
+    // Though the perm is 4-D by default, the input can be a 3-D tensor; the op implementation must handle this.
+    if (input->shape_size_ == 3) {
+      ShapeSet(perm, &perm_size, trans3d, 3);
+    }
   }
-  output->shape_size_ = perm_size;
-  for (size_t i = 0; i < perm_size; ++i) {
-    out_shape[i] = input->shape_[perm[i]];
-  }
+  // set output shape
+  int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
+  size_t in_shape_size = input->shape_size_;
+  output->shape_size_ = in_shape_size;
   if (perm_size == 0) {
-    size_t shape_size = input->shape_size_;
-    output->shape_size_ = shape_size;
-    for (size_t i = 0; i < shape_size; ++i) {
-      out_shape[shape_size - i - 1] = input->shape_[i];
+    for (size_t i = 0; i < in_shape_size; ++i) {
+      out_shape[in_shape_size - i - 1] = input->shape_[i];
+    }
+  } else if (perm_size != in_shape_size) {
+    for (size_t i = 0; i < in_shape_size; ++i) {
+      out_shape[i] = input->shape_[i];
+    }
+  } else {
+    output->shape_size_ = perm_size;
+    for (size_t i = 0; i < perm_size; ++i) {
+      out_shape[i] = input->shape_[perm[i]];
     }
   }
   SetShapeArray(output, out_shape, output->shape_size_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
index f003bc1255..b39db4e27e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
@@ -41,19 +41,6 @@ int TransposeFp16CPUKernel::Run() {
   MS_ASSERT(in_tensors_.size() == 1 || in_tensors_.size() == 2);
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
   param->data_size_ = in_tensors_[0]->Size();
-  if (in_tensors_.size() == 2) {
-    auto input_perm = in_tensors_.at(1);
-    MS_ASSERT(input_perm != nullptr);
-    MS_ASSERT(input_perm->data_c() != nullptr);
-    int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
-    for (int i = 0; i < input_perm->ElementsNum(); ++i) {
-      param->perm_[i] = perm_data[i];
-    }
-    for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
-      param->perm_[i] = 0;
-    }
-    param->num_axes_ = input_perm->ElementsNum();
-  }
   MS_ASSERT(out_tensors_.size() == 1);
   auto &in_tensor = in_tensors_.front();
   auto &out_tensor = out_tensors_.front();
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
index 210e97c273..1a6f24ffb9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
@@ -40,13 +40,22 @@ int TransposeCPUKernel::ReSize() {
   if (in_tensors_.size() == 2) {
     param->num_axes_ = in_tensors_.at(1)->ElementsNum();
   }
-  if (in_tensors_.at(kInputIndex)->shape().size() != static_cast<size_t>(param->num_axes_)) {
-    return RET_OK;
+  int trans3d[3] = {0, 2, 1};
+  int *perm_data = nullptr;
+  auto input_tensor = in_tensors_.at(kInputIndex);
+  if (input_tensor->shape().size() != static_cast<size_t>(param->num_axes_)) {
+    if (input_tensor->shape().size() == 3 && param->num_axes_ == 4) {
+      param->num_axes_ = 3;
+      perm_data = trans3d;
+    } else {
+      return RET_OK;
+    }
+  } else {
+    MS_ASSERT(in_tensors_.size() == 2);
+    auto perm_tensor = in_tensors_.at(1);
+    perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
   }
-  // get perm data
-  MS_ASSERT(in_tensors_.size() == 2);
-  auto perm_tensor = in_tensors_.at(1);
-  int *perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
+  // set perm data
   MS_ASSERT(perm_data != nullptr);
   for (int i = 0; i < param->num_axes_; ++i) {
     param->perm_[i] = perm_data[i];
@@ -144,18 +153,6 @@ int TransposeCPUKernel::Run() {
     memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float));
     return RET_OK;
   }
-  if (in_tensors_.size() == 2) {
-    auto input_perm = in_tensors_.at(1);
-    MS_ASSERT(input_perm != nullptr);
-    MS_ASSERT(input_perm->data_c() != nullptr);
-    int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
-    for (int i = 0; i < input_perm->ElementsNum(); ++i) {
-      param_->perm_[i] = perm_data[i];
-    }
-    for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
-      param_->perm_[i] = 0;
-    }
-  }
   thread_count_ = op_parameter_->thread_num_;
   GetNHNCTransposeFunc(in_tensor, out_tensor, param_);
   if (NHNCTransposeFunc_ != nullptr) {
diff --git a/mindspore/lite/test/models_onnx.cfg b/mindspore/lite/test/models_onnx.cfg
index c0878f2bf3..ccbc791e4c 100644
--- a/mindspore/lite/test/models_onnx.cfg
+++ b/mindspore/lite/test/models_onnx.cfg
@@ -79,3 +79,4 @@ Q_face_recognition.onnx
 Q888_face_recognition.onnx
 Q888_iris_detect.onnx
 simple_IPS_model_4D_input.onnx
+Harmony_Voiceprint.onnx;1,200,40,1
diff --git a/mindspore/lite/test/models_onnx_fp16.cfg b/mindspore/lite/test/models_onnx_fp16.cfg
index d4da1babc8..f94d374d10 100644
--- a/mindspore/lite/test/models_onnx_fp16.cfg
+++ b/mindspore/lite/test/models_onnx_fp16.cfg
@@ -86,3 +86,6 @@ ml_video_edit_enhance_update_tmp.onnx 0.5
 Q888_face_recognition.onnx 3.5
 Q888_iris_detect.onnx 0.5
 ssd_mobilenet_v1_10.onnx;1,383,640,3 0.5
+# The output of a conv layer in the later part of the model contains many negative values; the following LeakyReLU
+# maps them very close to 0 (-e^-4). FP16 loses a lot of precision here, which affects the subsequent computation.
+Harmony_Voiceprint.onnx;1,200,40,1 5.5
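
For reference, here is a minimal standalone sketch (not the nnacl API; the function name and flattened signature are illustrative) of the output-shape rules the patched `TransposeInferShape` follows. Note that in the patch a 3-D input with the default 4-D perm has its perm remapped to `trans3d = {0, 2, 1}` before this logic runs, so it takes the normal permute branch; the pass-through branch covers the remaining rank mismatches.

```c
#include <stddef.h>

/* Illustrative sketch only. Writes the inferred output shape into
 * out_shape and returns the number of output dims. */
static size_t infer_transpose_shape(const int *in_shape, size_t in_dims,
                                    const int *perm, size_t perm_size,
                                    int *out_shape) {
  if (perm_size == 0) {
    /* No perm supplied: reverse the input shape (default transpose). */
    for (size_t i = 0; i < in_dims; ++i) {
      out_shape[in_dims - i - 1] = in_shape[i];
    }
  } else if (perm_size != in_dims) {
    /* Rank mismatch: pass the input shape through unchanged and leave
     * the decision to the kernel's ReSize. */
    for (size_t i = 0; i < in_dims; ++i) {
      out_shape[i] = in_shape[i];
    }
  } else {
    /* Matching perm: out_shape[i] = in_shape[perm[i]]. */
    for (size_t i = 0; i < perm_size; ++i) {
      out_shape[i] = in_shape[perm[i]];
    }
  }
  return in_dims;
}
```

For example, a 3-D input `{2, 3, 4}` with the remapped perm `{0, 2, 1}` yields `{2, 4, 3}`, which is what the fp32 kernel then computes once ReSize substitutes `trans3d` for the stale 4-D perm.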
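
The raised fp16 tolerance for Harmony_Voiceprint.onnx can be illustrated with a small demo of half-precision round-off near zero. This is a sketch under assumptions: the LeakyReLU slope and sample values are made up, and it assumes a compiler with `_Float16` support (e.g., recent GCC or Clang).

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  const float alpha = 0.01f; /* hypothetical LeakyReLU negative slope */
  const float conv_out[] = {-0.0123f, -0.0057f, 0.31f, -0.0009f};
  for (int i = 0; i < 4; ++i) {
    /* LeakyReLU squeezes negative conv outputs close to zero ... */
    float y = conv_out[i] > 0.0f ? conv_out[i] : alpha * conv_out[i];
    /* ... and a round-trip through half precision then costs up to
     * roughly 2^-11 (about 5e-4) relative error, which can compound
     * through the layers that follow. */
    float y16 = (float)(_Float16)y;
    printf("fp32 % .7e  fp16 % .7e  rel.err %.2e\n", y, y16,
           y != 0.0f ? fabsf((y16 - y) / y) : 0.0f);
  }
  return 0;
}
```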