From: @probiotics_53
Reviewed-by: @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
pull/14982/MERGE
@@ -60,32 +60,36 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   for (size_t i = 0; i < perms_num; i++) {
     ShapePush(perm, &perm_size, perm_data[i]);
   }
-  int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
-  if (input->shape_size_ != 4 && perms_num == 4) {
-    for (size_t i = 0; i < input->shape_size_; ++i) {
-      out_shape[i] = input->shape_[i];
-    }
-    SetShapeArray(output, out_shape, input->shape_size_);
-    return NNACL_OK;
-  }
   const int nchw2nhwc[4] = {0, 2, 3, 1};
   const int nhwc2nchw[4] = {0, 3, 1, 2};
+  const int trans3d[3] = {0, 2, 1};
   if (perms_num == 4) {
     if (input->format_ == Format_NCHW && CheckPermTransFormat(perm, nchw2nhwc, perms_num)) {
       output->format_ = Format_NHWC;
     } else if (input->format_ == Format_NHWC && CheckPermTransFormat(perm, nhwc2nchw, perms_num)) {
       output->format_ = Format_NCHW;
     }
+    // though the perm is 4d in default, the input can be a 3d tensor. The op implementation should be adapted to this.
+    if (input->shape_size_ == 3) {
+      ShapeSet(perm, &perm_size, trans3d, 3);
+    }
   }
-  output->shape_size_ = perm_size;
-  for (size_t i = 0; i < perm_size; ++i) {
-    out_shape[i] = input->shape_[perm[i]];
-  }
+  // set output shape
+  int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
+  size_t in_shape_size = input->shape_size_;
+  output->shape_size_ = in_shape_size;
   if (perm_size == 0) {
-    size_t shape_size = input->shape_size_;
-    output->shape_size_ = shape_size;
-    for (size_t i = 0; i < shape_size; ++i) {
-      out_shape[shape_size - i - 1] = input->shape_[i];
+    for (size_t i = 0; i < in_shape_size; ++i) {
+      out_shape[in_shape_size - i - 1] = input->shape_[i];
+    }
+  } else if (perm_size != in_shape_size) {
+    for (size_t i = 0; i < in_shape_size; ++i) {
+      out_shape[i] = input->shape_[i];
+    }
+  } else {
+    output->shape_size_ = perm_size;
+    for (size_t i = 0; i < perm_size; ++i) {
+      out_shape[i] = input->shape_[perm[i]];
     }
   }
   SetShapeArray(output, out_shape, output->shape_size_);
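
For readers following the new branching in TransposeInferShape, here is a minimal standalone sketch of the output-shape rules this hunk introduces (illustrative names only, not the nnacl API): an empty perm reverses the input shape, a rank mismatch keeps the input shape as-is, and a matching rank applies out_shape[i] = in_shape[perm[i]], with a 3-D input under the default 4-D perm remapped to {0, 2, 1} first.

#include <cstdio>
#include <vector>

// Illustrative re-statement of the inference rules above; not MindSpore Lite code.
std::vector<int> InferTransposeShape(const std::vector<int> &in_shape, std::vector<int> perm) {
  if (perm.size() == 4 && in_shape.size() == 3) {
    perm = {0, 2, 1};  // trans3d fallback for a rank-3 input with the default 4-D perm
  }
  std::vector<int> out_shape;
  if (perm.empty()) {
    out_shape.assign(in_shape.rbegin(), in_shape.rend());  // no perm: reverse the shape
  } else if (perm.size() != in_shape.size()) {
    out_shape = in_shape;                                   // rank mismatch: keep the shape
  } else {
    for (int axis : perm) out_shape.push_back(in_shape[axis]);  // out_shape[i] = in_shape[perm[i]]
  }
  return out_shape;
}

int main() {
  // e.g. a rank-3 tensor reaching a Transpose node that carries the default NHWC->NCHW perm
  auto s = InferTransposeShape({1, 200, 40}, {0, 3, 1, 2});
  for (int d : s) std::printf("%d ", d);  // prints: 1 40 200
  std::printf("\n");
  return 0;
}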
@@ -41,19 +41,6 @@ int TransposeFp16CPUKernel::Run() {
   MS_ASSERT(in_tensors_.size() == 1 || in_tensors_.size() == 2);
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
   param->data_size_ = in_tensors_[0]->Size();
-  if (in_tensors_.size() == 2) {
-    auto input_perm = in_tensors_.at(1);
-    MS_ASSERT(input_perm != nullptr);
-    MS_ASSERT(input_perm->data_c() != nullptr);
-    int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
-    for (int i = 0; i < input_perm->ElementsNum(); ++i) {
-      param->perm_[i] = perm_data[i];
-    }
-    for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
-      param->perm_[i] = 0;
-    }
-    param->num_axes_ = input_perm->ElementsNum();
-  }
   MS_ASSERT(out_tensors_.size() == 1);
   auto &in_tensor = in_tensors_.front();
   auto &out_tensor = out_tensors_.front();
@@ -40,13 +40,22 @@ int TransposeCPUKernel::ReSize() {
   if (in_tensors_.size() == 2) {
     param->num_axes_ = in_tensors_.at(1)->ElementsNum();
   }
-  if (in_tensors_.at(kInputIndex)->shape().size() != static_cast<size_t>(param->num_axes_)) {
-    return RET_OK;
+  int trans3d[3] = {0, 2, 1};
+  int *perm_data = nullptr;
+  auto input_tensor = in_tensors_.at(kInputIndex);
+  if (input_tensor->shape().size() != static_cast<size_t>(param->num_axes_)) {
+    if (input_tensor->shape().size() == 3 && param->num_axes_ == 4) {
+      param->num_axes_ = 3;
+      perm_data = trans3d;
+    } else {
+      return RET_OK;
+    }
+  } else {
+    MS_ASSERT(in_tensors_.size() == 2);
+    auto perm_tensor = in_tensors_.at(1);
+    perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
   }
-  // get perm data
-  MS_ASSERT(in_tensors_.size() == 2);
-  auto perm_tensor = in_tensors_.at(1);
-  int *perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
+  // set perm data
   MS_ASSERT(perm_data != nullptr);
   for (int i = 0; i < param->num_axes_; ++i) {
     param->perm_[i] = perm_data[i];
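
To make the trans3d = {0, 2, 1} fallback above concrete, the following hypothetical helper (not part of MindSpore Lite) applies a 3-D permutation to a dense row-major buffer; with perm {0, 2, 1} it simply swaps the two innermost axes, which is the effect the kernel now falls back to when a rank-3 tensor arrives with the default 4-D perm.

#include <array>
#include <cstdio>
#include <vector>

// Illustrative 3-D transpose over a row-major buffer: out_shape[k] = in_shape[perm[k]].
std::vector<float> Transpose3D(const std::vector<float> &src, const std::array<int, 3> &in_shape,
                               const std::array<int, 3> &perm) {
  std::array<int, 3> out_shape = {in_shape[perm[0]], in_shape[perm[1]], in_shape[perm[2]]};
  std::vector<float> dst(src.size());
  for (int o0 = 0; o0 < out_shape[0]; ++o0) {
    for (int o1 = 0; o1 < out_shape[1]; ++o1) {
      for (int o2 = 0; o2 < out_shape[2]; ++o2) {
        int out_idx[3] = {o0, o1, o2};
        int in_idx[3];
        // Output axis k walks input axis perm[k].
        for (int k = 0; k < 3; ++k) in_idx[perm[k]] = out_idx[k];
        int src_off = (in_idx[0] * in_shape[1] + in_idx[1]) * in_shape[2] + in_idx[2];
        int dst_off = (o0 * out_shape[1] + o1) * out_shape[2] + o2;
        dst[dst_off] = src[src_off];
      }
    }
  }
  return dst;
}

int main() {
  // A rank-3 input of shape (1, 2, 3); perm {0, 2, 1} swaps the two innermost axes.
  std::array<int, 3> shape = {1, 2, 3};
  std::vector<float> data = {0, 1, 2, 3, 4, 5};
  auto out = Transpose3D(data, shape, {0, 2, 1});  // result has shape (1, 3, 2)
  for (float v : out) std::printf("%g ", v);       // prints: 0 3 1 4 2 5
  std::printf("\n");
  return 0;
}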
@@ -144,18 +153,6 @@ int TransposeCPUKernel::Run() {
     memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float));
     return RET_OK;
   }
-  if (in_tensors_.size() == 2) {
-    auto input_perm = in_tensors_.at(1);
-    MS_ASSERT(input_perm != nullptr);
-    MS_ASSERT(input_perm->data_c() != nullptr);
-    int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
-    for (int i = 0; i < input_perm->ElementsNum(); ++i) {
-      param_->perm_[i] = perm_data[i];
-    }
-    for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
-      param_->perm_[i] = 0;
-    }
-  }
   thread_count_ = op_parameter_->thread_num_;
   GetNHNCTransposeFunc(in_tensor, out_tensor, param_);
   if (NHNCTransposeFunc_ != nullptr) {
@@ -79,3 +79,4 @@ Q_face_recognition.onnx
 Q888_face_recognition.onnx
 Q888_iris_detect.onnx
 simple_IPS_model_4D_input.onnx
+Harmony_Voiceprint.onnx;1,200,40,1
@@ -86,3 +86,6 @@ ml_video_edit_enhance_update_tmp.onnx 0.5
 Q888_face_recognition.onnx 3.5
 Q888_iris_detect.onnx 0.5
 ssd_mobilenet_v1_10.onnx;1,383,640,3 0.5
+# The output of a conv in the later part of the model contains many negative values; the following leakyRelu makes
+# them very close to 0 (-e^-4). fp16 loses a lot of precision in this case, which affects the following computation.
+Harmony_Voiceprint.onnx;1,200,40,1 5.5
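
A rough, self-contained illustration of the precision argument behind the loose threshold above (assuming "-e^-4" refers to magnitudes around 1e-4): the sketch below quantizes a few values near -1e-4 to fp16-like precision by rounding the significand to 11 bits and prints the resulting error. Subnormals, overflow and NaN are ignored, so this is only a rounding-step illustration, not a full IEEE-754 half implementation and not MindSpore code.

#include <cmath>
#include <cstdio>

// Quantize a float to fp16-like precision: 1 implicit + 10 stored significand bits.
static float QuantizeToFp16(float x) {
  if (x == 0.0f) return 0.0f;
  int exp = 0;
  float mant = std::frexp(x, &exp);     // x = mant * 2^exp, 0.5 <= |mant| < 1
  float scaled = std::ldexp(mant, 11);  // shift so that 11 significand bits sit left of the point
  return std::ldexp(std::nearbyint(scaled), exp - 11);
}

int main() {
  // Values in the range the comment mentions: leakyRelu outputs close to 0.
  const float samples[] = {-1.0e-4f, -1.2345e-4f, -9.87e-5f};
  for (float v : samples) {
    float q = QuantizeToFp16(v);
    std::printf("fp32 %.8e -> fp16-ish %.8e (abs err %.2e, rel err %.2e)\n",
                v, q, std::fabs(q - v), std::fabs((q - v) / v));
  }
  return 0;
}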