Browse Source

!14982 [MS][LITE] support the analysis of inserted 3D transpose ops

From: @probiotics_53
Reviewed-by: @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
pull/14982/MERGE
mindspore-ci-bot Gitee 4 years ago
parent
commit
cd98d99ef2
5 changed files with 39 additions and 47 deletions
  1. +20
    -16
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
  2. +0
    -13
      mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
  3. +15
    -18
      mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
  4. +1
    -0
      mindspore/lite/test/models_onnx.cfg
  5. +3
    -0
      mindspore/lite/test/models_onnx_fp16.cfg

+ 20
- 16
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c View File

@@ -60,32 +60,36 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
for (size_t i = 0; i < perms_num; i++) {
ShapePush(perm, &perm_size, perm_data[i]);
}
int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
if (input->shape_size_ != 4 && perms_num == 4) {
for (size_t i = 0; i < input->shape_size_; ++i) {
out_shape[i] = input->shape_[i];
}
SetShapeArray(output, out_shape, input->shape_size_);
return NNACL_OK;
}
const int nchw2nhwc[4] = {0, 2, 3, 1};
const int nhwc2nchw[4] = {0, 3, 1, 2};
const int trans3d[3] = {0, 2, 1};
if (perms_num == 4) {
if (input->format_ == Format_NCHW && CheckPermTransFormat(perm, nchw2nhwc, perms_num)) {
output->format_ = Format_NHWC;
} else if (input->format_ == Format_NHWC && CheckPermTransFormat(perm, nhwc2nchw, perms_num)) {
output->format_ = Format_NCHW;
}
// Though the perm is 4D by default, the input can be a 3D tensor; the op implementation should be adapted to this.
if (input->shape_size_ == 3) {
ShapeSet(perm, &perm_size, trans3d, 3);
}
}
output->shape_size_ = perm_size;
for (size_t i = 0; i < perm_size; ++i) {
out_shape[i] = input->shape_[perm[i]];
}
// set output shape
int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0};
size_t in_shape_size = input->shape_size_;
output->shape_size_ = in_shape_size;
if (perm_size == 0) {
size_t shape_size = input->shape_size_;
output->shape_size_ = shape_size;
for (size_t i = 0; i < shape_size; ++i) {
out_shape[shape_size - i - 1] = input->shape_[i];
for (size_t i = 0; i < in_shape_size; ++i) {
out_shape[in_shape_size - i - 1] = input->shape_[i];
}
} else if (perm_size != in_shape_size) {
for (size_t i = 0; i < in_shape_size; ++i) {
out_shape[i] = input->shape_[i];
}
} else {
output->shape_size_ = perm_size;
for (size_t i = 0; i < perm_size; ++i) {
out_shape[i] = input->shape_[perm[i]];
}
}
SetShapeArray(output, out_shape, output->shape_size_);


+ 0
- 13
mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc View File

@@ -41,19 +41,6 @@ int TransposeFp16CPUKernel::Run() {
MS_ASSERT(in_tensors_.size() == 1 || in_tensors_.size() == 2);
TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
param->data_size_ = in_tensors_[0]->Size();
if (in_tensors_.size() == 2) {
auto input_perm = in_tensors_.at(1);
MS_ASSERT(input_perm != nullptr);
MS_ASSERT(input_perm->data_c() != nullptr);
int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
for (int i = 0; i < input_perm->ElementsNum(); ++i) {
param->perm_[i] = perm_data[i];
}
for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
param->perm_[i] = 0;
}
param->num_axes_ = input_perm->ElementsNum();
}
MS_ASSERT(out_tensors_.size() == 1);
auto &in_tensor = in_tensors_.front();
auto &out_tensor = out_tensors_.front();


+ 15
- 18
mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc View File

@@ -40,13 +40,22 @@ int TransposeCPUKernel::ReSize() {
if (in_tensors_.size() == 2) {
param->num_axes_ = in_tensors_.at(1)->ElementsNum();
}
if (in_tensors_.at(kInputIndex)->shape().size() != static_cast<size_t>(param->num_axes_)) {
return RET_OK;
int trans3d[3] = {0, 2, 1};
int *perm_data = nullptr;
auto input_tensor = in_tensors_.at(kInputIndex);
if (input_tensor->shape().size() != static_cast<size_t>(param->num_axes_)) {
if (input_tensor->shape().size() == 3 && param->num_axes_ == 4) {
param->num_axes_ = 3;
perm_data = trans3d;
} else {
return RET_OK;
}
} else {
MS_ASSERT(in_tensors_.size() == 2);
auto perm_tensor = in_tensors_.at(1);
perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
}
// get perm data
MS_ASSERT(in_tensors_.size() == 2);
auto perm_tensor = in_tensors_.at(1);
int *perm_data = reinterpret_cast<int *>(perm_tensor->data_c());
// set perm data
MS_ASSERT(perm_data != nullptr);
for (int i = 0; i < param->num_axes_; ++i) {
param->perm_[i] = perm_data[i];
@@ -144,18 +153,6 @@ int TransposeCPUKernel::Run() {
memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float));
return RET_OK;
}
if (in_tensors_.size() == 2) {
auto input_perm = in_tensors_.at(1);
MS_ASSERT(input_perm != nullptr);
MS_ASSERT(input_perm->data_c() != nullptr);
int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
for (int i = 0; i < input_perm->ElementsNum(); ++i) {
param_->perm_[i] = perm_data[i];
}
for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) {
param_->perm_[i] = 0;
}
}
thread_count_ = op_parameter_->thread_num_;
GetNHNCTransposeFunc(in_tensor, out_tensor, param_);
if (NHNCTransposeFunc_ != nullptr) {


+ 1
- 0
mindspore/lite/test/models_onnx.cfg View File

@@ -79,3 +79,4 @@ Q_face_recognition.onnx
Q888_face_recognition.onnx
Q888_iris_detect.onnx
simple_IPS_model_4D_input.onnx
Harmony_Voiceprint.onnx;1,200,40,1

+ 3
- 0
mindspore/lite/test/models_onnx_fp16.cfg View File

@@ -86,3 +86,6 @@ ml_video_edit_enhance_update_tmp.onnx 0.5
Q888_face_recognition.onnx 3.5
Q888_iris_detect.onnx 0.5
ssd_mobilenet_v1_10.onnx;1,383,640,3 0.5
# The output of a conv in the later part contains many negative values; the following LeakyReLU maps them very
# close to 0 (-e^-4). FP16 loses a lot of precision in this case, which affects the subsequent computation.
Harmony_Voiceprint.onnx;1,200,40,1 5.5

Loading…
Cancel
Save