diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c index bf724b0067..3cb6be5fb9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,11 @@ #include "nnacl/fp32/strided_slice_fp32.h" #include "nnacl/errorcode.h" -void PadStridedSliceParameterTo6D(StridedSliceParameter *param) { - int32_t begins[DIMENSION_6D]; - int32_t ends[DIMENSION_6D]; - int32_t strides[DIMENSION_6D]; - int32_t input_shape[DIMENSION_6D]; +void PadStridedSliceParameterTo8D(StridedSliceParameter *param) { + int32_t begins[DIMENSION_8D]; + int32_t ends[DIMENSION_8D]; + int32_t strides[DIMENSION_8D]; + int32_t input_shape[DIMENSION_8D]; int32_t i; for (i = 0; i < param->num_axes_; ++i) { begins[i] = param->begins_[i]; @@ -35,9 +35,8 @@ void PadStridedSliceParameterTo6D(StridedSliceParameter *param) { ends[i] = param->in_shape_[i]; strides[i] = 1; } - int32_t real_index = param->in_shape_length_ - 1; - for (i = DIMENSION_6D - 1; i >= 0; --i) { + for (i = DIMENSION_8D - 1; i >= 0; --i) { if (real_index >= 0) { param->begins_[i] = begins[real_index]; param->ends_[i] = ends[real_index]; @@ -50,20 +49,8 @@ void PadStridedSliceParameterTo6D(StridedSliceParameter *param) { param->in_shape_[i] = 1; } } - param->num_axes_ = DIMENSION_6D; - param->in_shape_length_ = DIMENSION_6D; -} - -void ChangeNegToPositive(StridedSliceParameter *param) { - int i; - for (i = 0; i < DIMENSION_6D; ++i) { - if (param->begins_[i] < 0) { - param->begins_[i] += param->in_shape_[i]; - } - if (param->ends_[i] < 0) { - param->ends_[i] += 
param->in_shape_[i]; - } - } + param->num_axes_ = DIMENSION_8D; + param->in_shape_length_ = DIMENSION_8D; } bool LoopContinue(int stride, int i, int end) { return stride > 0 ? i < end : i > end; } @@ -80,45 +67,46 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p int *ends = param->ends_; int *strides = param->strides_; int *in_shape = param->in_shape_; - - if (param->num_axes_ < DIMENSION_6D) { - PadStridedSliceParameterTo6D(param); + if (param->num_axes_ < DIMENSION_8D) { + PadStridedSliceParameterTo8D(param); } - - size_t dim_offset[DIMENSION_6D - 1]; - dim_offset[4] = in_shape[5]; + size_t dim_offset[DIMENSION_8D - 1]; + dim_offset[6] = in_shape[7]; + dim_offset[5] = in_shape[6] * dim_offset[6]; + dim_offset[4] = in_shape[5] * dim_offset[5]; dim_offset[3] = in_shape[4] * dim_offset[4]; dim_offset[2] = in_shape[3] * dim_offset[3]; dim_offset[1] = in_shape[2] * dim_offset[2]; dim_offset[0] = in_shape[1] * dim_offset[1]; size_t out_offset = 0; - int32_t dim0, dim1, dim2, dim3, dim4, dim5; + int32_t dim0, dim1, dim2, dim3, dim4, dim5, dim6, dim7; for (dim0 = begins[0]; LoopContinue(strides[0], dim0, ends[0]); dim0 += strides[0]) { for (dim1 = begins[1]; LoopContinue(strides[1], dim1, ends[1]); dim1 += strides[1]) { for (dim2 = begins[2]; LoopContinue(strides[2], dim2, ends[2]); dim2 += strides[2]) { for (dim3 = begins[3]; LoopContinue(strides[3], dim3, ends[3]); dim3 += strides[3]) { for (dim4 = begins[4]; LoopContinue(strides[4], dim4, ends[4]); dim4 += strides[4]) { for (dim5 = begins[5]; LoopContinue(strides[5], dim5, ends[5]); dim5 += strides[5]) { - int32_t in_offset = dim0 * dim_offset[0] + dim1 * dim_offset[1] + dim2 * dim_offset[2] + - dim3 * dim_offset[3] + dim4 * dim_offset[4] + dim5; - if (param->data_type == kDataTypeFloat) { - *((float *)out_data + out_offset) = *((float *)in_data + in_offset); - } else if (param->data_type == kDataTypeInt8) { - *((int8_t *)out_data + out_offset) = *((int8_t *)in_data + in_offset); - } 
else if (param->data_type == kDataTypeInt) { - *((int32_t *)out_data + out_offset) = *((int32_t *)in_data + in_offset); - } else if (param->data_type == kDataTypeFloat64) { - *((double *)out_data + out_offset) = *((double *)in_data + in_offset); - } else if (param->data_type == kDataTypeBool) { - *((bool *)out_data + out_offset) = *((bool *)in_data + in_offset); + for (dim6 = begins[6]; LoopContinue(strides[6], dim6, ends[6]); dim6 += strides[6]) { + for (dim7 = begins[7]; LoopContinue(strides[7], dim7, ends[7]); dim7 += strides[7]) { + int32_t in_offset = dim0 * dim_offset[0] + dim1 * dim_offset[1] + dim2 * dim_offset[2] + + dim3 * dim_offset[3] + dim4 * dim_offset[4] + dim5 * dim_offset[5] + + dim6 * dim_offset[6] + dim7; + if (param->data_type == kDataTypeFloat) { + *((float *)out_data + out_offset) = *((float *)in_data + in_offset); + } else if (param->data_type == kDataTypeInt8) { + *((int8_t *)out_data + out_offset) = *((int8_t *)in_data + in_offset); + } else if (param->data_type == kDataTypeInt) { + *((int32_t *)out_data + out_offset) = *((int32_t *)in_data + in_offset); #ifdef ENABLE_ARM64 - } else if (param->data_type == kDataTypeFloat16) { - *((float16_t *)out_data + out_offset) = *((float16_t *)in_data + in_offset); + } else if (param->data_type == kDataTypeFloat16) { + *((float16_t *)out_data + out_offset) = *((float16_t *)in_data + in_offset); #endif - } else { - return NNACL_ERR; + } else { + return NNACL_ERR; + } + out_offset++; + } } - out_offset++; } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c index 02d7ea8c8a..2a0593e7e3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei Technologies 
Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,32 +29,34 @@ int DoStridedSliceGrad(const float *inputs, float *output, const int *dx_shape, if (inputs == NULL || output == NULL || param == NULL) { return NNACL_NULL_PTR; } - if (param->num_axes_ > DIMENSION_7D) { + if (param->num_axes_ > DIMENSION_8D) { return NNACL_PARAM_INVALID; } size_t size = 1; int *s = param->strides_; int *b = param->begins_; - for (int i = 0; i < DIMENSION_7D; i++) { + for (int i = 0; i < DIMENSION_8D; i++) { size *= param->in_shape_[i]; } for (size_t pos = 0; pos < size; pos++) { - size_t i = CalcIndex(param->in_shape_, 6, 0, pos); - size_t j = CalcIndex(param->in_shape_, 5, 1, pos); - size_t k = CalcIndex(param->in_shape_, 4, 2, pos); - size_t l = CalcIndex(param->in_shape_, 3, 3, pos); - size_t m = CalcIndex(param->in_shape_, 2, 4, pos); - size_t n = CalcIndex(param->in_shape_, 1, 5, pos); - size_t o = CalcIndex(param->in_shape_, 0, 6, pos); - + size_t i = CalcIndex(param->in_shape_, 7, 0, pos); + size_t j = CalcIndex(param->in_shape_, 6, 1, pos); + size_t k = CalcIndex(param->in_shape_, 5, 2, pos); + size_t l = CalcIndex(param->in_shape_, 4, 3, pos); + size_t m = CalcIndex(param->in_shape_, 3, 4, pos); + size_t n = CalcIndex(param->in_shape_, 2, 5, pos); + size_t o = CalcIndex(param->in_shape_, 1, 6, pos); + size_t p = CalcIndex(param->in_shape_, 0, 7, pos); size_t input_idx = - (i * s[0] + b[0]) * dx_shape[1] * dx_shape[2] * dx_shape[3] * dx_shape[4] * dx_shape[5] * dx_shape[6] + - (j * s[1] + b[1]) * dx_shape[2] * dx_shape[3] * dx_shape[4] * dx_shape[5] * dx_shape[6] + - (k * s[2] + b[2]) * dx_shape[3] * dx_shape[4] * dx_shape[5] * dx_shape[6] + - (l * s[3] + b[3]) * dx_shape[4] * dx_shape[5] * dx_shape[6] + (m * s[4] + b[4]) * dx_shape[5] * dx_shape[6] + - (n * s[5] + b[5]) * dx_shape[6] + (o * s[6] + b[6]); + (i * s[0] + b[0]) * dx_shape[1] * dx_shape[2] * dx_shape[3] * dx_shape[4] 
* dx_shape[5] * dx_shape[6] * + dx_shape[7] + + (j * s[1] + b[1]) * dx_shape[2] * dx_shape[3] * dx_shape[4] * dx_shape[5] * dx_shape[6] * dx_shape[7] + + (k * s[2] + b[2]) * dx_shape[3] * dx_shape[4] * dx_shape[5] * dx_shape[6] * dx_shape[7] + + (l * s[3] + b[3]) * dx_shape[4] * dx_shape[5] * dx_shape[6] * dx_shape[7] + + (m * s[4] + b[4]) * dx_shape[5] * dx_shape[6] * dx_shape[7] + (n * s[5] + b[5]) * dx_shape[6] * dx_shape[7] + + (o * s[6] + b[6]) * dx_shape[7] + (p * s[7] + b[7]); output[input_idx] = inputs[pos]; } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c index a067336c23..d4abaf0f96 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c @@ -319,6 +319,9 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten int in_shape[MAX_SHAPE_SIZE]; size_t in_shape_size = 0; + if (input->shape_size_ > MAX_SHAPE_SIZE) { + return NNACL_ERR; + } ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_); StridedSliceTransferBuffer transfer_buffer; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c index 13f1ec740e..11583aca7c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c @@ -52,12 +52,12 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor if (perms_num != 0 && perm_data == NULL) { return NNACL_INFER_INVALID; } - int perm[MAX_SHAPE_SIZE] = {0}; + int perm[MAX_TRANSPOSE_DIM_SIZE] = {0}; size_t perm_size = 0; for (size_t i = 0; i < perms_num; i++) { ShapePush(perm, &perm_size, perm_data[i]); } - int 
out_shape[MAX_SHAPE_SIZE] = {0}; + int out_shape[MAX_TRANSPOSE_DIM_SIZE] = {0}; if (input->shape_size_ != 4 && perms_num == 4) { for (size_t i = 0; i < input->shape_size_; ++i) { out_shape[i] = input->shape_[i]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h index 7c2db6ca69..474125c7f7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,6 +47,7 @@ #define MAX_SHAPE_SIZE 8 #define DIMENSION_4D 4 +#define DIMENSION_5D 5 #define DIMENSION_6D 6 #define DIMENSION_7D 7 #define DIMENSION_8D 8 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/transpose.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/transpose.h index 6db866f982..47a8abae40 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/transpose.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/transpose.h @@ -19,18 +19,18 @@ #include "nnacl/op_base.h" -#define MAX_TRANSPOSE_DIM_SIZE 6 +#define MAX_TRANSPOSE_DIM_SIZE 20 typedef struct TransposeParameter { // primitive parameter OpParameter op_parameter_; - int perm_[MAX_SHAPE_SIZE]; + int perm_[MAX_TRANSPOSE_DIM_SIZE]; size_t perm_size_; bool conjugate_; // shape correlative - int strides_[MAX_SHAPE_SIZE]; - int out_strides_[MAX_SHAPE_SIZE]; + int strides_[MAX_TRANSPOSE_DIM_SIZE]; + int out_strides_[MAX_TRANSPOSE_DIM_SIZE]; // other parameter int num_axes_; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc index 6d80e17674..ecfdd1a12f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,6 +57,12 @@ void StridedSliceCPUKernel::InitFastRunParam() { } int StridedSliceCPUKernel::ReSize() { + auto input_tensor = in_tensors_.at(0); + auto begin_tensor = in_tensors_.at(1); + if (input_tensor->shape().size() > DIMENSION_8D || begin_tensor->shape().size() > DIMENSION_8D) { + MS_LOG(ERROR) << "StridedSlice not support input rank or begin num exceeds " << DIMENSION_8D; + return RET_ERROR; + } fast_run_ = MatchFastPattern(); if (fast_run_) { InitFastRunParam(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc index dfc6834aeb..f003bc1255 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,7 +49,7 @@ int TransposeFp16CPUKernel::Run() { for (int i = 0; i < input_perm->ElementsNum(); ++i) { param->perm_[i] = perm_data[i]; } - for (int i = input_perm->ElementsNum(); i < MAX_SHAPE_SIZE; ++i) { + for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) { param->perm_[i] = 0; } param->num_axes_ = input_perm->ElementsNum(); @@ -71,7 +71,7 @@ int TransposeFp16CPUKernel::Run() { return RET_OK; } int dims = out_tensor->shape().size(); - if (dims > MAX_TRANSPOSE_DIM_SIZE) { + if (dims > DIMENSION_6D) { dim_size_ = reinterpret_cast(context_->allocator->Malloc(dims * sizeof(int))); if (dim_size_ == nullptr) { MS_LOG(ERROR) << "Malloc data failed"; @@ -88,7 +88,7 @@ int TransposeFp16CPUKernel::Run() { MS_ASSERT(out_shape_); auto ret = Fp16DoTranspose(in_data_fp16_, out_data_fp16_, out_shape_, param, dim_size_, position_); - if (dims > MAX_TRANSPOSE_DIM_SIZE) { + if (dims > DIMENSION_6D) { context_->allocator->Free(dim_size_); context_->allocator->Free(position_); dim_size_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc index 61286ea34c..210e97c273 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -152,7 +152,7 @@ int TransposeCPUKernel::Run() { for (int i = 0; i < input_perm->ElementsNum(); ++i) { param_->perm_[i] = perm_data[i]; } - for (int i = input_perm->ElementsNum(); i < MAX_SHAPE_SIZE; ++i) { + for (int i = input_perm->ElementsNum(); i < MAX_TRANSPOSE_DIM_SIZE; ++i) { param_->perm_[i] = 0; } } @@ -169,7 +169,7 @@ int TransposeCPUKernel::Run() { MS_ASSERT(out_shape_); dims_ = out_tensor->shape().size(); - if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { + if (dims_ > DIMENSION_6D) { dim_size_ = reinterpret_cast(context_->allocator->Malloc(dims_ * sizeof(int))); if (dim_size_ == nullptr) { MS_LOG(ERROR) << "Malloc data failed"; @@ -187,13 +187,13 @@ int TransposeCPUKernel::Run() { } } int ret; - if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { + if (dims_ > DIMENSION_6D) { ret = ParallelLaunch(static_cast(this->context_)->thread_pool_, TransposeImpl, this, thread_count_); } else { ret = DoTransposeFp32(in_data_, out_data_, out_shape_, param_); } - if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { + if (dims_ > DIMENSION_6D) { context_->allocator->Free(dim_size_); context_->allocator->Free(position_); dim_size_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/strided_slice_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/strided_slice_grad.cc index 4b63d47b2d..6057cc94ec 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/strided_slice_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/strided_slice_grad.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,10 +53,10 @@ int StridedSliceGradCPUKernel::Init() { } void StridedSliceGradCPUKernel::FillEmptyDims() { - int32_t begins[DIMENSION_7D]; - int32_t ends[DIMENSION_7D]; - int32_t strides[DIMENSION_7D]; - int32_t input_shape[DIMENSION_7D]; + int32_t begins[DIMENSION_8D]; + int32_t ends[DIMENSION_8D]; + int32_t strides[DIMENSION_8D]; + int32_t input_shape[DIMENSION_8D]; int32_t i; for (i = 0; i < param_->num_axes_; ++i) { begins[i] = param_->begins_[i]; @@ -72,7 +72,7 @@ void StridedSliceGradCPUKernel::FillEmptyDims() { } int32_t real_index = param_->in_shape_length_ - 1; - for (i = DIMENSION_7D - 1; i >= 0; --i) { + for (i = DIMENSION_8D - 1; i >= 0; --i) { if (real_index >= 0) { param_->begins_[i] = begins[real_index]; param_->ends_[i] = ends[real_index]; @@ -85,10 +85,10 @@ void StridedSliceGradCPUKernel::FillEmptyDims() { param_->in_shape_[i] = 1; } } - param_->num_axes_ = DIMENSION_7D; - param_->in_shape_length_ = DIMENSION_7D; + param_->num_axes_ = DIMENSION_8D; + param_->in_shape_length_ = DIMENSION_8D; - for (i = 0; i < DIMENSION_7D; ++i) { + for (i = 0; i < DIMENSION_8D; ++i) { if (param_->begins_[i] < 0) { param_->begins_[i] += param_->in_shape_[i]; } @@ -101,7 +101,7 @@ void StridedSliceGradCPUKernel::FillEmptyDims() { void StridedSliceGradCPUKernel::FillOutputDim() { auto output = out_tensors_.at(0); size_t out_size = output->shape().size(); - for (size_t i = 0; i < DIMENSION_7D; i++) { + for (size_t i = 0; i < DIMENSION_8D; i++) { if (i < out_size) { output_shape_.push_back(output->shape()[i]); } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc index 27265d5ef2..7e8e4568a2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the 
Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ int TransposeInt8CPUKernel::ReSize() { transpose_param_->out_strides_[i] = out_shape.at(i + 1) * transpose_param_->out_strides_[i + 1]; } - extra_dims_ = out_shape.size() > MAX_SHAPE_SIZE; + extra_dims_ = out_shape.size() > DIMENSION_5D; num_unit_ = static_cast(in_shape.at(transpose_param_->perm_[kNHWC_H])); thread_h_num_ = MSMIN(thread_num_, num_unit_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h index a0b4db3ec9..c5279ee17e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,8 +60,8 @@ class TransposeInt8CPUKernel : public LiteKernel { int thread_h_stride_ = 0; int thread_h_num_ = 0; int num_unit_ = 0; - int in_shape_[8] = {0}; - int out_shape_[8] = {0}; + int in_shape_[20] = {0}; + int out_shape_[20] = {0}; int nhnc_param_[3] = {0}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc index 7126a2ad57..7a70c96669 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include "common/common_test.h" #include "nnacl/fp32/strided_slice_fp32.h" #include "mindspore/lite/src/kernel_registry.h" +#include "nnacl/strided_slice_parameter.h" namespace mindspore { class TestStridedSlice : public mindspore::CommonTest { @@ -25,23 +26,28 @@ class TestStridedSlice : public mindspore::CommonTest { TestStridedSlice() {} }; -void InitStridedSliceParam(StridedSliceParameter *strided_slice_param) { - strided_slice_param->begins_[0] = 0; - strided_slice_param->begins_[1] = 0; - strided_slice_param->begins_[2] = 0; - - strided_slice_param->ends_[0] = 1; - strided_slice_param->ends_[1] = 2; - strided_slice_param->ends_[2] = 4; - - strided_slice_param->strides_[0] = 1; - strided_slice_param->strides_[1] = 2; - strided_slice_param->strides_[2] = 2; - - strided_slice_param->in_shape_[0] = 1; - strided_slice_param->in_shape_[1] = 2; - strided_slice_param->in_shape_[2] = 4; - strided_slice_param->num_axes_ = 3; +void InitStridedSliceParam(StridedSliceParameter *param, const lite::Tensor *in_tensor, + const lite::Tensor *begin_tensor, const lite::Tensor *end_tensor, + const lite::Tensor *stride_tensor) { + int dim = begin_tensor->ElementsNum(); + auto input_shape = in_tensor->shape(); + int *begin = reinterpret_cast(begin_tensor->data_c()); + int *end = reinterpret_cast(end_tensor->data_c()); + int *stride = reinterpret_cast(stride_tensor->data_c()); + for (int i = 0; i < dim; ++i) { + param->begins_[i] = begin[i]; + param->ends_[i] = end[i]; + param->strides_[i] = stride[i]; + param->in_shape_[i] = input_shape[i]; + } + param->num_axes_ = dim; + param->in_shape_length_ = dim; + param->data_type = kDataTypeFloat; + param->begins_mask_ = 0; + param->ends_mask_ = 0; + param->ellipsisMask_ = 0; + param->newAxisMask_ = 0; + param->shrinkAxisMask_ = 0; } TEST_F(TestStridedSlice, StridedSlice) { @@ -51,30 +57,218 @@ TEST_F(TestStridedSlice, StridedSlice) { float output_data[2] = {0}; in_tensor.set_data(input_data); 
out_tensor.set_data(output_data); - std::vector inputs = {&in_tensor}; + lite::Tensor begins_tensor(kNumberTypeInt32, {3}); + int begins_data[] = {0, 0, 0}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {3}); + int ends_data[] = {1, 2, 4}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {3}); + int strides_data[] = {1, 2, 2}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; std::vector outputs = {&out_tensor}; - - StridedSliceParameter parameter = {0}; - InitStridedSliceParam(¶meter); - parameter.op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[2] = {0.2390374, 0.05051243}; + ASSERT_NEAR(output_data[0], expect[0], 0.001); + ASSERT_NEAR(output_data[1], expect[1], 0.001); + in_tensor.set_data(nullptr); + begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} +// 7d +TEST_F(TestStridedSlice, 7d) { + lite::Tensor in_tensor(kNumberTypeFloat32, {1, 2, 4, 1, 1, 1, 1}); + lite::Tensor out_tensor(kNumberTypeFloat32, {1, 1, 2, 1, 1, 1, 1}); + float input_data[] = {0.2390374, 0.92039955, 
0.05051243, 0.49574447, 0.8355223, 0.02647042, 0.08811307, 0.4566604}; + float output_data[2] = {0}; + in_tensor.set_data(input_data); + out_tensor.set_data(output_data); + lite::Tensor begins_tensor(kNumberTypeInt32, {7}); + int begins_data[] = {0, 0, 1, 0, 0, 0, 0}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {7}); + int ends_data[] = {1, 2, 4, 1, 1, 1, 1}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {7}); + int strides_data[] = {1, 2, 2, 1, 1, 1, 1}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; + std::vector outputs = {&out_tensor}; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto ctx = std::make_shared(); ASSERT_EQ(lite::RET_OK, ctx->Init()); - auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[2] = {0.92039955, 0.49574447}; + ASSERT_NEAR(output_data[0], expect[0], 0.001); + ASSERT_NEAR(output_data[1], expect[1], 0.001); + in_tensor.set_data(nullptr); + begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} +// 8d +TEST_F(TestStridedSlice, 8d) { + lite::Tensor in_tensor(kNumberTypeInt8, {2, 2, 2, 3, 1, 1, 1, 1}); + lite::Tensor out_tensor(kNumberTypeInt8, {1, 
1, 1, 2, 1, 1, 1, 1}); + int8_t input_data[] = {-12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int8_t output_data[2] = {0}; + in_tensor.set_data(input_data); + out_tensor.set_data(output_data); + lite::Tensor begins_tensor(kNumberTypeInt32, {8}); + int begins_data[] = {0, 0, 1, 0, 0, 0, 0, 0}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {8}); + int ends_data[] = {1, 2, 2, 3, 1, 1, 1, 1}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {8}); + int strides_data[] = {1, 2, 1, 2, 1, 1, 1, 1}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; + std::vector outputs = {&out_tensor}; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_StridedSlice}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); auto ret = kernel->Run(); EXPECT_EQ(0, ret); + int8_t expect[4] = {-9, -7}; + for (unsigned int i = 0; i < sizeof(expect); ++i) { + EXPECT_EQ(output_data[i], expect[i]); + } + in_tensor.set_data(nullptr); + begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} - float expect[2] = {0.2390374, 0.05051243}; - ASSERT_EQ(0, CompareOutputData(output_data, expect, 2, 
0.000001)); +// fast run (7d) +TEST_F(TestStridedSlice, FastRun7d) { + lite::Tensor in_tensor(kNumberTypeFloat32, {1, 2, 4, 1, 1, 1, 1}); + lite::Tensor out_tensor(kNumberTypeFloat32, {1, 2, 2, 1, 1, 1, 1}); + float input_data[] = {0.2390374, 0.92039955, 0.05051243, 0.49574447, 0.8355223, 0.02647042, 0.08811307, 0.4566604}; + float output_data[4] = {0}; + in_tensor.set_data(input_data); + out_tensor.set_data(output_data); + lite::Tensor begins_tensor(kNumberTypeInt32, {7}); + int begins_data[] = {0, 0, 1, 0, 0, 0, 0}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {7}); + int ends_data[] = {1, 2, 4, 1, 1, 1, 1}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {7}); + int strides_data[] = {1, 1, 2, 1, 1, 1, 1}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; + std::vector outputs = {&out_tensor}; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[4] = {0.92039955, 0.49574447, 0.02647042, 0.4566604}; + ASSERT_NEAR(output_data[0], expect[0], 0.001); + ASSERT_NEAR(output_data[1], expect[1], 0.001); + ASSERT_NEAR(output_data[2], expect[2], 0.001); + ASSERT_NEAR(output_data[3], expect[3], 0.001); + in_tensor.set_data(nullptr); + 
begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} +// fast run (7d single thread) +TEST_F(TestStridedSlice, FastRun7dSingleThread) { + lite::Tensor in_tensor(kNumberTypeFloat32, {1, 2, 4, 1, 1, 1, 1}); + lite::Tensor out_tensor(kNumberTypeFloat32, {1, 2, 2, 1, 1, 1, 1}); + float input_data[] = {0.2390374, 0.92039955, 0.05051243, 0.49574447, 0.8355223, 0.02647042, 0.08811307, 0.4566604}; + float output_data[4] = {0}; + in_tensor.set_data(input_data); + out_tensor.set_data(output_data); + lite::Tensor begins_tensor(kNumberTypeInt32, {7}); + int begins_data[] = {0, 0, 1, 0, 0, 0, 0}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {7}); + int ends_data[] = {1, 2, 4, 1, 1, 1, 1}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {7}); + int strides_data[] = {1, 1, 2, 1, 1, 1, 1}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; + std::vector outputs = {&out_tensor}; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[4] = {0.92039955, 0.49574447, 0.02647042, 0.4566604}; + ASSERT_NEAR(output_data[0], expect[0], 0.001); + 
ASSERT_NEAR(output_data[1], expect[1], 0.001); + ASSERT_NEAR(output_data[2], expect[2], 0.001); + ASSERT_NEAR(output_data[3], expect[3], 0.001); in_tensor.set_data(nullptr); + begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); out_tensor.set_data(nullptr); + delete kernel; } TEST_F(TestStridedSlice, StridedSliceInt8) { @@ -84,44 +278,40 @@ TEST_F(TestStridedSlice, StridedSliceInt8) { int8_t output_data[4] = {0}; in_tensor.set_data(input_data); out_tensor.set_data(output_data); - std::vector inputs = {&in_tensor}; + lite::Tensor begins_tensor(kNumberTypeInt32, {3}); + int begins_data[] = {0, 1, 2}; + begins_tensor.set_data(begins_data); + lite::Tensor ends_tensor(kNumberTypeInt32, {3}); + int ends_data[] = {2, 3, 4}; + ends_tensor.set_data(ends_data); + lite::Tensor strides_tensor(kNumberTypeInt32, {3}); + int strides_data[] = {1, 2, 1}; + strides_tensor.set_data(strides_data); + std::vector inputs = {&in_tensor, &begins_tensor, &ends_tensor, &strides_tensor}; std::vector outputs = {&out_tensor}; - - StridedSliceParameter parameter = {0}; - parameter.begins_[0] = 0; - parameter.begins_[1] = 1; - parameter.begins_[2] = 2; - parameter.ends_[0] = 2; - parameter.ends_[1] = 3; - parameter.ends_[2] = 4; - parameter.strides_[0] = 1; - parameter.strides_[1] = 2; - parameter.strides_[2] = 1; - parameter.in_shape_[0] = 2; - parameter.in_shape_[1] = 3; - parameter.in_shape_[2] = 4; - parameter.num_axes_ = 3; - - parameter.op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + StridedSliceParameter *parameter = new StridedSliceParameter; + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + parameter->op_parameter_.infer_flag_ = true; + InitStridedSliceParam(parameter, &in_tensor, &begins_tensor, &ends_tensor, &strides_tensor); + parameter->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_StridedSlice}; - 
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); ASSERT_EQ(lite::RET_OK, ctx->Init()); - auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(parameter), ctx.get(), desc); ASSERT_NE(kernel, nullptr); - auto ret = kernel->Run(); EXPECT_EQ(0, ret); - int8_t expect[4] = {-6, -5, 7, 8}; for (unsigned int i = 0; i < sizeof(expect); ++i) { EXPECT_EQ(output_data[i], expect[i]); } - in_tensor.set_data(nullptr); + begins_tensor.set_data(nullptr); + ends_tensor.set_data(nullptr); + strides_tensor.set_data(nullptr); out_tensor.set_data(nullptr); + delete kernel; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc index f49ee9f670..ce014b1c96 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc @@ -30,24 +30,97 @@ class TestTransposeFp32 : public mindspore::CommonTest { TestTransposeFp32() {} }; -TEST_F(TestTransposeFp32, TransposeFp32_axes4) { - /* 1x2x3x4 */ +TEST_F(TestTransposeFp32, 10D) { + lite::Tensor in_tensor(kNumberTypeFloat32, {2, 3, 4, 1, 1, 1, 1, 1, 1, 1}); + float in[24] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; + in_tensor.set_data(in); + lite::Tensor perm_tensor(kNumberTypeInt32, {10}); + int perm[10] = {2, 0, 1, 3, 4, 5, 6, 7, 8, 9}; + perm_tensor.set_data(perm); + lite::Tensor out_tensor(kNumberTypeFloat32, {4, 2, 3, 1, 1, 1, 1, 1, 1, 1}); + float out[24] = {0}; + out_tensor.set_data(out); + auto param = new (std::nothrow) TransposeParameter(); + if (param == nullptr) { + MS_LOG(ERROR) << "New param fails."; + return; + } + param->op_parameter_.infer_flag_ = true; + param->op_parameter_.type_ 
= schema::PrimitiveType_Transpose; + std::vector inputs = {&in_tensor, &perm_tensor}; + std::vector outputs = {&out_tensor}; + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(param), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[24] = {1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24}; + for (int i = 0; i < 24; ++i) { + ASSERT_NEAR(out[i], expect[i], 0.001); + } + in_tensor.set_data(nullptr); + perm_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} + +TEST_F(TestTransposeFp32, 10DSingleThread) { + lite::Tensor in_tensor(kNumberTypeFloat32, {2, 3, 4, 1, 1, 1, 1, 1, 1, 1}); + float in[24] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; + in_tensor.set_data(in); + lite::Tensor perm_tensor(kNumberTypeInt32, {10}); + int perm[10] = {2, 0, 1, 3, 4, 5, 6, 7, 8, 9}; + perm_tensor.set_data(perm); + lite::Tensor out_tensor(kNumberTypeFloat32, {4, 2, 3, 1, 1, 1, 1, 1, 1, 1}); + float out[24] = {0}; + out_tensor.set_data(out); + auto param = new (std::nothrow) TransposeParameter(); + if (param == nullptr) { + MS_LOG(ERROR) << "New param fails."; + return; + } + param->op_parameter_.infer_flag_ = true; + param->op_parameter_.type_ = schema::PrimitiveType_Transpose; + std::vector inputs = {&in_tensor, &perm_tensor}; + std::vector outputs = {&out_tensor}; + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto ctx = std::make_shared(); + 
ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + auto kernel = creator(inputs, outputs, reinterpret_cast(param), ctx.get(), desc); + ASSERT_NE(kernel, nullptr); + auto ret = kernel->Run(); + EXPECT_EQ(0, ret); + float expect[24] = {1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24}; + for (int i = 0; i < 24; ++i) { + ASSERT_NEAR(out[i], expect[i], 0.001); + } + in_tensor.set_data(nullptr); + perm_tensor.set_data(nullptr); + out_tensor.set_data(nullptr); + delete kernel; +} + +TEST_F(TestTransposeFp32, TransposeFp32_axes4) { /* 1x2x3x4 */ float in[24] = {-0.35779851, -0.4857257, 1.2791597, -0.36793608, 0.95098744, -0.12716428, 0.17405411, 0.42663834, -1.11871315, 1.02777593, 1.20223761, 0.30183748, 1.39663453, -1.11923312, -1.02032341, 1.91074871, 1.52489095, -1.13020852, -0.66358529, 1.8033383, 0.62647028, 1.03094635, -1.65733338, 0.3952082}; - float out[24] = {0}; - float correct[24] = {-0.35779851, 1.39663453, 0.95098744, 1.52489095, -1.11871315, 0.62647028, -0.4857257, -1.11923312, -0.12716428, -1.13020852, 1.02777593, 1.03094635, 1.2791597, -1.02032341, 0.17405411, -0.66358529, 1.20223761, -1.65733338, -0.36793608, 1.91074871, 0.42663834, 1.8033383, 0.30183748, 0.3952082}; - int output_shape[4] = {4, 3, 2, 1}; int perm[8] = {3, 2, 1, 0, 0, 0, 0, 0}; int strides[8] = {24, 12, 4, 1, 1, 1, 1, 1}; int out_strides[8] = {6, 2, 1, 1, 1, 1, 1, 1}; - auto param = new (std::nothrow) TransposeParameter(); if (param == nullptr) { MS_LOG(ERROR) << "New param fails."; @@ -56,37 +129,30 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes4) { param->num_axes_ = 4; param->conjugate_ = false; param->data_size_ = 24 * sizeof(float); - for (int i = 0; i < 8; i++) { param->perm_[i] = perm[i]; param->strides_[i] = strides[i]; param->out_strides_[i] = out_strides[i]; } - auto ret = DoTransposeFp32(in, out, output_shape, param); ASSERT_EQ(ret, 0); delete param; ASSERT_EQ(0, CompareOutputData(out, correct, 24, 0.000001)); } 
-TEST_F(TestTransposeFp32, TransposeFp32_axes3) { - /* 2x3x4 */ +TEST_F(TestTransposeFp32, TransposeFp32_axes3) { /* 2x3x4 */ float in[24] = {1.62434536, -0.61175641, -0.52817175, -1.07296862, 0.86540763, -2.3015387, 1.74481176, -0.7612069, 0.3190391, -0.24937038, 1.46210794, -2.06014071, -0.3224172, -0.38405435, 1.13376944, -1.09989127, -0.17242821, -0.87785842, 0.04221375, 0.58281521, -1.10061918, 1.14472371, 0.90159072, 0.50249434}; - float out[24] = {0}; - float correct[24] = {1.62434536, -0.3224172, 0.86540763, -0.17242821, 0.3190391, -1.10061918, -0.61175641, -0.38405435, -2.3015387, -0.87785842, -0.24937038, 1.14472371, -0.52817175, 1.13376944, 1.74481176, 0.04221375, 1.46210794, 0.90159072, -1.07296862, -1.09989127, -0.7612069, 0.58281521, -2.06014071, 0.50249434}; - int output_shape[3] = {4, 3, 2}; int perm[8] = {2, 1, 0, 0, 0, 0, 0, 0}; int strides[8] = {12, 4, 1, 1, 1, 1, 1, 1}; int out_strides[8] = {6, 2, 1, 1, 1, 1, 1, 1}; - auto param = new (std::nothrow) TransposeParameter(); if (param == nullptr) { MS_LOG(ERROR) << "New param fails."; @@ -95,98 +161,73 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes3) { param->num_axes_ = 3; param->conjugate_ = false; param->data_size_ = 24 * sizeof(float); - for (int i = 0; i < 8; i++) { param->perm_[i] = perm[i]; param->strides_[i] = strides[i]; param->out_strides_[i] = out_strides[i]; } - auto ret = DoTransposeFp32(in, out, output_shape, param); ASSERT_EQ(ret, 0); delete param; ASSERT_EQ(0, CompareOutputData(out, correct, 24, 0.000001)); } -TEST_F(TestTransposeFp32, TransposeFp32_axes2) { - /* 6x4 */ +TEST_F(TestTransposeFp32, TransposeFp32_axes2) { /* 6x4 */ float in[24] = {1.62434536, -0.61175641, -0.52817175, -1.07296862, 0.86540763, -2.3015387, 1.74481176, -0.7612069, 0.3190391, -0.24937038, 1.46210794, -2.06014071, -0.3224172, -0.38405435, 1.13376944, -1.09989127, -0.17242821, -0.87785842, 0.04221375, 0.58281521, -1.10061918, 1.14472371, 0.90159072, 0.50249434}; - float out[24] = {0}; - float correct[24] = 
{1.62434536, 0.86540763, 0.3190391, -0.3224172, -0.17242821, -1.10061918, -0.61175641, -2.3015387, -0.24937038, -0.38405435, -0.87785842, 1.14472371, -0.52817175, 1.74481176, 1.46210794, 1.13376944, 0.04221375, 0.90159072, -1.07296862, -0.7612069, -2.06014071, -1.09989127, 0.58281521, 0.50249434}; - int output_shape[2] = {4, 6}; int perm[8] = {1, 0, 0, 0, 0, 0, 0, 0}; int strides[8] = {4, 1, 1, 1, 1, 1, 1, 1}; int out_strides[8] = {6, 1, 1, 1, 1, 1, 1, 1}; - auto param = new (std::nothrow) TransposeParameter(); if (param == nullptr) { MS_LOG(ERROR) << "New param fails."; return; } - param->num_axes_ = 2; param->conjugate_ = false; param->data_size_ = 24 * sizeof(float); - for (int i = 0; i < 8; i++) { param->perm_[i] = perm[i]; param->strides_[i] = strides[i]; param->out_strides_[i] = out_strides[i]; } - auto ret = DoTransposeFp32(in, out, output_shape, param); ASSERT_EQ(ret, 0); delete param; ASSERT_EQ(0, CompareOutputData(out, correct, 24, 0.000001)); } -TEST_F(TestTransposeFp32, TransposeFp32_test5) { - /* 1x2x3x2x2 */ +TEST_F(TestTransposeFp32, TransposeFp32_test5) { /* 1x2x3x2x2 */ std::vector input = {1.62434536, -0.61175641, -0.52817175, -1.07296862, 0.86540763, -2.3015387, 1.74481176, -0.7612069, 0.3190391, -0.24937038, 1.46210794, -2.06014071, -0.3224172, -0.38405435, 1.13376944, -1.09989127, -0.17242821, -0.87785842, 0.04221375, 0.58281521, -1.10061918, 1.14472371, 0.90159072, 0.50249434}; - float correct[24] = {1.62434536, -0.3224172, 0.86540763, -0.17242821, 0.3190391, -1.10061918, -0.52817175, 1.13376944, 1.74481176, 0.04221375, 1.46210794, 0.90159072, -0.61175641, -0.38405435, -2.3015387, -0.87785842, -0.24937038, 1.14472371, -1.07296862, -1.09989127, -0.7612069, 0.58281521, -2.06014071, 0.50249434}; - std::vector output(24); std::vector input_shape = {1, 2, 3, 2, 2}; std::vector output_shape = {2, 2, 3, 2, 1}; - int perm[8] = {4, 3, 2, 1, 0, 0, 0, 0}; - int strides[8] = {24, 12, 4, 2, 1, 1, 1, 1}; - int out_strides[8] = {12, 6, 2, 1, 1, 1, 1, 1}; - - 
TransposeParameter param; - param.op_parameter_.type_ = schema::PrimitiveType_Transpose; - param.num_axes_ = 5; - param.conjugate_ = false; - param.data_size_ = 24 * sizeof(float); - - for (int i = 0; i < 8; i++) { - param.perm_[i] = perm[i]; - param.strides_[i] = strides[i]; - param.out_strides_[i] = out_strides[i]; - } - + int perm[5] = {4, 3, 2, 1, 0}; + TransposeParameter *param = new (std::nothrow) TransposeParameter; + param->op_parameter_.infer_flag_ = true; + param->op_parameter_.type_ = schema::PrimitiveType_Transpose; lite::Tensor input_tensor; input_tensor.set_data(input.data()); input_tensor.set_shape(input_shape); input_tensor.set_format(schema::Format_NHWC); input_tensor.set_data_type(kNumberTypeFloat32); - std::vector inputs_tensor; - inputs_tensor.emplace_back(&input_tensor); - + lite::Tensor perm_tensor(kNumberTypeInt32, {5}); + perm_tensor.set_data(perm); + std::vector inputs_tensor{&input_tensor, &perm_tensor}; lite::Tensor output_tensor; output_tensor.set_data(output.data()); output_tensor.set_shape(output_shape); @@ -194,7 +235,6 @@ TEST_F(TestTransposeFp32, TransposeFp32_test5) { output_tensor.set_data_type(kNumberTypeFloat32); std::vector outputs_tensor; outputs_tensor.emplace_back(&output_tensor); - lite::InnerContext ctx; ctx.thread_num_ = 2; ASSERT_EQ(lite::RET_OK, ctx.Init()); @@ -202,17 +242,18 @@ TEST_F(TestTransposeFp32, TransposeFp32_test5) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&param), &ctx, desc); + creator(inputs_tensor, outputs_tensor, reinterpret_cast(param), &ctx, desc); ASSERT_NE(kernel, nullptr); kernel->Run(); - for (int i = 0; i < 24; ++i) { std::cout << output[i] << " "; } std::cout << "\n"; ASSERT_EQ(0, CompareOutputData(output.data(), correct, 24, 0.000001)); input_tensor.set_data(nullptr); + perm_tensor.set_data(nullptr); output_tensor.set_data(nullptr); + delete kernel; } } // 
namespace mindspore