| @@ -127,8 +127,30 @@ void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| } | } | ||||
| } | } | ||||
void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape,
                   int h_start, int h_end, int dims, int *size, int *position) {
  /* Generic N-dimensional transpose used when dims exceeds the specialized
   * TransposeDim2..TransposeDim5 kernels.
   *
   * size[i] is filled with the number of elements spanned by one step along
   * output dimension i, so size[dims - 1] == 1 and size[0] covers everything
   * but the outermost dimension.  position[] is scratch space for the
   * per-dimension coordinates of the current output element. */
  size[dims - 1] = 1;
  for (int i = dims - 1; i > 0; --i) {
    size[i - 1] = size[i] * output_shape[i];
  }
  /* Total element count of the output tensor, hoisted out of the loop. */
  int element_count = size[0] * output_shape[0];
  /* NOTE(review): h_start/h_end are accepted but unused — this path appears
   * to traverse the full tensor regardless of the thread slice; confirm
   * against the parallel launcher. */
  for (int idx = 0; idx < element_count; ++idx) {
    int remainder = idx;
    int output_idx = 0;
    int input_idx = 0;
    /* Decompose the linear index into per-dimension coordinates, and
     * accumulate the corresponding linear offsets into both tensors.
     * perm[i] names the input dimension that feeds output dimension i. */
    for (int i = 0; i < dims; ++i) {
      position[i] = remainder / size[i];
      remainder %= size[i];
      int out_stride = (i < dims - 1) ? out_strides[i] : 1;
      output_idx += position[i] * out_stride;
      input_idx += position[i] * strides[perm[i]];
    }
    out_data[output_idx] = in_data[input_idx];
  }
}
| int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | ||||
| TransposeParameter *transpose_param, int h_start, int h_end) { | |||||
| TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position) { | |||||
| if (in_data == NULL || out_data == NULL) { | if (in_data == NULL || out_data == NULL) { | ||||
| return NNACL_ERR; | return NNACL_ERR; | ||||
| } | } | ||||
| @@ -138,7 +160,7 @@ int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_s | |||||
| int data_size = transpose_param->data_size_; | int data_size = transpose_param->data_size_; | ||||
| int num_axes = transpose_param->num_axes_; | int num_axes = transpose_param->num_axes_; | ||||
| if (num_axes < 2 || num_axes > 5) { | |||||
| if (num_axes < 2) { | |||||
| return NNACL_ERR; | return NNACL_ERR; | ||||
| } | } | ||||
| @@ -163,6 +185,9 @@ int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_s | |||||
| TransposeDim4(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); | TransposeDim4(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); | ||||
| } else if (num_axes == 5) { | } else if (num_axes == 5) { | ||||
| TransposeDim5(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); | TransposeDim5(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); | ||||
| } else { | |||||
| TransposeDims(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end, num_axes, size, | |||||
| position); | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| @@ -33,7 +33,7 @@ typedef struct TransposeParameter { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | ||||
| TransposeParameter *transpose_param, int h_start, int h_end); | |||||
| TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position); | |||||
| void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | ||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | ||||
| @@ -42,6 +42,8 @@ void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | ||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end, int dims, int *size, int *position); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/transpose.h" | #include "src/runtime/kernel/arm/fp32/transpose.h" | ||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/transpose.h" | #include "nnacl/transpose.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| @@ -29,6 +30,10 @@ using mindspore::lite::RET_OP_EXECUTE_FAILURE; | |||||
| using mindspore::schema::PrimitiveType_Transpose; | using mindspore::schema::PrimitiveType_Transpose; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| namespace { | |||||
| constexpr int maxDimSize = 5; | |||||
| } // namespace | |||||
| int TransposeCPUKernel::Init() { | int TransposeCPUKernel::Init() { | ||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -90,8 +95,16 @@ int TransposeCPUKernel::TransposeParallel(int task_id) { | |||||
| } | } | ||||
| int thread_offset = task_id * thread_h_stride_; | int thread_offset = task_id * thread_h_stride_; | ||||
| TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_); | TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_); | ||||
| auto ret = | |||||
| DoTranspose(in_data_, out_data_, in_shape_, out_shape_, param, thread_offset, thread_offset + num_unit_thread); | |||||
| int *size = nullptr; | |||||
| int *position = nullptr; | |||||
| if (this->dim_size_ != nullptr && this->position_ != nullptr) { | |||||
| size = this->dim_size_ + task_id * param->num_axes_; | |||||
| position = this->position_ + task_id * param->num_axes_; | |||||
| } | |||||
| auto ret = DoTranspose(in_data_, out_data_, in_shape_, out_shape_, param, thread_offset, | |||||
| thread_offset + num_unit_thread, size, position); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] error_code[" << ret << "]"; | MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -120,8 +133,29 @@ int TransposeCPUKernel::Run() { | |||||
| } | } | ||||
| in_data_ = reinterpret_cast<float *>(in_tensor->MutableData()); | in_data_ = reinterpret_cast<float *>(in_tensor->MutableData()); | ||||
| out_data_ = reinterpret_cast<float *>(out_tensor->MutableData()); | out_data_ = reinterpret_cast<float *>(out_tensor->MutableData()); | ||||
| int dims = out_tensor->shape().size(); | |||||
| if (dims > maxDimSize) { | |||||
| dim_size_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int))); | |||||
| if (dim_size_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc data failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| position_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int))); | |||||
| if (position_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc data failed"; | |||||
| context_->allocator->Free(dim_size_); | |||||
| dim_size_ = nullptr; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_); | auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_); | ||||
| if (dims > maxDimSize) { | |||||
| context_->allocator->Free(dim_size_); | |||||
| context_->allocator->Free(position_); | |||||
| dim_size_ = nullptr; | |||||
| position_ = nullptr; | |||||
| } | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -46,6 +46,8 @@ class TransposeCPUKernel : public LiteKernel { | |||||
| float *out_data_; | float *out_data_; | ||||
| int *in_shape_ = nullptr; | int *in_shape_ = nullptr; | ||||
| int *out_shape_ = nullptr; | int *out_shape_ = nullptr; | ||||
| int *dim_size_ = nullptr; | |||||
| int *position_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -6,3 +6,5 @@ gts_version-RFB-320_simplified.onnx | |||||
| mnist-8.onnx | mnist-8.onnx | ||||
| crnn_lite_lstm_v2.onnx:32,32,32,1 | crnn_lite_lstm_v2.onnx:32,32,32,1 | ||||
| psenet_lite_mbv2.onnx:1,32,32,3 | psenet_lite_mbv2.onnx:1,32,32,3 | ||||
| super-resolution-10.onnx:1,224,224,1 | |||||
| tinyyolov2-8.onnx:1,416,416,3 | |||||
| @@ -64,7 +64,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes4) { | |||||
| param->out_strides_[i] = out_strides[i]; | param->out_strides_[i] = out_strides[i]; | ||||
| } | } | ||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3); | |||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3, nullptr, nullptr); | |||||
| ASSERT_EQ(ret, 0); | ASSERT_EQ(ret, 0); | ||||
| delete param; | delete param; | ||||
| CompareOutputData(out, correct, 24, 0.000001); | CompareOutputData(out, correct, 24, 0.000001); | ||||
| @@ -104,7 +104,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes3) { | |||||
| param->out_strides_[i] = out_strides[i]; | param->out_strides_[i] = out_strides[i]; | ||||
| } | } | ||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3); | |||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3, nullptr, nullptr); | |||||
| ASSERT_EQ(ret, 0); | ASSERT_EQ(ret, 0); | ||||
| delete param; | delete param; | ||||
| CompareOutputData(out, correct, 24, 0.000001); | CompareOutputData(out, correct, 24, 0.000001); | ||||
| @@ -145,7 +145,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes2) { | |||||
| param->out_strides_[i] = out_strides[i]; | param->out_strides_[i] = out_strides[i]; | ||||
| } | } | ||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 6); | |||||
| auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 6, nullptr, nullptr); | |||||
| ASSERT_EQ(ret, 0); | ASSERT_EQ(ret, 0); | ||||
| delete param; | delete param; | ||||
| CompareOutputData(out, correct, 24, 0.000001); | CompareOutputData(out, correct, 24, 0.000001); | ||||
| @@ -92,7 +92,7 @@ STATUS InferShapePass::Run(MetaGraphT *graph) { | |||||
| auto input_tensor = graph->allTensors[idx].get(); | auto input_tensor = graph->allTensors[idx].get(); | ||||
| for (auto &dim : input_tensor->dims) { | for (auto &dim : input_tensor->dims) { | ||||
| if (dim == 0) { | if (dim == 0) { | ||||
| MS_LOG(WARNING) << "One dimension of the input shape is 0, which would be set to 32 as a default value."; | |||||
| MS_LOG(WARNING) << "One dimension of the input shape is 0, which would be set to -1 as a default value."; | |||||
| dim = DEFAULT_DIM_VALUE; | dim = DEFAULT_DIM_VALUE; | ||||
| } | } | ||||
| } | } | ||||