| @@ -0,0 +1,239 @@ | |||
| #ifdef BFC_MEMORY | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32/transpose_server_fp32.h" | |||
// Advances the multi-dimensional input cursor one step along axis NUM after a
// full innermost row has been emitted.  If axis NUM has reached its last index
// (its overflow point) it is reset to 0 and control FALLS THROUGH so the
// caller's next JUDGEPART (or the final stride0 add) advances the next-outer
// axis; otherwise the axis index is incremented, the axis stride is added to
// in_offset, and the enclosing row loop is continued directly.
// NOTE: comments cannot go inside the macro body — a // comment would swallow
// the line-continuation backslash.
#define JUDGEPART(NUM)                          \
  if (dim_start##NUM == overflow_point##NUM) {  \
    dim_start##NUM = 0;                         \
  } else {                                      \
    ++dim_start##NUM;                           \
    in_offset += stride##NUM;                   \
    continue;                                   \
  }
/**
 * Transposes the 3-D block of the output described by boundary_info.
 * The block is emitted as three segments over the innermost output axis:
 * sizes[0] = leading partial row, sizes[1] = whole rows, sizes[2] = trailing
 * partial row.  overflow_points[i] is (output dim i) - 1; strides[] are the
 * input strides permuted to output order (outer entries pre-adjusted offline
 * so a single add performs the carry step — see ComputeIndividualOfflineInfo).
 */
void DoTransposeServerDim3(const float *in_data, float *out_data, const int64_t *overflow_points,
                           const int64_t *strides, const TransposeBlockBoundaryInfo *boundary_info) {
  int64_t stride2 = strides[THIRD_INPUT];  // input step along the innermost output axis
  int64_t size = boundary_info->sizes[0];
  int64_t in_offset = boundary_info->in_offsets[0];
  out_data += boundary_info->out_start_offset;
  // Segment 1: leading partial row — strided gather from the input.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride2];
  }
  // dim_start1 / overflow_point1 / stride1 are consumed by JUDGEPART(1) below.
  int64_t dim_start1 = boundary_info->start_dim[1];
  int64_t overflow_point1 = overflow_points[1];
  int64_t overflow_point2 = overflow_points[THIRD_INPUT];
  int64_t stride0 = strides[0];
  int64_t stride1 = strides[1];
  int64_t last_dim = overflow_point2 + 1;  // full length of the innermost output axis
  out_data += size;
  size = boundary_info->sizes[1];
  in_offset = boundary_info->in_offsets[1];
  // Segment 2: whole innermost rows; after each row, carry-propagate the
  // outer coordinate (bump axis 1, or on overflow reset it and bump axis 0).
  for (int64_t i = 0; i < size; i += last_dim) {
    for (int64_t j = 0; j < overflow_point2; ++j) {
      out_data[i + j] = in_data[in_offset];
      in_offset += stride2;
    }
    out_data[i + overflow_point2] = in_data[in_offset];
    JUDGEPART(1)
    in_offset += stride0;
  }
  out_data += size;
  size = boundary_info->sizes[THIRD_INPUT];
  // Segment 3: trailing partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride2];
  }
}
/**
 * Same three-segment scheme as DoTransposeServerDim3, specialized for 4 axes.
 * Axis 3 is the innermost output axis; after each whole row the carry is
 * propagated through axes 2 and 1 via JUDGEPART, falling through to stride0.
 */
void DoTransposeServerDim4(const float *in_data, float *out_data, const int64_t *overflow_points,
                           const int64_t *strides, const TransposeBlockBoundaryInfo *boundary_info) {
  int64_t stride3 = strides[FOURTH_INPUT];  // input step along the innermost output axis
  int64_t size = boundary_info->sizes[0];
  int64_t in_offset = boundary_info->in_offsets[0];
  out_data += boundary_info->out_start_offset;
  // Segment 1: leading partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride3];
  }
  // dim_startN / overflow_pointN / strideN feed the JUDGEPART(N) chain below.
  int64_t dim_start1 = boundary_info->start_dim[1];
  int64_t dim_start2 = boundary_info->start_dim[THIRD_INPUT];
  int64_t overflow_point1 = overflow_points[1];
  int64_t overflow_point2 = overflow_points[THIRD_INPUT];
  int64_t overflow_point3 = overflow_points[FOURTH_INPUT];
  int64_t stride0 = strides[0];
  int64_t stride1 = strides[1];
  int64_t stride2 = strides[THIRD_INPUT];
  int64_t last_dim = overflow_point3 + 1;  // full innermost row length
  out_data += size;
  size = boundary_info->sizes[1];
  in_offset = boundary_info->in_offsets[1];
  // Segment 2: whole innermost rows with carry propagation after each row.
  for (int64_t i = 0; i < size; i += last_dim) {
    for (int64_t j = 0; j < overflow_point3; ++j) {
      out_data[i + j] = in_data[in_offset];
      in_offset += stride3;
    }
    out_data[i + overflow_point3] = in_data[in_offset];
    JUDGEPART(2)
    JUDGEPART(1)
    in_offset += stride0;
  }
  out_data += size;
  size = boundary_info->sizes[THIRD_INPUT];
  // Segment 3: trailing partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride3];
  }
}
/**
 * Same three-segment scheme as DoTransposeServerDim3, specialized for 5 axes.
 * Axis 4 is the innermost output axis; the carry walks axes 3 -> 2 -> 1.
 */
void DoTransposeServerDim5(const float *in_data, float *out_data, const int64_t *overflow_points,
                           const int64_t *strides, const TransposeBlockBoundaryInfo *boundary_info) {
  int64_t stride4 = strides[FIFTH_INPUT];  // input step along the innermost output axis
  int64_t size = boundary_info->sizes[0];
  int64_t in_offset = boundary_info->in_offsets[0];
  out_data += boundary_info->out_start_offset;
  // Segment 1: leading partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride4];
  }
  // dim_startN / overflow_pointN / strideN feed the JUDGEPART(N) chain below.
  int64_t dim_start1 = boundary_info->start_dim[1];
  int64_t dim_start2 = boundary_info->start_dim[THIRD_INPUT];
  int64_t dim_start3 = boundary_info->start_dim[FOURTH_INPUT];
  int64_t overflow_point1 = overflow_points[1];
  int64_t overflow_point2 = overflow_points[THIRD_INPUT];
  int64_t overflow_point3 = overflow_points[FOURTH_INPUT];
  int64_t overflow_point4 = overflow_points[FIFTH_INPUT];
  int64_t stride0 = strides[0];
  int64_t stride1 = strides[1];
  int64_t stride2 = strides[THIRD_INPUT];
  int64_t stride3 = strides[FOURTH_INPUT];
  int64_t last_dim = overflow_point4 + 1;  // full innermost row length
  out_data += size;
  size = boundary_info->sizes[1];
  in_offset = boundary_info->in_offsets[1];
  // Segment 2: whole innermost rows with carry propagation after each row.
  for (int64_t i = 0; i < size; i += last_dim) {
    for (int64_t j = 0; j < overflow_point4; ++j) {
      out_data[i + j] = in_data[in_offset];
      in_offset += stride4;
    }
    out_data[i + overflow_point4] = in_data[in_offset];
    JUDGEPART(3)
    JUDGEPART(2)
    JUDGEPART(1)
    in_offset += stride0;
  }
  out_data += size;
  size = boundary_info->sizes[THIRD_INPUT];
  // Segment 3: trailing partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride4];
  }
}
/**
 * Same three-segment scheme as DoTransposeServerDim3, specialized for 6 axes.
 * Axis 5 is the innermost output axis; the carry walks axes 4 -> 3 -> 2 -> 1.
 */
void DoTransposeServerDim6(const float *in_data, float *out_data, const int64_t *overflow_points,
                           const int64_t *strides, const TransposeBlockBoundaryInfo *boundary_info) {
  int64_t stride5 = strides[SIXTH_INPUT];  // input step along the innermost output axis
  int64_t size = boundary_info->sizes[0];
  int64_t in_offset = boundary_info->in_offsets[0];
  out_data += boundary_info->out_start_offset;
  // Segment 1: leading partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride5];
  }
  // dim_startN / overflow_pointN / strideN feed the JUDGEPART(N) chain below.
  int64_t dim_start1 = boundary_info->start_dim[1];
  int64_t dim_start2 = boundary_info->start_dim[THIRD_INPUT];
  int64_t dim_start3 = boundary_info->start_dim[FOURTH_INPUT];
  int64_t dim_start4 = boundary_info->start_dim[FIFTH_INPUT];
  int64_t overflow_point1 = overflow_points[1];
  int64_t overflow_point2 = overflow_points[THIRD_INPUT];
  int64_t overflow_point3 = overflow_points[FOURTH_INPUT];
  int64_t overflow_point4 = overflow_points[FIFTH_INPUT];
  int64_t overflow_point5 = overflow_points[SIXTH_INPUT];
  int64_t stride0 = strides[0];
  int64_t stride1 = strides[1];
  int64_t stride2 = strides[THIRD_INPUT];
  int64_t stride3 = strides[FOURTH_INPUT];
  int64_t stride4 = strides[FIFTH_INPUT];
  int64_t last_dim = overflow_point5 + 1;  // full innermost row length
  out_data += size;
  size = boundary_info->sizes[1];
  in_offset = boundary_info->in_offsets[1];
  // Segment 2: whole innermost rows with carry propagation after each row.
  for (int64_t i = 0; i < size; i += last_dim) {
    for (int64_t j = 0; j < overflow_point5; ++j) {
      out_data[i + j] = in_data[in_offset];
      in_offset += stride5;
    }
    out_data[i + overflow_point5] = in_data[in_offset];
    JUDGEPART(4)
    JUDGEPART(3)
    JUDGEPART(2)
    JUDGEPART(1)
    in_offset += stride0;
  }
  out_data += size;
  size = boundary_info->sizes[THIRD_INPUT];
  // Segment 3: trailing partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride5];
  }
}
/**
 * Entry point: transposes one thread's output block.  Dispatches to the
 * unrolled 3-/4-/5-/6-axis kernels; for any other rank runs a generic loop
 * that keeps the output coordinate in a dim_info[] array instead of the
 * JUDGEPART variable chain.
 */
void DoTransposeServer(const float *in_data, float *out_data, const int64_t *overflow_points, const int64_t *strides,
                       int axis_num, const TransposeBlockBoundaryInfo *boundary_info) {
  if (axis_num == DIMENSION_3D) {
    DoTransposeServerDim3(in_data, out_data, overflow_points, strides, boundary_info);
    return;
  } else if (axis_num == DIMENSION_4D) {
    DoTransposeServerDim4(in_data, out_data, overflow_points, strides, boundary_info);
    return;
  } else if (axis_num == DIMENSION_5D) {
    DoTransposeServerDim5(in_data, out_data, overflow_points, strides, boundary_info);
    return;
  } else if (axis_num == DIMENSION_6D) {
    DoTransposeServerDim6(in_data, out_data, overflow_points, strides, boundary_info);
    return;
  }
  out_data += boundary_info->out_start_offset;
  int64_t stride = strides[axis_num - 1];  // input step along the innermost output axis
  int64_t size = boundary_info->sizes[0];
  int64_t in_offset = boundary_info->in_offsets[0];
  // Segment 1: leading partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride];
  }
  // Output coordinate at the start of the whole-row segment.
  int64_t dim_info[MAX_TRANSPOSE_DIM_SIZE] = {};
  for (int i = 0; i < axis_num; ++i) {
    dim_info[i] = boundary_info->start_dim[i];
  }
  int64_t last_overflow_point = overflow_points[axis_num - 1];
  int64_t last_dim = last_overflow_point + 1;  // full innermost row length
  out_data += size;
  size = boundary_info->sizes[1];
  // Segment 2: whole innermost rows.
  for (int64_t i = 0; i < size; i += last_dim) {
    for (int64_t j = 0; j < last_overflow_point; ++j) {
      out_data[i + j] = in_data[in_offset];
      in_offset += stride;
    }
    out_data[i + last_overflow_point] = in_data[in_offset];
    // Carry-propagate: reset each overflowed outer axis, then bump the first
    // non-overflowed one.  NOTE(review): j is not checked against 0 — this
    // relies on sizes[1] never overrunning the tensor so a non-overflowed
    // axis always exists; confirm against ChooseThreadCuttingStrategy.
    int j = axis_num - 2;
    while (dim_info[j] == overflow_points[j]) {
      dim_info[j] = 0;
      --j;
    }
    ++dim_info[j];
    in_offset += strides[j];
  }
  out_data += size;
  size = boundary_info->sizes[THIRD_INPUT];
  // Segment 3: trailing partial row.
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = in_data[in_offset + i * stride];
  }
}
| #endif | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_NNACL_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #define MINDSPORE_NNACL_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #ifdef BFC_MEMORY | |||
| #include "nnacl/transpose.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
// Precomputed boundary description of one thread's contiguous block of the
// transposed output (filled offline, consumed by DoTransposeServer).
typedef struct TransposeBlockBoundaryInfo {
  int64_t out_start_offset;  // first flat output index this block writes
  // sizes[0]: leading partial run of the innermost output axis,
  // sizes[1]: elements covered by whole innermost rows,
  // sizes[2]: trailing partial run.  sizes[0]+sizes[1]+sizes[2] = block size.
  int64_t sizes[C3NUM];
  // Input offsets matching the start of the sizes[0] and sizes[1] segments.
  int64_t in_offsets[C2NUM];
  // Per-axis output coordinate at the start of the sizes[1] segment
  // (i.e. the coordinate matching in_offsets[1]).
  int64_t start_dim[MAX_TRANSPOSE_DIM_SIZE];
} TransposeBlockBoundaryInfo;
void DoTransposeServer(const float *in_data, float *out_data, const int64_t *overflow_points, const int64_t *strides,
                       int axis_num, const TransposeBlockBoundaryInfo *boundary_info);
| #ifdef __cplusplus | |||
| }; | |||
| #endif | |||
| #endif // MINDSPORE_NNACL_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #endif | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -1,5 +1,6 @@ | |||
| #ifndef BFC_MEMORY | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -55,3 +56,4 @@ int TransposeCPUKernel::DoTransposeMultiThread(int task_id) { | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Transpose, LiteKernelCreator<TransposeCPUKernel>) | |||
| REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Transpose, LiteKernelCreator<TransposeCPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| #endif | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_FP32_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_FP32_H_ | |||
| #ifndef BFC_MEMORY | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/base/transpose_base.h" | |||
| @@ -36,3 +37,4 @@ class TransposeCPUKernel : public TransposeBaseCPUKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_FP32_H_ | |||
| #endif | |||
| @@ -0,0 +1,134 @@ | |||
| #ifdef BFC_MEMORY | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/transpose_server_fp32.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "nnacl/fp32/pack_fp32.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Transpose; | |||
| namespace mindspore::kernel { | |||
namespace {
// Minimum number of elements worth giving to one thread (1 << 18 = 262144);
// smaller workloads are not split across threads.
constexpr int64_t kMinCostPerThread = 1 << 18;
}  // namespace
| int TransposeServerCPUKernel::ReSize() { | |||
| auto ret = TransposeBaseCPUKernel::ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Do transpose resize failed."; | |||
| return ret; | |||
| } | |||
| if (!is_valid_ || opt_run_) { | |||
| return RET_OK; | |||
| } | |||
| ComputeIndividualOfflineInfo(); | |||
| return ChooseThreadCuttingStrategy(); | |||
| } | |||
| void TransposeServerCPUKernel::ComputeIndividualOfflineInfo() { | |||
| MS_ASSERT(param_->num_axes_ >= C3NUM); | |||
| overflow_points_.resize(param_->num_axes_); | |||
| for (int i = 0; i < param_->num_axes_; ++i) { | |||
| overflow_points_[i] = (out_shape_[i] - 1); | |||
| } | |||
| strides_.resize(param_->num_axes_); | |||
| for (int i = 0; i < param_->num_axes_; ++i) { | |||
| strides_[i] = param_->strides_[param_->perm_[i]]; | |||
| } | |||
| std::vector<int64_t> in_strides_temp = strides_; | |||
| for (int i = param_->num_axes_ - C2NUM; i >= 0; --i) { | |||
| strides_[i] = | |||
| strides_[i] - in_strides_temp[i + 1] - in_strides_temp[i + 1] * overflow_points_[i + 1] + strides_[i + 1]; | |||
| } | |||
| } | |||
| int TransposeServerCPUKernel::ChooseThreadCuttingStrategy() { | |||
| block_boundary_infos_.clear(); | |||
| int64_t element_num = in_tensors_.front()->ElementsNum(); | |||
| if (element_num <= kMinCostPerThread) { | |||
| thread_num_ = 1; | |||
| } else { | |||
| thread_num_ = MSMIN(op_parameter_->thread_num_, UP_DIV(element_num, kMinCostPerThread)); | |||
| } | |||
| if (thread_num_ < 1) { | |||
| thread_num_ = 1; | |||
| } | |||
| if (thread_num_ > C4NUM) { | |||
| thread_num_ = C4NUM; | |||
| } | |||
| int64_t block_size = element_num / thread_num_; | |||
| int64_t remain_data = element_num - block_size * thread_num_; | |||
| int64_t split_point = 0; | |||
| block_boundary_infos_.clear(); | |||
| std::vector<int64_t> post_multi(param_->num_axes_, 1); | |||
| for (int i = param_->num_axes_ - C2NUM; i >= 0; --i) { | |||
| post_multi[i] = post_multi[i + 1] * out_shape_[i + 1]; | |||
| } | |||
| while (split_point < element_num) { | |||
| TransposeBlockBoundaryInfo block_boundary_info; | |||
| int64_t in_offset = 0; | |||
| block_boundary_info.out_start_offset = split_point; | |||
| for (int i = 0; i < param_->num_axes_; ++i) { | |||
| block_boundary_info.start_dim[i] = split_point / post_multi[i] % out_shape_[i]; | |||
| in_offset += block_boundary_info.start_dim[i] * param_->strides_[param_->perm_[i]]; | |||
| } | |||
| block_boundary_info.in_offsets[0] = in_offset; | |||
| split_point += block_size; | |||
| if (remain_data > 0) { | |||
| ++split_point; | |||
| --remain_data; | |||
| } | |||
| if (split_point > element_num) { | |||
| split_point = element_num; | |||
| } | |||
| int64_t size = split_point - block_boundary_info.out_start_offset; | |||
| int last_axis_index = param_->num_axes_ - 1; | |||
| block_boundary_info.sizes[0] = | |||
| MSMIN(size, out_shape_[last_axis_index] - block_boundary_info.start_dim[last_axis_index]); | |||
| size -= block_boundary_info.sizes[0]; | |||
| block_boundary_info.sizes[1] = DOWN_ROUND(size, out_shape_[last_axis_index]); | |||
| block_boundary_info.sizes[C2NUM] = size - block_boundary_info.sizes[1]; | |||
| int64_t out_offset = block_boundary_info.out_start_offset + block_boundary_info.sizes[0]; | |||
| in_offset = 0; | |||
| for (int i = 0; i < param_->num_axes_; ++i) { | |||
| block_boundary_info.start_dim[i] = out_offset / post_multi[i] % out_shape_[i]; | |||
| in_offset += block_boundary_info.start_dim[i] * param_->strides_[param_->perm_[i]]; | |||
| } | |||
| block_boundary_info.in_offsets[1] = in_offset; | |||
| block_boundary_infos_.push_back(block_boundary_info); | |||
| } | |||
| thread_num_ = block_boundary_infos_.size(); | |||
| return RET_OK; | |||
| } | |||
// Single-threaded path: run the whole transpose as task 0.
int TransposeServerCPUKernel::DoTransposeSingleThread() { return DoTransposeMultiThread(0); }
| int TransposeServerCPUKernel::DoTransposeMultiThread(int task_id) { | |||
| if (opt_run_) { | |||
| PackNHWCToNCHWFp32(in_data_, out_data_, opt_param_[FIRST_INPUT], opt_param_[SECOND_INPUT], opt_param_[THIRD_INPUT], | |||
| task_id, thread_num_); | |||
| return RET_OK; | |||
| } | |||
| DoTransposeServer(static_cast<float *>(in_data_), static_cast<float *>(out_data_), overflow_points_.data(), | |||
| strides_.data(), param_->num_axes_, &block_boundary_infos_[task_id]); | |||
| return RET_OK; | |||
| } | |||
// Register this server kernel for fp32 and int32 Transpose.
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Transpose, LiteKernelCreator<TransposeServerCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Transpose, LiteKernelCreator<TransposeServerCPUKernel>)
| } // namespace mindspore::kernel | |||
| #endif | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #ifdef BFC_MEMORY | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/base/transpose_base.h" | |||
| #include "nnacl/fp32/transpose_server_fp32.h" | |||
| namespace mindspore::kernel { | |||
// Transpose kernel for server (BFC_MEMORY) builds: at resize time it splits
// the transposed output into contiguous per-thread blocks with precomputed
// boundary info, then each worker runs DoTransposeServer on its block.
class TransposeServerCPUKernel : public TransposeBaseCPUKernel {
 public:
  explicit TransposeServerCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : TransposeBaseCPUKernel(param, inputs, outputs, ctx) {}
  ~TransposeServerCPUKernel() override = default;

  int ReSize() override;

 private:
  // Precomputes overflow_points_ and the carry-adjusted strides_.
  void ComputeIndividualOfflineInfo();
  // Splits the output into per-thread blocks and fills block_boundary_infos_.
  int ChooseThreadCuttingStrategy();
  int DoTransposeSingleThread() override;
  int DoTransposeMultiThread(int task_id) override;
  // (output dim - 1) for each axis, in output order.
  std::vector<int64_t> overflow_points_;
  // Input strides permuted to output order, outer entries adjusted offline.
  std::vector<int64_t> strides_;
  // One entry per worker thread; see ChooseThreadCuttingStrategy().
  std::vector<TransposeBlockBoundaryInfo> block_boundary_infos_;
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ARM_FP32_TRANSPOSE_SERVER_FP32_H_ | |||
| #endif | |||