Merge pull request !6292 from wangzhe/master
@@ -10,6 +10,8 @@ file(GLOB KERNEL_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic_self.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/matmul.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/reduce.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
    ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/fp32/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/common/*.cc
    )
@@ -0,0 +1,238 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "internal/src/kernel/fp32/arithmetic.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "nnacl/arithmetic_common.h"
#include "nnacl/fp32/arithmetic.h"
#include "schema/ops_generated.h"

typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size);
typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size,
                                ArithmeticParameter *param);
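// Recursively walks the broadcast dimensions: for each dimension up to break_pos the input
// whose size is 1 stays at offset 0 while the other advances by its stride; past break_pos
// the shapes match, so the element-wise kernel runs over the remaining out_count elements.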
int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos,
                 ArithmeticRun arithmetic_run, ArithmeticParameter *params) {
  if (dim > break_pos) {
    return arithmetic_run(input0, input1, output, out_count);
  }
  for (int i = 0; i < params->out_shape_[dim]; ++i) {
    int pos0_ = params->in_shape0_[dim] == 1 ? 0 : i;
    int pos1_ = params->in_shape1_[dim] == 1 ? 0 : i;
    int error_code =
      BroadcastRun(input0 + pos0_ * params->in_strides0_[dim], input1 + pos1_ * params->in_strides1_[dim],
                   output + i * params->out_strides_[dim], dim + 1, out_count, break_pos, arithmetic_run, params);
    if (error_code != RET_OK) {
      return error_code;
    }
  }
  return RET_OK;
}
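// Computes the broadcast output shape and flags whether broadcasting is needed. When it is,
// break_pos is set to the last dimension where the input shapes differ, outside becomes the
// product of the trailing matching dimensions, and the input/output strides are precomputed.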
int CalBroadCasting(const TensorPtrVector &in_tensors, int *outside, int *break_pos, ArithmeticParameter *params) {
  params->broadcasting_ = false;
  for (int i = 0; i < params->ndim_; i++) {
    if (params->in_shape0_[i] != params->in_shape1_[i]) {
      if (params->in_shape0_[i] == 1) {
        params->out_shape_[i] = params->in_shape1_[i];
      } else if (params->in_shape1_[i] == 1) {
        params->out_shape_[i] = params->in_shape0_[i];
      } else {
        LITE_ERROR_LOG("shapes of input tensors can not be broadcasted");
        return RET_INPUT_TENSOR_ERROR;
      }
      params->broadcasting_ = true;
    } else {
      params->out_shape_[i] = params->in_shape0_[i];
    }
  }
  if (params->broadcasting_) {
    *outside = 1;
    for (int i = static_cast<int>(params->ndim_) - 1; i >= 0; --i) {
      if (params->in_shape0_[i] != params->in_shape1_[i]) {
        *break_pos = i;
        break;
      }
      (*outside) *= params->out_shape_[i];
    }
    ComputeStrides(params->in_shape0_, params->in_strides0_, params->ndim_);
    ComputeStrides(params->in_shape1_, params->in_strides1_, params->ndim_);
    ComputeStrides(params->out_shape_, params->out_strides_, params->ndim_);
  }
  return RET_OK;
}
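// Dispatches to the broadcast path, the optimized kernel (when one input is a single
// element), or the plain element-wise kernel, based on how the parameters were prepared.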
int RunArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, ArithmeticRun arithmetic_run,
                  ArithmeticOptRun arithmetic_opt_run, int outside, int break_pos, ArithmeticParameter *params) {
  int error_code = RET_OK;
  int count = out_tensors[0]->ElementsNum();
  float *input0_data = reinterpret_cast<float *>(in_tensors[0]->data_);
  float *input1_data = reinterpret_cast<float *>(in_tensors[1]->data_);
  float *output_data = reinterpret_cast<float *>(out_tensors[0]->data_);
  if (params->broadcasting_) {
    error_code = BroadcastRun(input0_data, input1_data, output_data, 0, outside, break_pos, arithmetic_run, params);
  } else if (arithmetic_opt_run != NULL) {
    error_code = arithmetic_opt_run(input0_data, input1_data, output_data, count, params);
  } else {
    error_code = arithmetic_run(input0_data, input1_data, output_data, count);
  }
  return error_code;
}
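// Aligns the two input shapes to a common rank by left-padding the shorter one with 1s,
// then derives the broadcast output shape and copies the data type and format of the
// first input to the output tensor.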
int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1) {
    LITE_ERROR_LOG("output tensors num not correct!")
    return RET_ERROR;
  }
  ShapeVector in_shape0 = in_tensors[0]->shape_;
  ShapeVector in_shape1 = in_tensors[1]->shape_;
  int ndim0 = in_shape0.size();
  int ndim1 = in_shape1.size();
  ArithmeticParameter *arithmetic_param = reinterpret_cast<ArithmeticParameter *>(param);
  if (ndim0 < ndim1) {
    arithmetic_param->ndim_ = ndim1;
    int fill_dim_num = ndim1 - ndim0;
    int j = 0;
    for (int i = 0; i < ndim1; i++) {
      if (i < fill_dim_num) {
        arithmetic_param->in_shape0_[i] = 1;
      } else {
        arithmetic_param->in_shape0_[i] = in_shape0[j++];
      }
      arithmetic_param->in_shape1_[i] = in_shape1[i];
    }
  } else if (ndim0 > ndim1) {
    arithmetic_param->ndim_ = ndim0;
    int fill_dim_num = ndim0 - ndim1;
    int j = 0;
    for (int i = 0; i < ndim0; i++) {
      if (i < fill_dim_num) {
        arithmetic_param->in_shape1_[i] = 1;
      } else {
        arithmetic_param->in_shape1_[i] = in_shape1[j++];
      }
      arithmetic_param->in_shape0_[i] = in_shape0[i];
    }
  } else {
    arithmetic_param->ndim_ = ndim0;
    for (int i = 0; i < ndim0; i++) {
      arithmetic_param->in_shape0_[i] = in_shape0[i];
      arithmetic_param->in_shape1_[i] = in_shape1[i];
    }
  }
  ShapeVector out_shape;
  for (int i = 0; i < arithmetic_param->ndim_; i++) {
    if (arithmetic_param->in_shape0_[i] != arithmetic_param->in_shape1_[i]) {
      if (arithmetic_param->in_shape0_[i] == 1) {
        out_shape.push_back(arithmetic_param->in_shape1_[i]);
      } else if (arithmetic_param->in_shape1_[i] == 1) {
        out_shape.push_back(arithmetic_param->in_shape0_[i]);
      } else {
        LITE_ERROR_LOG("shapes of input tensors can not be broadcasted!")
        return RET_INPUT_TENSOR_ERROR;
      }
    } else {
      out_shape.push_back(arithmetic_param->in_shape0_[i]);
    }
  }
  out_tensors[0]->shape_ = out_shape;
  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
  out_tensors[0]->format_ = in_tensors[0]->format_;
  return RET_OK;
}
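// Maps the operator type to an element-wise kernel; only Mul is supported here, optionally
// fused with ReLU or ReLU6 activation.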
int ChooseKernel(const int kernel_type, ArithmeticRun *arithmetic_run, ArithmeticParameter *params) {
  if (kernel_type == KernelType::Mul) {
    if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
      *arithmetic_run = ElementMulRelu;
    } else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
      *arithmetic_run = ElementMulRelu6;
    } else {
      *arithmetic_run = ElementMul;
    }
  } else {
    LITE_ERROR_LOG("unsupported operator type");
    return RET_ERROR;
  }
  return RET_OK;
}
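// Selects the optimized kernel used when one input is a single element; if the operator has
// no optimized version, an info message is logged and arithmetic_opt_run stays NULL.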
int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run, ArithmeticParameter *params) {
  if (kernel_type == KernelType::Mul) {
    if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
      *arithmetic_opt_run = ElementOptMulRelu;
    } else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
      *arithmetic_opt_run = ElementOptMulRelu6;
    } else {
      *arithmetic_opt_run = ElementOptMul;
    }
  } else {
    LITE_INFO_LOG("kernel does not have an optimized version");
  }
  return RET_OK;
}
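// Entry point for element-wise arithmetic: validates the tensors, picks the kernel from the
// node parameters, and either takes the single-element fast path or sets up broadcasting
// before running the computation.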
int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                 mindspore::lite::Allocator *allocator) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
    return RET_ERROR;
  }
  if (allocator == NULL) {
    LITE_ERROR_LOG("allocator is NULL!")
    return RET_ERROR;
  }
  ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
  ArithmeticRun arithmetic_run = NULL;
  int kernel_type = params->op_parameter_.type_;
  int status = ChooseKernel(kernel_type, &arithmetic_run, params);
  if (status != RET_OK) {
    return status;
  }
  int outside = 0;
  int break_pos = 0;
  params->in_elements_num0_ = in_tensors[0]->ElementsNum();
  params->in_elements_num1_ = in_tensors[1]->ElementsNum();
  params->out_elements_num_ = out_tensors[0]->ElementsNum();
  ArithmeticOptRun arithmetic_opt_run = NULL;
  // If one of the inputs has only a single element, skip broadcasting and use the
  // optimized element-wise kernel when one is available.
  if (params->in_elements_num0_ == 1 || params->in_elements_num1_ == 1) {
    params->broadcasting_ = false;
    ChooseOptKernel(kernel_type, &arithmetic_opt_run, params);
  } else {
    int ret = CalBroadCasting(in_tensors, &outside, &break_pos, params);
    if (ret != RET_OK) {
      return ret;
    }
  }
  return RunArithmetic(in_tensors, out_tensors, arithmetic_run, arithmetic_opt_run, outside, break_pos, params);
}
@@ -0,0 +1,29 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INTERNAL_SRC_RUNTIME_KERNEL_MUL_H_
#define INTERNAL_SRC_RUNTIME_KERNEL_MUL_H_

#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "nnacl/arithmetic_common.h"

int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);

int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                 mindspore::lite::Allocator *allocator);

#endif  // INTERNAL_SRC_RUNTIME_KERNEL_MUL_H_
@@ -0,0 +1,82 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "internal/src/kernel/fp32/bias_add.h"
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/arithmetic_common.h"
#include "nnacl/fp32/arithmetic.h"
int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1) {
    LITE_ERROR_LOG("output tensors num not correct!")
    return RET_ERROR;
  }
  out_tensors[0]->shape_ = in_tensors[0]->shape_;
  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
  out_tensors[0]->format_ = in_tensors[0]->format_;
  return RET_OK;
}
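// Adds the bias along the last dimension by describing it as a [1, ..., 1, C] tensor and
// broadcasting it against the input; the tile buffers required by BroadcastAdd are taken
// from the allocator and released before returning.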
int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
              mindspore::lite::Allocator *allocator) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
    return RET_ERROR;
  }
  if (allocator == NULL) {
    LITE_ERROR_LOG("allocator is NULL!")
    return RET_ERROR;
  }
  ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
  ShapeVector dims = in_tensors[0]->shape_;
  params->ndim_ = dims.size();
  for (size_t i = 0; i < params->ndim_; i++) {
    params->in_shape0_[i] = dims[i];
    params->in_shape1_[i] = 1;
    params->out_shape_[i] = dims[i];
  }
  params->in_shape1_[params->ndim_ - 1] = dims[params->ndim_ - 1];
  float *in = reinterpret_cast<float *>(in_tensors[0]->data_);
  float *bias = reinterpret_cast<float *>(in_tensors[1]->data_);
  float *out = reinterpret_cast<float *>(out_tensors[0]->data_);
  size_t data_size = in_tensors[0]->ElementsNum();
  float *tile_in = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
  float *tile_bias = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
  if (tile_in == NULL || tile_bias == NULL) {
    LITE_ERROR_LOG("Memory allocation failed!")
    allocator->Free(tile_in);
    allocator->Free(tile_bias);
    return RET_ERROR;
  }
  BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, params);
  allocator->Free(tile_in);
  allocator->Free(tile_bias);
  return RET_OK;
}
@@ -0,0 +1,28 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INTERNAL_SRC_RUNTIME_KERNEL_BIAS_H_
#define INTERNAL_SRC_RUNTIME_KERNEL_BIAS_H_

#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"

int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);

int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
              mindspore::lite::Allocator *allocator);

#endif  // INTERNAL_SRC_RUNTIME_KERNEL_BIAS_H_
@@ -0,0 +1,233 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "internal/src/kernel/fp32/reduce.h"
#include <vector>
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/reduce_parameter.h"
#include "nnacl/fp32/reduce.h"
#include "schema/ops_generated.h"

typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                       float *dst_data, const int tid, const int thread_num);
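// Reducing over N axes needs N - 1 intermediate buffers: buffer i holds the element count
// of the tensor after axes 0..i have been reduced (each reduced axis collapses to 1), and
// the final reduction writes directly into the output tensor.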
int MallocTmpBuffer(std::vector<float *> *data_buffers, const ShapeVector &shape, const int *axes, const int num_axes,
                    mindspore::lite::Allocator *allocator) {
  for (size_t i = 0; i < data_buffers->size(); ++i) {
    if (data_buffers->at(i) != NULL) {
      allocator->Free(data_buffers->at(i));
      data_buffers->at(i) = NULL;
    }
  }
  data_buffers->clear();
  ShapeVector input_shape = shape;
  const int rank = input_shape.size();
  for (int i = 0; i < num_axes - 1; i++) {
    int axis = axes[i];
    size_t size = 1;
    for (int j = 0; j < rank; j++) {
      if (axis != j) {
        size *= input_shape[j];
      }
    }
    float *buffer = reinterpret_cast<float *>(allocator->Malloc(size * sizeof(float)));
    if (buffer == NULL) {
      LITE_ERROR_LOG("Memory allocation failed!")
      return RET_ERROR;
    }
    data_buffers->emplace_back(buffer);
    input_shape[axis] = 1;
  }
  return RET_OK;
}
int FreeTmpBuffer(std::vector<float *> *data_buffers, mindspore::lite::Allocator *allocator) {
  for (size_t i = 0; i < data_buffers->size(); ++i) {
    allocator->Free(data_buffers->at(i));
  }
  data_buffers->clear();
  return RET_OK;
}
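// Reduces one axis at a time: outer_size and inner_size are the element counts before and
// after the current axis, intermediate results go to data_buffers, and the last axis is
// reduced directly into out_data. The reducer is called single-threaded (tid 0 of 1).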
int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data, float *out_data, Int32Vector axes,
              ShapeVector shape) {
  int rank = shape.size();
  float *dst_data = NULL;
  float *src_data = in_data;
  ShapeVector tmp_shape = shape;
  for (size_t i = 0; i < axes.size(); ++i) {
    if (i != axes.size() - 1) {
      dst_data = data_buffers[i];
    } else {
      dst_data = out_data;
    }
    int axis = axes[i];
    int outer_size = 1;
    for (int j = 0; j < axis; j++) {
      outer_size *= tmp_shape[j];
    }
    int inner_size = 1;
    for (int k = axis + 1; k < rank; k++) {
      inner_size *= tmp_shape[k];
    }
    int axis_size = tmp_shape[axis];
    int error_code = reducer(outer_size, inner_size, axis_size, src_data, dst_data, 0, 1);
    if (error_code != RET_OK) {
      LITE_ERROR_LOG("Reduce run error!")
      return RET_ERROR;
    }
    tmp_shape[axis] = 1;
    src_data = dst_data;
  }
  return RET_OK;
}
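// Normalizes negative axes, expands reduce_to_end into an explicit axis list, and builds the
// output shape: reduced dimensions become 1 when keep_dims is set and are dropped otherwise;
// an empty axis list means reducing over all dimensions.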
int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1) {
    LITE_ERROR_LOG("output tensors num not correct!")
    return RET_ERROR;
  }
  ReduceParameter *reduce_param = reinterpret_cast<ReduceParameter *>(param);
  bool keep_dims = reduce_param->keep_dims_;
  int num_axes = reduce_param->num_axes_;
  ShapeVector in_shape = in_tensors[0]->shape_;
  int rank = in_shape.size();
  Int32Vector out_shape;
  Int32Vector axes;
  for (int i = 0; i < num_axes; ++i) {
    if (reduce_param->axes_[i] < -rank || reduce_param->axes_[i] >= rank) {
      LITE_ERROR_LOG("Reduce got an invalid axis!")
      return RET_ERROR;
    }
    if (reduce_param->axes_[i] < 0) {
      axes.push_back(reduce_param->axes_[i] + rank);
    } else {
      axes.push_back(reduce_param->axes_[i]);
    }
  }
  if (reduce_param->reduce_to_end_) {
    if (num_axes != 1) {
      LITE_ERROR_LOG("when reduce_to_end is set, the number of axes should be 1!")
      return RET_ERROR;
    }
    int begin_axis = axes[0];
    num_axes = rank - begin_axis;
    for (int i = begin_axis + 1; i < rank; ++i) {
      axes.push_back(i);
    }
  }
  if (num_axes == 0) {
    axes.resize(rank);
    for (int i = 0; i < rank; i++) {
      axes[i] = i;
      if (keep_dims) {
        out_shape.push_back(1);
      }
    }
    reduce_param->num_axes_ = axes.size();
    for (size_t i = 0; i < axes.size(); ++i) {
      reduce_param->axes_[i] = axes[i];
    }
    out_tensors[0]->shape_ = out_shape;
    out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
    out_tensors[0]->format_ = in_tensors[0]->format_;
    return RET_OK;
  }
  // Reduce on the selected axes only.
  for (int i = 0; i < rank; i++) {
    bool reduce_axis = false;
    for (int idx = 0; idx < num_axes; ++idx) {
      if (axes[idx] == i) {
        reduce_axis = true;
        break;
      }
    }
    if (reduce_axis) {
      if (keep_dims) {
        out_shape.push_back(1);
      }
    } else {
      out_shape.push_back(in_shape[i]);
    }
  }
  reduce_param->num_axes_ = axes.size();
  for (size_t i = 0; i < axes.size(); ++i) {
    reduce_param->axes_[i] = axes[i];
  }
  out_tensors[0]->shape_ = out_shape;
  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
  out_tensors[0]->format_ = in_tensors[0]->format_;
  return RET_OK;
}
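// Validates the tensors, maps the reduce mode to a ReduceSum or ReduceMean kernel, allocates
// the intermediate buffers, runs the per-axis reduction, and frees the buffers.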
int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
             mindspore::lite::Allocator *allocator) {
  if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
    return RET_ERROR;
  }
  if (allocator == NULL) {
    LITE_ERROR_LOG("allocator is NULL!")
    return RET_ERROR;
  }
  ReduceParameter *params = reinterpret_cast<ReduceParameter *>(node->primitive_);
  Reducer reducer = NULL;
  if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceSum) {
    reducer = ReduceSum;
  } else if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceMean) {
    reducer = ReduceMean;
  } else {
    LITE_ERROR_LOG("unsupported reduce mode!")
    return RET_ERROR;
  }
  std::vector<float *> data_buffers;
  int status = MallocTmpBuffer(&data_buffers, in_tensors[0]->shape_, params->axes_, params->num_axes_, allocator);
  if (status != RET_OK) {
    FreeTmpBuffer(&data_buffers, allocator);
    return status;
  }
  Int32Vector axes;
  for (int i = 0; i < params->num_axes_; ++i) {
    axes.push_back(params->axes_[i]);
  }
  status = RunReduce(reducer, data_buffers, reinterpret_cast<float *>(in_tensors[0]->data_),
                     reinterpret_cast<float *>(out_tensors[0]->data_), axes, in_tensors[0]->shape_);
  FreeTmpBuffer(&data_buffers, allocator);
  if (status != RET_OK) {
    return status;
  }
  return RET_OK;
}
@@ -0,0 +1,29 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INTERNAL_SRC_KERNEL_FP32_REDUCE_COMMON_H_
#define INTERNAL_SRC_KERNEL_FP32_REDUCE_COMMON_H_

#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"

int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);

int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
             mindspore::lite::Allocator *allocator);

#endif  // INTERNAL_SRC_KERNEL_FP32_REDUCE_COMMON_H_
@@ -33,6 +33,7 @@ class InferTest : public mindspore::CommonTest {
TEST_F(InferTest, TestSession) {
  Model model;
  Node node;
  node.name_ = String("node");
  model.nodes_.push_back(&node);
  node.node_type_ = NodeType::NodeType_CNode;
@@ -64,7 +65,7 @@ TEST_F(InferTest, TestSession) {
  TensorPtrVector outvec = session.GetOutputs();
  ASSERT_EQ(outvec.size(), 1);
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_)+ i) << " ";
    std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_) + i) << " ";
  }
  std::cout << "\n";
  CompareOutputData(reinterpret_cast<float *>(outvec.at(0)->data_), expect_out, kOutSize, 0.000001);