From: @fuzhiye
Reviewed-by: @zhang_xue_tong, @hangangqiang
Signed-off-by: @zhang_xue_tong
tags/v1.2.0-rc1
@@ -13,21 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_NNACL_FP32_STACK_H_
-#define MINDSPORE_LITE_NNACL_FP32_STACK_H_
+#include "nnacl/base/stack_base.h"
-#include "nnacl/op_base.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-             float *output);
-void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-                  int32_t *output);
-void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size);
-#ifdef __cplusplus
+void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
+  size_t in_offset = 0;
+  size_t out_offset = 0;
+  for (size_t i = 0; i < outter_size; ++i) {
+    for (size_t j = 0; j < input_num; ++j) {
+      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
+      out_offset += copy_size;
+    }
+    in_offset += copy_size;
+  }
 }
-#endif
-#endif  // MINDSPORE_LITE_NNACL_FP32_STACK_H_
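The byte-generic Stack above replaces the per-dtype DoStack variants: it copies copy_size bytes from each input in turn, outter_size times, so the element type never matters. Below is a minimal standalone sketch of those semantics; the main() driver and the int32 example data are illustrative, not part of the patch.

#include <cstddef>
#include <cstdio>
#include <cstring>

// Copied from the hunk above so the example is self-contained.
static void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < outter_size; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_size;
    }
    in_offset += copy_size;
  }
}

int main() {
  // Stack two int32 tensors of shape {2, 2} along axis 1: each of the two
  // outer rows contributes one 2-element slice per input.
  int a[4] = {1, 2, 3, 4};
  int b[4] = {5, 6, 7, 8};
  int out[8] = {0};
  char *inputs[2] = {reinterpret_cast<char *>(a), reinterpret_cast<char *>(b)};
  Stack(inputs, reinterpret_cast<char *>(out), 2, 2 * sizeof(int), 2);
  for (int v : out) printf("%d ", v);  // prints: 1 2 5 6 3 4 7 8
  return 0;
}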
@@ -13,21 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
-#define MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
+#ifndef MINDSPORE_LITE_NNACL_STACK_H_
+#define MINDSPORE_LITE_NNACL_STACK_H_
+#include <string.h>
 #include "nnacl/op_base.h"
-#ifdef ENABLE_NEON
-#include <arm_neon.h>
-#endif
-#include "nnacl/stack_parameter.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
-void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
-                 float16_t *output);
+void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size);
 #ifdef __cplusplus
 }
 #endif
-#endif  // MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
+#endif  // MINDSPORE_LITE_NNACL_STACK_H_
@@ -1,54 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "nnacl/fp16/stack_fp16.h"
-#include "nnacl/common_func.h"
-size_t Fp16GetStackCopyNum(int axis, int *in_shape, size_t shape_size) {
-  size_t one_input_size = 1;
-  for (size_t i = 0; i < shape_size; ++i) {
-    one_input_size *= in_shape[i];
-  }
-  int in_strides[4];
-  ComputeStrides(in_shape, in_strides, shape_size);
-  size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size;
-  return copy_num;
-}
-size_t Fp16GetStackPreAxisCount2(const int *in_shape, int axis) {
-  size_t pre_axis_count = 1;
-  for (size_t i = 0; i < axis; ++i) {
-    pre_axis_count *= in_shape[i];
-  }
-  return pre_axis_count;
-}
-void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
-                 float16_t *output) {
-  size_t copy_num = Fp16GetStackCopyNum(axis, in_shape, shape_size);
-  size_t copy_size = copy_num * sizeof(float16_t);
-  size_t pre_axis_count = Fp16GetStackPreAxisCount2(in_shape, axis);
-  size_t in_offset = 0;
-  size_t out_offset = 0;
-  for (size_t i = 0; i < pre_axis_count; ++i) {
-    for (size_t j = 0; j < input_num; ++j) {
-      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
-      out_offset += copy_num;
-    }
-    in_offset += copy_num;
-  }
-}
@@ -1,72 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "nnacl/fp32/stack_fp32.h"
-#include "nnacl/common_func.h"
-size_t GetStackCopyNum(int axis, const int *in_shape, size_t shape_size) {
-  size_t one_input_size = 1;
-  for (size_t i = 0; i < shape_size; ++i) {
-    one_input_size *= in_shape[i];
-  }
-  int in_strides[4];
-  ComputeStrides(in_shape, in_strides, shape_size);
-  size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size;
-  return copy_num;
-}
-size_t GetStackPreAxisCount(const int *in_shape, int axis) {
-  size_t pre_axis_count = 1;
-  for (size_t i = 0; i < axis; ++i) {
-    pre_axis_count *= in_shape[i];
-  }
-  return pre_axis_count;
-}
-void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-             float *output) {
-  size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size);
-  size_t copy_size = copy_num * sizeof(float);
-  size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis);
-  size_t in_offset = 0;
-  size_t out_offset = 0;
-  for (size_t i = 0; i < pre_axis_count; ++i) {
-    for (size_t j = 0; j < input_num; ++j) {
-      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
-      out_offset += copy_num;
-    }
-    in_offset += copy_num;
-  }
-}
-void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-                  int32_t *output) {
-  size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size);
-  size_t copy_size = copy_num * sizeof(int32_t);
-  size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis);
-  size_t in_offset = 0;
-  size_t out_offset = 0;
-  for (size_t i = 0; i < pre_axis_count; ++i) {
-    for (size_t j = 0; j < input_num; ++j) {
-      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
-      out_offset += copy_num;
-    }
-    in_offset += copy_num;
-  }
-}
-void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size) { memcpy(output, input, data_size); }
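Both deleted files computed the same geometry independently: copy_num is the product of the dimensions at and after the stack axis (in_strides[axis - 1] for axis > 0, the whole tensor otherwise), and pre_axis_count is the product of the dimensions before it. The new base kernel reproduces these as copy_size_ (in bytes) and outter_size_. A hedged sketch of the correspondence, using a hypothetical ComputeStackGeometry helper that is not part of the patch:

#include <cstddef>

struct StackGeometry {
  size_t copy_size;    // copy_num elements, expressed in bytes
  size_t outter_size;  // pre_axis_count in the deleted code
};

// Assumes 0 <= axis <= shape_size, as the kernels above guarantee.
static StackGeometry ComputeStackGeometry(const int *in_shape, size_t shape_size, int axis, size_t elem_size) {
  // copy_num = prod(in_shape[axis .. shape_size)), which equals
  // in_strides[axis - 1] for axis > 0 and the whole tensor for axis == 0.
  size_t copy_num = 1;
  for (size_t i = axis; i < shape_size; ++i) copy_num *= in_shape[i];
  // pre_axis_count = prod(in_shape[0 .. axis))
  size_t outter_size = 1;
  for (int i = 0; i < axis; ++i) outter_size *= in_shape[i];
  return {copy_num * elem_size, outter_size};
}

Because the old loops advanced typed pointers by copy_num elements while the new Stack advances char pointers by copy_size bytes, both walk memory identically.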
@@ -60,6 +60,7 @@
 #define kNHWC_C 3
 #define kInputSize1 2
 #define kInputSize2 3
+#define MAX_LEN 256
 typedef enum LiteDataType {
   kDataTypeFloat,
@@ -23,8 +23,6 @@
 #include "nnacl/conv_parameter.h"
 #include "nnacl/op_base.h"
-#define MAX_LEN 256
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/stack_base.h"
+#include <vector>
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "nnacl/base/stack_base.h"
+#include "nnacl/stack_parameter.h"
+#include "include/errorcode.h"
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Stack;
+namespace mindspore::kernel {
+static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
+  int copy_num = 1;
+  if (axis > 0) {
+    for (int j = n_dim - 1; j > axis - 1; j--) {
+      copy_num *= in_shape[j];
+    }
+  } else {
+    for (int i = 0; i < n_dim; ++i) {
+      copy_num *= in_shape[i];
+    }
+  }
+  return copy_num;
+}
+static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
+  size_t outter_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outter_size *= in_shape[i];
+  }
+  return outter_size;
+}
+int StackBaseCPUKernel::ReSize() {
+  auto param = reinterpret_cast<StackParameter *>(op_parameter_);
+  auto input0_shape = in_tensors_.front()->shape();
+  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
+  auto input_nums = in_tensors_.size();
+  if (input_nums == 1) {
+    copy_size_ = in_tensors_.front()->Size();
+  } else {
+    MS_ASSERT(input_nums > 1);
+    copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
+    outter_size_ = GetOutterSize(input0_shape, axis_);
+  }
+  return RET_OK;
+}
+int StackBaseCPUKernel::Init() {
+  auto input0_tensor = in_tensors_.front();
+  data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+int StackBaseCPUKernel::Run() {
+  size_t inputs_num = in_tensors_.size();
+  char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
+  for (size_t j = 0; j < inputs_num; ++j) {
+    all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
+  }
+  auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
+  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
+  context_->allocator->Free(all_inputs);
+  return RET_OK;
+}
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
+}  // namespace mindspore::kernel
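A hedged walkthrough of the geometry helpers above, for three fp32 inputs of shape {2, 3} stacked along axis 1 (values assumed for illustration, not taken from the patch):

#include <cassert>
#include <cstddef>

int main() {
  const int in_shape[2] = {2, 3};
  const int axis = 1, n_dim = 2;
  // GetCopyNum with axis > 0: multiply the dims strictly after axis - 1,
  // i.e. j = 1 only, so copy_num = in_shape[1] = 3 elements per slice.
  int copy_num = 1;
  for (int j = n_dim - 1; j > axis - 1; j--) copy_num *= in_shape[j];
  // GetOutterSize: product of dims before the axis, so 2 outer slices.
  size_t outter_size = 1;
  for (int i = 0; i < axis; ++i) outter_size *= in_shape[i];
  assert(copy_num == 3 && outter_size == 2);
  // ReSize() then sets copy_size_ = 3 * data_type_size_ = 12 bytes for fp32,
  // and Run() calls Stack(all_inputs, output, input_num, 12, 2).
  return 0;
}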
@@ -13,21 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
 #include <vector>
 #include "src/lite_kernel.h"
-#include "nnacl/stack_parameter.h"
+using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
-class StackCPUKernel : public LiteKernel {
+class StackBaseCPUKernel : public LiteKernel {
  public:
-  StackCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                 const mindspore::lite::PrimitiveC *primitive)
+  StackBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                     const mindspore::lite::PrimitiveC *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~StackCPUKernel() = default;
+  ~StackBaseCPUKernel() override = default;
   int Init() override;
   int ReSize() override;
@@ -35,7 +36,9 @@ class StackCPUKernel : public LiteKernel {
  protected:
   int axis_ = 0;
+  size_t data_type_size_ = 0;
+  size_t copy_size_ = 0;
+  size_t outter_size_ = 1;
 };
 }  // namespace mindspore::kernel
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
@@ -21,7 +21,7 @@
 #include "include/errorcode.h"
 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
 #include "nnacl/fp16/cast_fp16.h"
-#include "nnacl/fp16/stack_fp16.h"
+#include "nnacl/base/stack_base.h"
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@@ -29,25 +29,18 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Stack;
 namespace mindspore::kernel {
-int StackFp16CPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  return ReSize();
-}
 void StackFp16CPUKernel::InitMallocFlags() {
   malloc_buffers_.resize(in_tensors_.size());
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
     malloc_buffers_.at(i) = in_tensors_.at(i)->data_type() == kNumberTypeFloat32;
   }
-  malloc_out = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
+  malloc_out_ = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
 }
 int StackFp16CPUKernel::MallocAssignBuffer() {
   buffers_.resize(in_tensors_.size(), nullptr);
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
-    buffers_.at(i) = ConvertInputFp32toFp16(in_tensors_.at(i), context_);
+    buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_));
     if (buffers_.at(i) == nullptr) {
       return RET_ERROR;
     }
@@ -68,33 +61,33 @@ void StackFp16CPUKernel::FreeBuffer() {
       buffers_.at(i) = nullptr;
     }
   }
-  if (malloc_out && out_buffer_ != nullptr) {
+  if (malloc_out_ && out_buffer_ != nullptr) {
     context_->allocator->Free(out_buffer_);
     out_buffer_ = nullptr;
   }
 }
-int StackFp16CPUKernel::Run() {
-  size_t inputs_num = in_tensors_.size();
-  auto input0 = in_tensors_.at(0);
-  if (inputs_num == 1) {
-    memcpy(out_tensors_.at(0)->MutableData(), input0->MutableData(), input0->Size());
+int StackFp16CPUKernel::Init() {
+  data_type_size_ = sizeof(float16_t);
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+int StackFp16CPUKernel::Run() {
   InitMallocFlags();
   auto ret = MallocAssignBuffer();
   if (ret != RET_OK) {
     FreeBuffer();
     return ret;
   }
-  auto input0_shape = input0->shape();
-  DoStackFp16(buffers_.data(), inputs_num, input0_shape.data(), input0_shape.size(), axis_, out_buffer_);
+  Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outter_size_);
   // if output tensor is fp32, we need to transform
-  if (malloc_out) {
+  if (malloc_out_) {
     auto out_tensor = out_tensors_.at(0);
     Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
   }
   FreeBuffer();
   return RET_OK;
 }
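Since Stack copies opaque bytes, the fp16 kernel now only reports its element width in Init() and the shared base class derives fp16-correct copy sizes. A self-contained sketch of that byte-agnosticism, using uint16_t as a stand-in for float16_t so it also builds off ARM; the Stack body is repeated from the patch for completeness, and the example data are assumed:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Same byte-generic routine as in the new nnacl stack header.
static void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
  size_t in_offset = 0, out_offset = 0;
  for (size_t i = 0; i < outter_size; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_size;
    }
    in_offset += copy_size;
  }
}

int main() {
  // uint16_t stands in for float16_t: Stack never interprets the payload,
  // so only the 2-byte element size matters.
  uint16_t a[3] = {0x3C00, 0x4000, 0x4200};  // fp16 bit patterns for 1, 2, 3
  uint16_t b[3] = {0x4400, 0x4500, 0x4600};  // fp16 bit patterns for 4, 5, 6
  uint16_t out[6];
  char *inputs[2] = {reinterpret_cast<char *>(a), reinterpret_cast<char *>(b)};
  // Axis 0 over shape {3}: one 3-element slice per input, one outer step.
  Stack(inputs, reinterpret_cast<char *>(out), 2, 3 * sizeof(uint16_t), 1);
  for (uint16_t v : out) printf("0x%04X ", v);  // a's bits, then b's bits
  return 0;
}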
@@ -18,16 +18,16 @@
 #include <vector>
 #include "src/lite_kernel.h"
-#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
+#include "src/runtime/kernel/arm/base/stack_base.h"
 namespace mindspore::kernel {
-class StackFp16CPUKernel : public StackCPUKernel {
+class StackFp16CPUKernel : public StackBaseCPUKernel {
  public:
   StackFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                      const mindspore::lite::PrimitiveC *primitive)
-      : StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~StackFp16CPUKernel() = default;
+      : StackBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~StackFp16CPUKernel() override = default;
   int Init() override;
   int Run() override;
@@ -38,9 +38,9 @@ class StackFp16CPUKernel : public StackCPUKernel {
  private:
   std::vector<bool> malloc_buffers_;
-  std::vector<float16_t *> buffers_;
+  std::vector<char *> buffers_;
   float16_t *out_buffer_;
-  bool malloc_out;
+  bool malloc_out_;
 };
 }  // namespace mindspore::kernel
@@ -114,7 +114,7 @@ int ConvolutionDelegateCPUKernel::Init() {
 }
 int ConvolutionDelegateCPUKernel::ReSize() {
-  // Updata shape info of input and output
+  // Update shape info of input and output
   SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(), out_tensors_.front(),
                           context_);
   if (conv_kernel_ == nullptr) {
@@ -1,81 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
-#include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "nnacl/fp32/stack_fp32.h"
-#include "nnacl/stack_parameter.h"
-#include "include/errorcode.h"
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Stack;
-namespace mindspore::kernel {
-int StackCPUKernel::ReSize() {
-  StackParameter *param = reinterpret_cast<StackParameter *>(op_parameter_);
-  auto input0_shape = in_tensors_.at(0)->shape();
-  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
-  return RET_OK;
-}
-int StackCPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  return ReSize();
-}
-int StackCPUKernel::Run() {
-  size_t inputs_num = in_tensors_.size();
-  auto input0 = in_tensors_.at(0);
-  if (inputs_num == 1) {
-    auto *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    auto *input_data = reinterpret_cast<const int8_t *>(input0->MutableData());
-    MS_ASSERT(input_data);
-    DoStackOneInput(input_data, output_data, input0->Size());
-    return RET_OK;
-  }
-  auto input0_shape = in_tensors_.at(0)->shape();
-  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32 || in_tensors_.at(0)->data_type() == kNumberTypeFloat) {
-    auto *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    float *inputs[inputs_num];
-    for (size_t i = 0; i < inputs_num; ++i) {
-      inputs[i] = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
-      MS_ASSERT(inputs[i]);
-    }
-    DoStack(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
-  } else {
-    auto *output_data = reinterpret_cast<int32_t *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    int32_t *inputs[inputs_num];
-    for (size_t i = 0; i < inputs_num; ++i) {
-      inputs[i] = reinterpret_cast<int32_t *>(in_tensors_.at(i)->MutableData());
-      MS_ASSERT(inputs[i]);
-    }
-    DoStackInt32(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
-  }
-  return RET_OK;
-}
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
-REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
-}  // namespace mindspore::kernel
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "mindspore/lite/nnacl/fp32/stack_fp32.h"
+#include "mindspore/lite/nnacl/base/stack_base.h"
 namespace mindspore {
 class StackTestFp32 : public mindspore::CommonTest {
@@ -26,16 +26,15 @@ TEST_F(StackTestFp32, StackTest1) {
   float input0[6] = {1, 2, 3, 10, 20, 30};
   float input1[6] = {4, 5, 6, 40, 50, 60};
   float input2[6] = {7, 8, 9, 70, 80, 90};
-  float *input[3];
-  input[0] = input0;
-  input[1] = input1;
-  input[2] = input2;
+  char *input[3];
+  input[0] = reinterpret_cast<char *>(input0);
+  input[1] = reinterpret_cast<char *>(input1);
+  input[2] = reinterpret_cast<char *>(input2);
   std::vector<int> shape = {2, 3};
-  int axis = 2;
   constexpr int kOutSize = 18;
   float expect_out[kOutSize] = {1, 4, 7, 2, 5, 8, 3, 6, 9, 10, 40, 70, 20, 50, 80, 30, 60, 90};
   float output[kOutSize];
-  DoStack(input, 3, shape.data(), shape.size(), axis, output);
+  Stack(input, reinterpret_cast<char *>(output), 3, 4, 6);
   for (float i : output) {
     std::cout << i << " ";
   }
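The rewritten test drives the byte-level interface directly; the constants 4 and 6 are the copy_size and outter_size that the old DoStack call derived internally from shape {2, 3} and axis 2. A hedged check of that arithmetic, not part of the patch:

// For shape {2, 3} and axis = 2:
//   copy_num    = product of dims after the axis = 1 element,
//   copy_size   = 1 * sizeof(float) = 4 bytes,
//   outter_size = 2 * 3 = 6 slices per input,
// so DoStack(input, 3, shape.data(), shape.size(), 2, output) becomes
// Stack(input, reinterpret_cast<char *>(output), 3, 4, 6), and each output
// element is one float taken from the three inputs in round-robin order,
// matching expect_out above.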