From: @fuzhiye
Reviewed-by: @zhang_xue_tong, @hangangqiang
Signed-off-by: @zhang_xue_tong
tags/v1.2.0-rc1
@@ -13,21 +13,16 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_NNACL_FP32_STACK_H_
#define MINDSPORE_LITE_NNACL_FP32_STACK_H_
#include "nnacl/base/stack_base.h"
#include "nnacl/op_base.h"
#ifdef __cplusplus
extern "C" {
#endif
void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
             float *output);
void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
                  int32_t *output);
void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size);
void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < outter_size; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_size;
    }
    in_offset += copy_size;
  }
}
#ifdef __cplusplus
}
#endif
#endif  // MINDSPORE_LITE_NNACL_FP32_STACK_H_
@@ -13,21 +13,18 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
#define MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
#ifndef MINDSPORE_LITE_NNACL_STACK_H_
#define MINDSPORE_LITE_NNACL_STACK_H_
#include <string.h>
#include "nnacl/op_base.h"
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/stack_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
                 float16_t *output);
void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size);
#ifdef __cplusplus
}
#endif
#endif  // MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
#endif  // MINDSPORE_LITE_NNACL_STACK_H_
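The two headers above replace the per-type DoStack/DoStackFp16 entry points with one type-agnostic Stack() that moves raw bytes. The following self-contained sketch (shapes and values are illustrative, not from the patch; the Stack() body is the one introduced here) shows how copy_size and outter_size are derived from an input shape and a stack axis, and that the byte-level loop reproduces typed stacking:

// Sketch: stack two [2, 3] float tensors along axis 1 -> output shape [2, 2, 3].
#include <cstddef>
#include <cstdio>
#include <cstring>

static void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < outter_size; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_size;
    }
    in_offset += copy_size;
  }
}

int main() {
  float a[6] = {1, 2, 3, 4, 5, 6};
  float b[6] = {10, 20, 30, 40, 50, 60};
  const int in_shape[2] = {2, 3};
  const int axis = 1;
  // outter_size: product of dims before the axis; copy_size: bytes from the axis on.
  size_t outter_size = 1, copy_elems = 1;
  for (int i = 0; i < axis; ++i) outter_size *= in_shape[i];
  for (int i = axis; i < 2; ++i) copy_elems *= in_shape[i];
  char *inputs[2] = {reinterpret_cast<char *>(a), reinterpret_cast<char *>(b)};
  float out[12];
  Stack(inputs, reinterpret_cast<char *>(out), 2, copy_elems * sizeof(float), outter_size);
  for (float v : out) printf("%g ", v);  // 1 2 3 10 20 30 4 5 6 40 50 60
  printf("\n");
  return 0;
}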
@@ -1,54 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp16/stack_fp16.h"
#include "nnacl/common_func.h"

size_t Fp16GetStackCopyNum(int axis, int *in_shape, size_t shape_size) {
  size_t one_input_size = 1;
  for (size_t i = 0; i < shape_size; ++i) {
    one_input_size *= in_shape[i];
  }
  int in_strides[4];
  ComputeStrides(in_shape, in_strides, shape_size);
  size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size;
  return copy_num;
}

size_t Fp16GetStackPreAxisCount2(const int *in_shape, int axis) {
  size_t pre_axis_count = 1;
  for (size_t i = 0; i < axis; ++i) {
    pre_axis_count *= in_shape[i];
  }
  return pre_axis_count;
}

void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
                 float16_t *output) {
  size_t copy_num = Fp16GetStackCopyNum(axis, in_shape, shape_size);
  size_t copy_size = copy_num * sizeof(float16_t);
  size_t pre_axis_count = Fp16GetStackPreAxisCount2(in_shape, axis);
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < pre_axis_count; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_num;
    }
    in_offset += copy_num;
  }
}
@@ -1,72 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp32/stack_fp32.h"
#include "nnacl/common_func.h"

size_t GetStackCopyNum(int axis, const int *in_shape, size_t shape_size) {
  size_t one_input_size = 1;
  for (size_t i = 0; i < shape_size; ++i) {
    one_input_size *= in_shape[i];
  }
  int in_strides[4];
  ComputeStrides(in_shape, in_strides, shape_size);
  size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size;
  return copy_num;
}

size_t GetStackPreAxisCount(const int *in_shape, int axis) {
  size_t pre_axis_count = 1;
  for (size_t i = 0; i < axis; ++i) {
    pre_axis_count *= in_shape[i];
  }
  return pre_axis_count;
}

void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
             float *output) {
  size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size);
  size_t copy_size = copy_num * sizeof(float);
  size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis);
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < pre_axis_count; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_num;
    }
    in_offset += copy_num;
  }
}

void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
                  int32_t *output) {
  size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size);
  size_t copy_size = copy_num * sizeof(int32_t);
  size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis);
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < pre_axis_count; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_num;
    }
    in_offset += copy_num;
  }
}

void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size) { memcpy(output, input, data_size); }
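Both deleted files compute the per-copy element count from precomputed strides (ComputeStrides plus in_strides[axis - 1], capped at 4-D by the fixed array). The new stack_base.cc below derives the same number as a direct product of the dimensions behind the stack axis. A self-contained sketch (helper names are illustrative, not from the patch) checking that the two routes agree for every valid axis:

#include <cassert>
#include <cstddef>

// Strides route, as in the deleted code: the stride of dimension (axis - 1)
// is the product of all dimensions from `axis` to the end.
static size_t CopyNumViaStrides(const int *in_shape, int axis, int n_dim) {
  size_t total = 1;
  for (int i = 0; i < n_dim; ++i) total *= in_shape[i];
  if (axis == 0) return total;
  size_t stride = 1;
  for (int i = axis; i < n_dim; ++i) stride *= in_shape[i];
  return stride;
}

// Trailing-product route, mirroring the new GetCopyNum in stack_base.cc.
static size_t CopyNumViaTrailingProduct(const int *in_shape, int axis, int n_dim) {
  size_t copy_num = 1;
  if (axis > 0) {
    for (int j = n_dim - 1; j > axis - 1; j--) copy_num *= in_shape[j];
  } else {
    for (int i = 0; i < n_dim; ++i) copy_num *= in_shape[i];
  }
  return copy_num;
}

int main() {
  const int shape[3] = {4, 5, 6};
  for (int axis = 0; axis <= 3; ++axis) {
    assert(CopyNumViaStrides(shape, axis, 3) == CopyNumViaTrailingProduct(shape, axis, 3));
  }
  return 0;
}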
@@ -60,6 +60,7 @@
#define kNHWC_C 3
#define kInputSize1 2
#define kInputSize2 3
#define MAX_LEN 256

typedef enum LiteDataType {
  kDataTypeFloat,
@@ -23,8 +23,6 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/op_base.h"

#define MAX_LEN 256

#ifdef __cplusplus
extern "C" {
#endif
@@ -0,0 +1,90 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/base/stack_base.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "nnacl/base/stack_base.h"
#include "nnacl/stack_parameter.h"
#include "include/errorcode.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Stack;

namespace mindspore::kernel {
static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
  int copy_num = 1;
  if (axis > 0) {
    for (int j = n_dim - 1; j > axis - 1; j--) {
      copy_num *= in_shape[j];
    }
  } else {
    for (int i = 0; i < n_dim; ++i) {
      copy_num *= in_shape[i];
    }
  }
  return copy_num;
}

static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
  size_t outter_size = 1;
  for (int i = 0; i < axis; ++i) {
    outter_size *= in_shape[i];
  }
  return outter_size;
}

int StackBaseCPUKernel::ReSize() {
  auto param = reinterpret_cast<StackParameter *>(op_parameter_);
  auto input0_shape = in_tensors_.front()->shape();
  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
  auto input_nums = in_tensors_.size();
  if (input_nums == 1) {
    copy_size_ = in_tensors_.front()->Size();
  } else {
    MS_ASSERT(input_nums > 1);
    copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
    outter_size_ = GetOutterSize(input0_shape, axis_);
  }
  return RET_OK;
}

int StackBaseCPUKernel::Init() {
  auto input0_tensor = in_tensors_.front();
  data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int StackBaseCPUKernel::Run() {
  size_t inputs_num = in_tensors_.size();
  char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
  if (all_inputs == nullptr) {
    return RET_ERROR;
  }
  for (size_t j = 0; j < inputs_num; ++j) {
    all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
  }
  auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
  context_->allocator->Free(all_inputs);
  return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
}  // namespace mindspore::kernel
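One subtlety in ReSize() above: a negative StackParameter axis is normalized against the output rank, which is the input rank plus one because Stack inserts a new dimension. A minimal sketch (function name is illustrative, not from the patch) of that normalization:

#include <cassert>

// Mirrors the ternary in StackBaseCPUKernel::ReSize().
static int NormalizeStackAxis(int axis, int input_rank) {
  return axis < 0 ? axis + input_rank + 1 : axis;
}

int main() {
  // Rank-2 inputs => rank-3 output, so valid axes run from -3 to 2.
  assert(NormalizeStackAxis(-1, 2) == 2);  // -1 means the new last axis of the output
  assert(NormalizeStackAxis(-3, 2) == 0);  // -3 wraps to the leading output axis
  assert(NormalizeStackAxis(1, 2) == 1);   // non-negative axes pass through unchanged
  return 0;
}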
@@ -13,21 +13,22 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/stack_parameter.h"

using mindspore::lite::InnerContext;

namespace mindspore::kernel {
class StackCPUKernel : public LiteKernel {
class StackBaseCPUKernel : public LiteKernel {
 public:
  StackCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                 const mindspore::lite::PrimitiveC *primitive)
  StackBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~StackCPUKernel() = default;
  ~StackBaseCPUKernel() override = default;

  int Init() override;
  int ReSize() override;
@@ -35,7 +36,9 @@ class StackCPUKernel : public LiteKernel {
 protected:
  int axis_ = 0;
  size_t data_type_size_ = 0;
  size_t copy_size_ = 0;
  size_t outter_size_ = 1;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
@@ -21,7 +21,7 @@
#include "include/errorcode.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/fp16/stack_fp16.h"
#include "nnacl/base/stack_base.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
@@ -29,25 +29,18 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Stack;

namespace mindspore::kernel {
int StackFp16CPUKernel::Init() {
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

void StackFp16CPUKernel::InitMallocFlags() {
  malloc_buffers_.resize(in_tensors_.size());
  for (size_t i = 0; i < in_tensors_.size(); ++i) {
    malloc_buffers_.at(i) = in_tensors_.at(i)->data_type() == kNumberTypeFloat32;
  }
  malloc_out = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
  malloc_out_ = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
}

int StackFp16CPUKernel::MallocAssignBuffer() {
  buffers_.resize(in_tensors_.size(), nullptr);
  for (size_t i = 0; i < in_tensors_.size(); ++i) {
    buffers_.at(i) = ConvertInputFp32toFp16(in_tensors_.at(i), context_);
    buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_));
    if (buffers_.at(i) == nullptr) {
      return RET_ERROR;
    }
@@ -68,33 +61,33 @@ void StackFp16CPUKernel::FreeBuffer() {
      buffers_.at(i) = nullptr;
    }
  }
  if (malloc_out && out_buffer_ != nullptr) {
  if (malloc_out_ && out_buffer_ != nullptr) {
    context_->allocator->Free(out_buffer_);
    out_buffer_ = nullptr;
  }
}

int StackFp16CPUKernel::Run() {
  size_t inputs_num = in_tensors_.size();
  auto input0 = in_tensors_.at(0);
  if (inputs_num == 1) {
    memcpy(out_tensors_.at(0)->MutableData(), input0->MutableData(), input0->Size());
int StackFp16CPUKernel::Init() {
  data_type_size_ = sizeof(float16_t);
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int StackFp16CPUKernel::Run() {
  InitMallocFlags();
  auto ret = MallocAssignBuffer();
  if (ret != RET_OK) {
    FreeBuffer();
    return ret;
  }
  auto input0_shape = input0->shape();
  DoStackFp16(buffers_.data(), inputs_num, input0_shape.data(), input0_shape.size(), axis_, out_buffer_);
  Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outter_size_);
  // if output tensor is fp32, we need to transform
  if (malloc_out) {
  if (malloc_out_) {
    auto out_tensor = out_tensors_.at(0);
    Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
  }
  FreeBuffer();
  return RET_OK;
}
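For readers following the fp16 path: InitMallocFlags() records, per tensor, whether a scratch fp16 buffer had to be allocated (that is, the tensor arrived as fp32), and FreeBuffer() consults the same flags so that only allocator-owned pointers are released. A rough sketch of that bookkeeping (types and names are illustrative, not the kernel's API):

#include <vector>

// Stand-ins for the real kNumberTypeFloat32 / kNumberTypeFloat16 enum values.
enum class DataType { kFp32, kFp16 };

// An input needs a conversion buffer exactly when it arrives as fp32; the
// flag doubles as "this pointer must be freed" during cleanup.
std::vector<bool> MakeMallocFlags(const std::vector<DataType> &input_types) {
  std::vector<bool> flags(input_types.size());
  for (size_t i = 0; i < input_types.size(); ++i) {
    flags[i] = (input_types[i] == DataType::kFp32);
  }
  return flags;
}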
@@ -18,16 +18,16 @@
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
#include "src/runtime/kernel/arm/base/stack_base.h"

namespace mindspore::kernel {
class StackFp16CPUKernel : public StackCPUKernel {
class StackFp16CPUKernel : public StackBaseCPUKernel {
 public:
  StackFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
      : StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~StackFp16CPUKernel() = default;
      : StackBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~StackFp16CPUKernel() override = default;

  int Init() override;
  int Run() override;
@@ -38,9 +38,9 @@ class StackFp16CPUKernel : public StackCPUKernel {
 private:
  std::vector<bool> malloc_buffers_;
  std::vector<float16_t *> buffers_;
  std::vector<char *> buffers_;
  float16_t *out_buffer_ = nullptr;
  bool malloc_out;
  bool malloc_out_;
};
}  // namespace mindspore::kernel
@@ -114,7 +114,7 @@ int ConvolutionDelegateCPUKernel::Init() {
}

int ConvolutionDelegateCPUKernel::ReSize() {
  // Updata shape info of input and output
  // Update shape info of input and output
  SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(), out_tensors_.front(),
                          context_);
  if (conv_kernel_ == nullptr) {
@@ -1,81 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "nnacl/fp32/stack_fp32.h"
#include "nnacl/stack_parameter.h"
#include "include/errorcode.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Stack;

namespace mindspore::kernel {
int StackCPUKernel::ReSize() {
  StackParameter *param = reinterpret_cast<StackParameter *>(op_parameter_);
  auto input0_shape = in_tensors_.at(0)->shape();
  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
  return RET_OK;
}

int StackCPUKernel::Init() {
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int StackCPUKernel::Run() {
  size_t inputs_num = in_tensors_.size();
  auto input0 = in_tensors_.at(0);
  if (inputs_num == 1) {
    auto *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
    MS_ASSERT(output_data);
    auto *input_data = reinterpret_cast<const int8_t *>(input0->MutableData());
    MS_ASSERT(input_data);
    DoStackOneInput(input_data, output_data, input0->Size());
    return RET_OK;
  }
  auto input0_shape = in_tensors_.at(0)->shape();
  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32 || in_tensors_.at(0)->data_type() == kNumberTypeFloat) {
    auto *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
    MS_ASSERT(output_data);
    float *inputs[inputs_num];
    for (size_t i = 0; i < inputs_num; ++i) {
      inputs[i] = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
      MS_ASSERT(inputs[i]);
    }
    DoStack(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
  } else {
    auto *output_data = reinterpret_cast<int32_t *>(out_tensors_.at(0)->MutableData());
    MS_ASSERT(output_data);
    int32_t *inputs[inputs_num];
    for (size_t i = 0; i < inputs_num; ++i) {
      inputs[i] = reinterpret_cast<int32_t *>(in_tensors_.at(i)->MutableData());
      MS_ASSERT(inputs[i]);
    }
    DoStackInt32(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
  }
  return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
}  // namespace mindspore::kernel
@@ -14,7 +14,7 @@
 * limitations under the License.
 */
#include "common/common_test.h"
#include "mindspore/lite/nnacl/fp32/stack_fp32.h"
#include "mindspore/lite/nnacl/base/stack_base.h"

namespace mindspore {
class StackTestFp32 : public mindspore::CommonTest {
@@ -26,16 +26,15 @@ TEST_F(StackTestFp32, StackTest1) {
  float input0[6] = {1, 2, 3, 10, 20, 30};
  float input1[6] = {4, 5, 6, 40, 50, 60};
  float input2[6] = {7, 8, 9, 70, 80, 90};
  float *input[3];
  input[0] = input0;
  input[1] = input1;
  input[2] = input2;
  char *input[3];
  input[0] = reinterpret_cast<char *>(input0);
  input[1] = reinterpret_cast<char *>(input1);
  input[2] = reinterpret_cast<char *>(input2);
  std::vector<int> shape = {2, 3};
  int axis = 2;
  constexpr int kOutSize = 18;
  float expect_out[kOutSize] = {1, 4, 7, 2, 5, 8, 3, 6, 9, 10, 40, 70, 20, 50, 80, 30, 60, 90};
  float output[kOutSize];
  DoStack(input, 3, shape.data(), shape.size(), axis, output);
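  // shape {2, 3} stacked along axis 2: copy_size = 1 element * sizeof(float) = 4 bytes, outter_size = 2 * 3 = 6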
  Stack(input, reinterpret_cast<char *>(output), 3, 4, 6);
  for (float i : output) {
    std::cout << i << " ";
  }