Merge pull request !3669 from chenjianping/lite_dev
@@ -20,21 +20,28 @@
 #include "src/ir/tensor.h"
 namespace mindspore::lite {
-int AddN::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
+namespace {
+constexpr int kLeastInputNum = 2;
+}
+int AddN::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
   MS_ASSERT(this->primitive != nullptr);
-  auto input = inputs_.front();
+  auto input = inputs.front();
   MS_ASSERT(input != nullptr);
-  auto output = outputs_.front();
+  auto output = outputs.front();
   MS_ASSERT(output != nullptr);
-  if (inputs_.size() < kDoubleNum) {
-    MS_LOG(ERROR) << "input size is error";
+  if (inputs.size() < kLeastInputNum) {
+    MS_LOG(ERROR) << "input size " << inputs.size() << " is error!";
     return RET_INPUT_TENSOR_ERROR;
   }
-  for (int i = 1; i < inputs_.size(); ++i) {
-    if (inputs_.at(i)->shape() != inputs_.at(0)->shape()) {
+  for (int i = 1; i < inputs.size(); ++i) {
+    if (inputs.at(i)->shape() != inputs.at(0)->shape()) {
       MS_LOG(ERROR) << "AddN inputs shape is not equal!";
       return RET_INPUT_TENSOR_ERROR;
     }
+    if (inputs.at(i)->data_type() != inputs.at(0)->data_type()) {
+      MS_LOG(ERROR) << "AddN all input data type should be the same!";
+      return RET_INPUT_TENSOR_ERROR;
+    }
   }
   output->SetFormat(input->GetFormat());
   output->set_shape(input->shape());
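The reworked InferShape rejects AddN inputs that differ in shape or data type, so there is no implicit broadcasting. A minimal illustration of the contract (shapes hypothetical, not part of the patch):

// inputs: {2, 3} fp32, {2, 3} fp32   -> OK, output shape {2, 3}
// inputs: {2, 3} fp32, {2, 1} fp32   -> RET_INPUT_TENSOR_ERROR (shape mismatch)
// inputs: {2, 3} fp32, {2, 3} int32  -> RET_INPUT_TENSOR_ERROR (dtype mismatch)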
@@ -38,7 +38,11 @@ int ArgMax::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor
     MS_LOG(ERROR) << "Invalid axis " << argmax_prim->axis() << ", input shape size: " << input_shape_size;
     return RET_PARAM_INVALID;
   }
-  output_shape.erase(output_shape.begin() + axis);
+  if (argmax_prim->topK() == -1) {
+    output_shape.erase(output_shape.begin() + axis);
+  } else if (argmax_prim->axisType() == 1) {
+    output_shape[axis] = argmax_prim->topK();
+  }
   output->SetFormat(input->GetFormat());
   output->set_shape(output_shape);
@@ -37,7 +37,11 @@ int ArgMin::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor
     return RET_PARAM_INVALID;
   }
   std::vector<int> output_shape(input->shape());
-  output_shape.erase(output_shape.begin() + axis);
+  if (argmin_prim->topK() == -1) {
+    output_shape.erase(output_shape.begin() + axis);
+  } else if (argmin_prim->axisType() == 1) {
+    output_shape[axis] = argmin_prim->topK();
+  }
   output->SetFormat(input->GetFormat());
   output->set_shape(output_shape);
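Both InferShape changes give topK the same meaning: with the default topK == -1 the reduced axis is dropped, while a positive topK (with axisType == 1) keeps the axis and resizes it to topK. A worked example with hypothetical shapes:

// input shape {2, 3, 4}, axis = 1
// topK == -1                -> output shape {2, 4}     (axis erased)
// topK == 2, axisType == 1  -> output shape {2, 2, 4}  (axis resized to topK)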
@@ -485,7 +485,7 @@ PowerParameter *PopulatePowerParameter(const lite::Primitive *primitive) {
   return parameter;
 }
-ArgMinMaxParameter *PopulateArgMinMaxParam(const lite::Primitive *primitive) {
+ArgMinMaxParameter *PopulateArgMaxParam(const lite::Primitive *primitive) {
   ArgMinMaxParameter *parameter = new (std::nothrow) ArgMinMaxParameter();
   if (parameter == nullptr) {
     MS_LOG(ERROR) << "new ArgMinMaxParameter failed.";
@@ -501,6 +501,22 @@ ArgMinMaxParameter *PopulateArgMinMaxParam(const lite::Primitive *primitive) {
   return parameter;
 }
+ArgMinMaxParameter *PopulateArgMinParam(const lite::Primitive *primitive) {
+  ArgMinMaxParameter *parameter = new (std::nothrow) ArgMinMaxParameter();
+  if (parameter == nullptr) {
+    MS_LOG(ERROR) << "new ArgMinMaxParameter failed.";
+    return nullptr;
+  }
+  auto param = primitive->Value()->value_as_ArgMin();
+  parameter->op_parameter_.type_ = primitive->Type();
+  parameter->axis_ = param->axis();
+  parameter->topk_ = param->topK();
+  parameter->axis_type_ = param->axisType();
+  parameter->out_value_ = param->outMaxValue();
+  parameter->keep_dims_ = param->keepDims();
+  return parameter;
+}
 CastParameter *PopulateCastParam(const lite::Primitive *primitive) {
   CastParameter *parameter = new (std::nothrow) CastParameter();
   if (parameter == nullptr) {
@@ -962,6 +978,16 @@ StridedSliceParameter *PopulateStridedSliceParam(const lite::Primitive *primitiv
   return parameter;
 }
+OpParameter *PopulateAddNParam(const lite::Primitive *primitive) {
+  auto parameter = new (std::nothrow) OpParameter();
+  if (parameter == nullptr) {
+    MS_LOG(ERROR) << "new OpParameter fail!";
+    return nullptr;
+  }
+  parameter->type_ = primitive->Type();
+  return parameter;
+}
 OpParameter *PopulateParameter(const lite::Primitive *primitive) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto op_type = primitive->Type();
@@ -1020,8 +1046,9 @@ OpParameter *PopulateParameter(const lite::Primitive *primitive) {
     case schema::PrimitiveType_Floor:
       return reinterpret_cast<OpParameter *>(PopulateArithmeticSelf(primitive));
     case schema::PrimitiveType_ArgMax:
+      return reinterpret_cast<OpParameter *>(PopulateArgMaxParam(primitive));
     case schema::PrimitiveType_ArgMin:
-      return reinterpret_cast<OpParameter *>(PopulateArgMinMaxParam(primitive));
+      return reinterpret_cast<OpParameter *>(PopulateArgMinParam(primitive));
     case schema::PrimitiveType_Cast:
       return reinterpret_cast<OpParameter *>(PopulateCastParam(primitive));
     case schema::PrimitiveType_Ceil:
@@ -1078,6 +1105,8 @@ OpParameter *PopulateParameter(const lite::Primitive *primitive) {
       return reinterpret_cast<OpParameter *>(PopulateMatMulParameter(primitive));
     case schema::PrimitiveType_OneHot:
       return reinterpret_cast<OpParameter *>(PopulateOneHotParameter(primitive));
+    case schema::PrimitiveType_AddN:
+      return reinterpret_cast<OpParameter *>(PopulateAddNParam(primitive));
     default:
       break;
   }
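The shared PopulateArgMinMaxParam is split into per-op variants because ArgMax and ArgMin are stored as different flatbuffer tables, even though both fill the same ArgMinMaxParameter. The unchanged middle of PopulateArgMaxParam is not shown in this diff; it presumably mirrors PopulateArgMinParam with the ArgMax accessor, a sketch under that assumption:

// assumed body (not shown above): mirror of PopulateArgMinParam
// auto param = primitive->Value()->value_as_ArgMax();
// parameter->axis_ = param->axis();
// parameter->topk_ = param->topK();
// ...

AddN itself carries no attributes, so PopulateAddNParam only needs a bare OpParameter with the type filled in.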
@@ -17,52 +17,100 @@
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/fp32/arithmetic.h"
 #include "include/errorcode.h"
+#include "src/runtime/runtime_api.h"
 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_AddN;
 namespace mindspore::kernel {
 namespace {
+constexpr int kLeastInputNum = 2;
+int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
+  if (cdata == nullptr) {
+    MS_LOG(ERROR) << "Input cdata is nullptr!";
+    return RET_NULL_PTR;
+  }
+  auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata);
+  return kernel->AddNParallelRun(thread_id);
+}
 }
-int AddNCPUKernel::Init() { return RET_OK; }
+int AddNCPUKernel::Init() {
+  elements_num_ = inputs_[0]->ElementsNum();
+  return RET_OK;
+}
 int AddNCPUKernel::ReSize() { return RET_OK; }
+int AddNCPUKernel::AddNParallelRun(int thread_id) {
+  int count_per_thread = UP_DIV(elements_num_, opParameter->thread_num_);
+  int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread);
+  auto stride = count_per_thread * thread_id;
+  auto ret = ElementAdd(in1_addr_ + stride, in2_addr_ + stride, out_addr_ + stride, count);
+  if (ret != OPCLIB_OK) {
+    MS_LOG(ERROR) << "ElementAdd fail! ret: " << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
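AddNParallelRun splits the flat element range evenly across threads: UP_DIV rounds the per-thread count up, and MSMIN clamps the final chunk. A worked example with assumed sizes:

// elements_num_ = 10, thread_num_ = 4 -> count_per_thread = UP_DIV(10, 4) = 3
// thread 0: stride 0, count = MSMIN(3, 10 - 0) = 3 -> elements [0, 3)
// thread 1: stride 3, count = MSMIN(3, 10 - 3) = 3 -> elements [3, 6)
// thread 2: stride 6, count = MSMIN(3, 10 - 6) = 3 -> elements [6, 9)
// thread 3: stride 9, count = MSMIN(3, 10 - 9) = 1 -> elements [9, 10)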
 int AddNCPUKernel::Run() {
   auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data());
   auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data());
   auto output_data = reinterpret_cast<float *>(outputs_[0]->Data());
-  auto element_num = inputs_[0]->ElementsNum();
-  ElementAdd(input0_data, input1_data, output_data, element_num);
-  for (int i = 2; i < inputs_.size(); ++i) {
-    ElementAdd(reinterpret_cast<float *>(inputs_[i]->Data()), output_data, output_data, element_num);
+  if (elements_num_ < opParameter->thread_num_) {
+    ElementAdd(input0_data, input1_data, output_data, elements_num_);
+    for (int i = 2; i < inputs_.size(); ++i) {
+      ElementAdd(reinterpret_cast<float *>(inputs_[i]->Data()), output_data, output_data, elements_num_);
+    }
+    return RET_OK;
+  }
+  in1_addr_ = input0_data;
+  in2_addr_ = input1_data;
+  out_addr_ = output_data;
+  int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "addn launch fail! ret: " << ret;
+    return RET_ERROR;
+  }
+  for (size_t i = 2; i < inputs_.size(); ++i) {
+    in1_addr_ = reinterpret_cast<float *>(inputs_[i]->Data());
+    in2_addr_ = output_data;
+    ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "addn launch fail! ret: " << ret << ", input index: " << i;
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
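Run handles an arbitrary number of addends with n - 1 pairwise passes: the first pass writes in0 + in1 into the output, then each later pass adds input i into the output in place (in2_addr_ stays pointed at output_data). In effect, for inputs a, b, c, d:

// out = a + b;  out = c + out;  out = d + out;

Tensors with fewer elements than threads skip the parallel launch entirely, since splitting a handful of elements across threads would leave most chunks empty.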
 kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *opParameter, const lite::Context *ctx,
+                                             OpParameter *op_parameter, const lite::Context *ctx,
                                              const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) AddNCPUKernel(opParameter, inputs, outputs);
+  if (ctx == nullptr) {
+    MS_LOG(ERROR) << "Input context is nullptr!";
+    return nullptr;
+  }
+  op_parameter->thread_num_ = ctx->threadNum;
+  auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new AddNCPUKernel fail!";
     return nullptr;
   }
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed! name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed! name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     delete kernel;
     return nullptr;
   }
@@ -71,4 +119,3 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Ten
 REG_KERNEL(kCPU, PrimitiveType_AddN, CpuAddNFp32KernelCreator)
 }  // namespace mindspore::kernel
@@ -32,8 +32,13 @@ class AddNCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int AddNParallelRun(int thread_id);
+ private:
+  float *in1_addr_;
+  float *in2_addr_;
+  float *out_addr_;
+  size_t elements_num_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ADDN_H_
@@ -51,9 +51,11 @@ int BatchToSpaceCPUKernel::Run() {
   BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->opParameter);
   if (no_crop_) {
-    BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_);
+    BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
+                              sizeof(float));
   } else {
-    BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_);
+    BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
+                        sizeof(float));
   }
   return RET_OK;
@@ -61,13 +63,13 @@ int BatchToSpaceCPUKernel::Run() {
 kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
-                                                     OpParameter *opParameter, const lite::Context *ctx,
+                                                     OpParameter *op_parameter, const lite::Context *ctx,
                                                      const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!";
     return nullptr;
@@ -76,8 +78,8 @@ kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::ten
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     return nullptr;
   }
   return kernel;
@@ -50,13 +50,13 @@ int BroadcastToCPUKernel::Run() {
 kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                     const std::vector<lite::tensor::Tensor *> &outputs,
-                                                    OpParameter *opParameter, const lite::Context *ctx,
+                                                    OpParameter *op_parameter, const lite::Context *ctx,
                                                     const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) BroadcastToCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!";
     return nullptr;
@@ -65,8 +65,8 @@ kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tens
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     return nullptr;
   }
   return kernel;
@@ -95,6 +95,10 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten
     MS_LOG(ERROR) << "Input context is nullptr!";
     return nullptr;
   }
+  if (ctx->threadNum == 0) {
+    MS_LOG(ERROR) << "context thread num is 0!";
+    return nullptr;
+  }
   auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new CastCPUKernel fail!";
@@ -14,65 +14,85 @@
  * limitations under the License.
  */
 #include "src/runtime/kernel/arm/fp32/crop.h"
+#include <vector>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/fp32/crop.h"
 #include "include/errorcode.h"
+#include "src/runtime/runtime_api.h"
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_FORMAT_ERR;
+using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Crop;
 namespace mindspore::kernel {
+namespace {
+int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
+  if (cdata == nullptr) {
+    MS_LOG(ERROR) << "Input cdata is nullptr!";
+    return RET_NULL_PTR;
+  }
+  auto kernel = reinterpret_cast<CropCPUKernel *>(cdata);
+  return kernel->CropParallelRun(thread_id);
+}
+}
 int CropCPUKernel::Init() {
   schema::Format input0_format = inputs_[0]->GetFormat();
-  if (input0_format != schema::Format_NC4HW4) {
-    outputs_[0]->SetFormat(input0_format);
-    return RET_OK;
-  }
-  convert_function_ = LayoutTransform(inputs_[0]->data_type(), inputs_[0]->GetFormat(), schema::Format_NHWC);
-  if (convert_function_ == nullptr) {
-    MS_LOG(ERROR) << "Can not convert format " << inputs_[0]->GetFormat() << " to " << schema::Format_NHWC;
-    return RET_ERROR;
+  if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) {
+    MS_LOG(ERROR) << "Unsupported format " << input0_format;
+    return RET_FORMAT_ERR;
   }
-  auto packed_input_size = inputs_[0]->Channel() * inputs_[0]->Batch() * inputs_[0]->Height() * inputs_[0]->Width();
-  packed_input_ = reinterpret_cast<float *>(malloc(packed_input_size * sizeof(float)));
-  if (packed_input_ == nullptr) {
-    MS_LOG(ERROR) << "malloc memory fail!";
-    return RET_ERROR;
-  }
-  memset(packed_input_, 0, packed_input_size * sizeof(float));
+  outputs_[0]->SetFormat(input0_format);
   return RET_OK;
 }
-int CropCPUKernel::Run() {
+int CropCPUKernel::CropParallelRun(int thread_id) {
   auto input = inputs_[0];
   auto output = outputs_[0];
   float *input_data = reinterpret_cast<float *>(input->Data());
-  if (convert_function_ != nullptr) {
-    convert_function_(input_data, packed_input_, inputs_[0]->Batch(), inputs_[0]->Height() * inputs_[0]->Width(),
-                      inputs_[0]->Channel());
-  } else {
-    packed_input_ = input_data;
-  }
   float *output_data = reinterpret_cast<float *>(output->Data());
   Crop4D(input_data, output_data, input->shape().data(), output->shape().data(),
          reinterpret_cast<CropParameter *>(opParameter));
   return RET_OK;
 }
+int CropCPUKernel::Run() {
+  auto input = inputs_[0];
+  auto output = outputs_[0];
+  auto param = reinterpret_cast<CropParameter *>(opParameter);
+  if (output->shape()[1] < param->op_parameter_.thread_num_) {
+    float *input_data = reinterpret_cast<float *>(input->Data());
+    float *output_data = reinterpret_cast<float *>(output->Data());
+    Crop4DNoParallel(input_data, output_data, input->shape().data(), output->shape().data(), param);
+    return RET_OK;
+  }
+  int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Crop launch fail! ret: " << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
 kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *opParameter, const lite::Context *ctx,
+                                             OpParameter *op_parameter, const lite::Context *ctx,
                                              const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
+    return nullptr;
+  }
+  if (ctx == nullptr) {
+    MS_LOG(ERROR) << "Input context is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs);
+  op_parameter->thread_num_ = ctx->threadNum;
+  auto *kernel = new (std::nothrow) CropCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new CropCPUKernel fail!";
     return nullptr;
@@ -81,8 +101,8 @@ kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Ten
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     return nullptr;
   }
   return kernel;
@@ -90,4 +110,3 @@ kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Ten
 REG_KERNEL(kCPU, PrimitiveType_Crop, CpuCropFp32KernelCreator)
 }  // namespace mindspore::kernel
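The crop kernel now partitions work along the second output dimension (out_shape[1] of the padded 4D shape), so Run falls back to Crop4DNoParallel whenever the output has fewer entries along that axis than there are threads; in that regime a parallel launch would mostly spawn threads with empty ranges.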
@@ -15,34 +15,20 @@
  */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CROP_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CROP_H_
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/arm/base/layout_transform.h"
 namespace mindspore::kernel {
 class CropCPUKernel : public LiteKernel {
  public:
   CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs), packed_input_(nullptr), convert_function_(nullptr) {}
-  ~CropCPUKernel() {
-    if (packed_input_ != nullptr) {
-      free(packed_input_);
-      packed_input_ = nullptr;
-    }
-  }
+                const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
+  ~CropCPUKernel() = default;
   int Init() override;
   int ReSize() override { return 0; }
   int Run() override;
- private:
-  float *packed_input_;
-  LayoutConvertor convert_function_;
+  int CropParallelRun(int thread_id);
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CROP_H_
@@ -19,13 +19,25 @@
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/fp32/slice.h"
 #include "include/errorcode.h"
+#include "src/runtime/runtime_api.h"
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
+using mindspore::lite::RET_NULL_PTR;
 using mindspore::schema::PrimitiveType_Slice;
 namespace mindspore::kernel {
+namespace {
+int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
+  if (cdata == nullptr) {
+    MS_LOG(ERROR) << "Input cdata is nullptr!";
+    return RET_NULL_PTR;
+  }
+  auto kernel = reinterpret_cast<SliceCPUKernel *>(cdata);
+  return kernel->SliceParallelRun(thread_id);
+}
+}
 int SliceCPUKernel::Init() {
   auto *param = reinterpret_cast<SliceParameter *>(opParameter);
@@ -35,34 +47,68 @@ int SliceCPUKernel::Init() {
                   << input_shape.size();
     return RET_ERROR;
   }
-  if (input_shape.size() > SLICE_SHAPE_MAX_SIZE) {
-    MS_LOG(ERROR) << "input dimension num should <= " << SLICE_SHAPE_MAX_SIZE;
+  if (input_shape.size() > DIMENSION_4D) {
+    MS_LOG(ERROR) << "input dimension num should <= " << DIMENSION_4D;
     return RET_ERROR;
   }
   for (size_t i = 0; i < input_shape.size(); ++i) {
     param->shape_[i] = input_shape[i];
   }
+  outputs_[0]->SetFormat(inputs_[0]->GetFormat());
+  return RET_OK;
+}
+int SliceCPUKernel::SliceParallelRun(int thread_id) {
+  const float *input_data = reinterpret_cast<const float *>(inputs_[0]->Data());
+  float *output_data = reinterpret_cast<float *>(outputs_[0]->Data());
+  SliceParameter *param = reinterpret_cast<SliceParameter *>(opParameter);
+  DoSlice(input_data, output_data, param);
   return RET_OK;
 }
 int SliceCPUKernel::Run() {
   SliceParameter *param = reinterpret_cast<SliceParameter *>(opParameter);
+  for (int i = 0; i < param->param_length_; ++i) {
+    if (param->size_[i] < 0) {
+      param->size_[i] = param->shape_[i] - param->begin_[i];
+    }
+    param->end_[i] = param->begin_[i] + param->size_[i];
+  }
+  if (param->param_length_ < DIMENSION_4D) {
+    PadSliceParameterTo4D(param);
+  }
   const float *input_data = reinterpret_cast<const float *>(inputs_[0]->Data());
   float *output_data = reinterpret_cast<float *>(outputs_[0]->Data());
-  return DoSlice(input_data, param, output_data);
+  if (param->size_[1] < param->op_parameter_.thread_num_) {
+    DoSliceNoParallel(input_data, output_data, param);
+    return RET_OK;
+  }
+  int ret = LiteBackendParallelLaunch(SliceLaunch, this, param->op_parameter_.thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "slice launch fail! ret: " << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
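Before dispatch, Run normalizes the parameter in place: a negative size_[i] means "to the end of the axis", and end_ is derived as begin_ + size_ so both DoSlice variants can work with half-open ranges. A worked example with hypothetical values (the 4D padding is assumed to fill the leading dimensions with 1s and 0s):

// shape_ = {4, 8}, begin_ = {1, 2}, size_ = {-1, 4}
// size_[0] = 4 - 1 = 3, end_ = {4, 6}
// PadSliceParameterTo4D then lifts this to:
//   shape_ = {1, 1, 4, 8}, begin_ = {0, 0, 1, 2}, size_ = {1, 1, 3, 4}, end_ = {1, 1, 4, 6}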
 kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                              OpParameter *opParameter, const lite::Context *ctx,
+                                              OpParameter *op_parameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
+    return nullptr;
+  }
+  if (ctx == nullptr) {
+    MS_LOG(ERROR) << "Input context is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) SliceCPUKernel(opParameter, inputs, outputs);
+  op_parameter->thread_num_ = ctx->threadNum;
+  auto *kernel = new (std::nothrow) SliceCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SliceCPUKernel fail!";
     return nullptr;
@@ -71,8 +117,8 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::tensor::Te
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     return nullptr;
   }
   return kernel;
@@ -80,4 +126,3 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::tensor::Te
 REG_KERNEL(kCPU, PrimitiveType_Slice, CpuSliceFp32KernelCreator)
 }  // namespace mindspore::kernel
@@ -32,8 +32,8 @@ class SliceCPUKernel : public LiteKernel {
     return 0;
   }
   int Run() override;
+  int SliceParallelRun(int thread_id);
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SLICE_H_
@@ -86,13 +86,13 @@ int StackCPUKernel::Run() {
 kernel::LiteKernel *CpuStackFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                              OpParameter *opParameter, const lite::Context *ctx,
+                                              OpParameter *op_parameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) StackCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) StackCPUKernel(op_parameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new StackCPUKernel fail!";
     return nullptr;
@@ -101,8 +101,8 @@ kernel::LiteKernel *CpuStackFp32KernelCreator(const std::vector<lite::tensor::Te
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
     return nullptr;
   }
   return kernel;
@@ -17,7 +17,8 @@
 #include "src/runtime/kernel/arm/opclib/fp32/batch_to_space.h"
 #include "src/runtime/kernel/arm/opclib/arithmetic_common.h"
-void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block) {
+void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
+                               int data_size) {
   int block_h = block[0];
   int block_w = block[1];
   int in_h = in_shape[1];
@@ -25,7 +26,7 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_
   int in_c = in_shape[3];
   size_t stride_h = block_w * out_n;
   size_t output_offset = 0;
-  size_t copy_size = in_c * 4;
+  size_t copy_size = in_c * data_size;
   size_t in_stride_h = in_w * in_c;
   size_t in_stride_n = in_stride_h * in_h;
   for (int n = 0; n < out_n; ++n) {
@@ -36,8 +37,9 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_
         size_t w_offset = w * in_c;
         for (int bw = 0; bw < block_w; ++bw) {
           size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
-          memcpy(output + output_offset, input + in_offset, copy_size);
-          output_offset += in_c;
+          memcpy(reinterpret_cast<int8_t *>(output) + output_offset,
+                 reinterpret_cast<const int8_t *>(input) + in_offset * data_size, copy_size);
+          output_offset += copy_size;
         }
       }
     }
@@ -45,8 +47,8 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_
   }
 }
-void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block,
-                         const int *crops) {
+void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
+                         const int *crops, int data_size) {
   int block_h = block[0];
   int block_w = block[1];
   int in_n = in_shape[0];
@@ -64,7 +66,7 @@ void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape,
   size_t stride_h = block_w * out_n;
   size_t output_offset = 0;
-  size_t copy_size = in_c * 4;
+  size_t copy_size = in_c * data_size;
   size_t in_stride_h = in_w * in_c;
   size_t in_stride_n = in_stride_h * in_h;
   for (int n = 0; n < out_n; ++n) {
@@ -83,12 +85,12 @@ void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape,
             continue;
           }
           size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
-          memcpy(output + output_offset, input + in_offset, copy_size);
-          output_offset += in_c;
+          memcpy(reinterpret_cast<int8_t *>(output) + output_offset,
+                 reinterpret_cast<const int8_t *>(input) + in_offset * data_size, copy_size);
+          output_offset += copy_size;
          }
        }
      }
    }
  }
 }
@@ -26,8 +26,8 @@ struct BatchToSpaceParameter {
   int32_t crops_[BATCH_TO_SPACE_CROPS_SIZE];
 };
-void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block);
-void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block,
-                         const int *crops);
+void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
+                               int data_size);
+void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
+                         const int *crops, int data_size);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_BATCH_TO_SPACE_H_
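Switching these opclib helpers from float * to void * plus an explicit data_size removes the hard-coded 4-byte element (copy_size = in_c * 4) and lets the same routine serve other element widths; output_offset is now maintained in bytes rather than elements. For example, with in_c = 3, passing sizeof(float) gives copy_size = 12 bytes per memcpy, while an int8 caller could pass data_size = 1 for 3-byte rows (the int8 caller is hypothetical; this diff only updates the fp32 kernel to pass sizeof(float)).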
@@ -15,40 +15,74 @@
  */
 #include "src/runtime/kernel/arm/opclib/fp32/crop.h"
 #include <string.h>
+#include "src/runtime/kernel/arm/opclib/op_base.h"
-void Pad4DOffset(CropParameter *crop_param) {
-  int64_t offset_tmp[DIMENSION_4D];
+void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
   int axis = crop_param->axis_;
-  for (int i = 3; i >= 0; --i) {
+  for (int i = DIMENSION_4D - 1; i >= 0; --i) {
     int offset_index = i - axis;
     if (offset_index >= 0) {
-      offset_tmp[i] = crop_param->offset_[offset_index];
+      offset[i] = crop_param->offset_[offset_index];
     } else {
-      offset_tmp[i] = 0;
+      offset[i] = 0;
     }
   }
-  for (int i = 0; i < DIMENSION_4D; ++i) {
-    crop_param->offset_[i] = offset_tmp[i];
-  }
 }
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param) {
-  Pad4DOffset(crop_param);
+  int64_t offset_pad[DIMENSION_4D];
+  Pad4DOffset(crop_param, offset_pad);
+  int out_shape1 = out_shape[1];
+  int out_shape2 = out_shape[2];
+  int out_shape3 = out_shape[3];
+  size_t out_stride2 = out_shape3;
+  size_t out_stride1 = out_stride2 * out_shape2;
+  size_t out_stride0 = out_stride1 * out_shape1;
+  size_t in_stride2 = in_shape[3];
+  size_t in_stride1 = in_stride2 * in_shape[2];
+  size_t in_stride0 = in_stride1 * in_shape[1];
+  size_t copy_size = out_shape3 * sizeof(float);
+  size_t count_per_thread = UP_DIV(out_shape1, crop_param->op_parameter_.thread_num_);
+  int thread_id = crop_param->thread_id_;
+  size_t thread_stride = thread_id * count_per_thread;
+  for (int i = 0; i < out_shape[0]; ++i) {
+    size_t out_offset0 = i * out_stride0;
+    size_t in_offset0 = (i + offset_pad[0]) * in_stride0 + offset_pad[3];
+    for (size_t j = 0; j < count_per_thread; ++j) {
+      size_t k = j + thread_stride;
+      if (k >= out_shape1) {
+        break;
+      }
+      size_t out_offset1 = k * out_stride1 + out_offset0;
+      size_t in_offset1 = (k + offset_pad[1]) * in_stride1 + in_offset0;
+      for (int l = 0; l < out_shape2; ++l) {
+        size_t out_offset = l * out_stride2 + out_offset1;
+        size_t in_offset = (l + offset_pad[2]) * in_stride2 + in_offset1;
+        memcpy(output + out_offset, input + in_offset, copy_size);
+      }
+    }
+  }
+}
+void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
+                      CropParameter *crop_param) {
+  int64_t offset_pad[DIMENSION_4D];
+  Pad4DOffset(crop_param, offset_pad);
   size_t in_dim2_stride = in_shape[3];
   size_t in_dim1_stride = in_shape[2] * in_dim2_stride;
   size_t in_dim0_stride = in_dim1_stride * in_shape[1];
-  size_t offset_3 = crop_param->offset_[3];
+  size_t offset_3 = offset_pad[3];
   size_t out_offset = 0;
   size_t copy_num = out_shape[3];
   size_t copy_size = copy_num * sizeof(float);
-  size_t in_dim0_end = crop_param->offset_[0] + out_shape[0];
-  size_t in_dim1_end = crop_param->offset_[1] + out_shape[1];
-  size_t in_dim2_end = crop_param->offset_[2] + out_shape[2];
-  for (int i = crop_param->offset_[0]; i < in_dim0_end; ++i) {
+  size_t in_dim0_end = offset_pad[0] + out_shape[0];
+  size_t in_dim1_end = offset_pad[1] + out_shape[1];
+  size_t in_dim2_end = offset_pad[2] + out_shape[2];
+  for (int i = offset_pad[0]; i < in_dim0_end; ++i) {
     size_t dim0_offset = i * in_dim0_stride + offset_3;
-    for (int j = crop_param->offset_[1]; j < in_dim1_end; ++j) {
+    for (int j = offset_pad[1]; j < in_dim1_end; ++j) {
       size_t dim1_offset = j * in_dim1_stride + dim0_offset;
-      for (int k = crop_param->offset_[2]; k < in_dim2_end; ++k) {
+      for (int k = offset_pad[2]; k < in_dim2_end; ++k) {
         size_t in_offset = dim1_offset + k * in_dim2_stride;
         memcpy(output + out_offset, input + in_offset, copy_size);
         out_offset += copy_num;
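Crop4D distributes the second output dimension across threads with the same UP_DIV chunking as AddN; each thread walks its own band of rows and memcpys one innermost row at a time. A worked example with assumed sizes:

// out_shape1 = 5 rows, thread_num_ = 4 -> count_per_thread = UP_DIV(5, 4) = 2
// thread 0: k = 0, 1    thread 1: k = 2, 3    thread 2: k = 4 (k = 5 >= 5 breaks)
// thread 3: thread_stride = 6, so k = 6 >= 5 breaks immediately and it does no work

Writing Pad4DOffset into a caller-provided array instead of back into crop_param->offset_ also makes the padding idempotent, which matters now that Crop4D can run once per thread.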
@@ -23,8 +23,11 @@ struct CropParameter {
   OpParameter op_parameter_;
   int64_t offset_[CROP_OFFSET_MAX_SIZE];
   int64_t axis_;
+  int32_t thread_id_;
 };
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param);
+void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
+                      CropParameter *crop_param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_CROP_H_
@@ -15,7 +15,9 @@
  */
 #include "src/runtime/kernel/arm/opclib/fp32/slice.h"
+#include <string.h>
 #include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/errorcode.h"
 void PadSliceParameterTo4D(SliceParameter *param) {
   int32_t begin[DIMENSION_4D];
@@ -25,7 +27,7 @@ void PadSliceParameterTo4D(SliceParameter *param) {
   for (int32_t i = 0; i < param->param_length_; ++i) {
     begin[i] = param->begin_[i];
     end[i] = param->end_[i];
-    slice_size[i] = param->size_[i];
+    slice_size[i] = param->size_[i] < 0 ? param->shape_[i] - begin[i] : param->size_[i];
     data_shape[i] = param->shape_[i];
   }
   int32_t real_index = param->param_length_ - 1;
@@ -45,36 +47,54 @@ void PadSliceParameterTo4D(SliceParameter *param) {
   param->param_length_ = DIMENSION_4D;
 }
-int DoSlice(const float *input, SliceParameter *param, float *output) {
-  if (param->param_length_ > DIMENSION_4D) {
-    return -1;
-  }
-  for (int i = 0; i < param->param_length_; ++i) {
-    if (param->size_[i] < 0) {
-      param->size_[i] = param->shape_[i] - param->begin_[i];
-    }
-    param->end_[i] = param->begin_[i] + param->size_[i];
-  }
-  if (param->param_length_ < DIMENSION_4D) {
-    PadSliceParameterTo4D(param);
-  }
-  size_t dim_offset[DIMENSION_4D - 1];
-  dim_offset[2] = param->shape_[3];
-  dim_offset[1] = dim_offset[2] * param->shape_[2];
-  dim_offset[0] = dim_offset[1] * param->shape_[1];
-  size_t output_index = 0;
+void DoSlice(const float *input, float *output, SliceParameter *param) {
+  int32_t out_dim1 = param->size_[1];
+  int32_t out_dim2 = param->size_[2];
+  int32_t out_dim3 = param->size_[3];
+  size_t out_stride2 = out_dim3;
+  size_t out_stride1 = out_stride2 * out_dim2;
+  size_t out_stride0 = out_stride1 * out_dim1;
+  size_t count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_);
+  int thread_id = param->thread_id_;
+  size_t thread_stride = thread_id * count_per_thread;
+  size_t copy_size = param->size_[3] * sizeof(float);
+  size_t in_stride2 = param->shape_[3];
+  size_t in_stride1 = param->shape_[2] * in_stride2;
+  size_t in_stride0 = param->shape_[1] * in_stride1;
+  for (int i = 0; i < param->size_[0]; ++i) {
+    size_t out_offset0 = i * out_stride0;
+    size_t in_offset0 = (i + param->begin_[0]) * in_stride0 + param->begin_[3];
+    for (size_t j = 0; j < count_per_thread; ++j) {
+      size_t k = j + thread_stride;
+      if (k >= out_dim1) {
+        break;
+      }
+      size_t out_offset1 = k * out_stride1 + out_offset0;
+      size_t in_offset1 = (k + param->begin_[1]) * in_stride1 + in_offset0;
+      for (int l = 0; l < out_dim2; ++l) {
+        size_t out_offset = out_offset1 + l * out_stride2;
+        size_t in_offset = in_offset1 + (l + param->begin_[2]) * in_stride2;
+        memcpy(output + out_offset, input + in_offset, copy_size);
+      }
+    }
+  }
+}
+void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) {
+  size_t copy_size = param->size_[3] * sizeof(float);
+  size_t in_stride2 = param->shape_[3];
+  size_t in_stride1 = param->shape_[2] * in_stride2;
+  size_t in_stride0 = param->shape_[1] * in_stride1;
+  size_t out_offset = 0;
   for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) {
-    for (int32_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) {
+    size_t in_offset0 = dim0 * in_stride0 + param->begin_[3];
+    for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) {
+      size_t in_offset1 = dim1 * in_stride1 + in_offset0;
      for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) {
-        for (int32_t dim3 = param->begin_[3]; dim3 < param->end_[3]; ++dim3) {
-          output[output_index++] = *(input + dim0 * dim_offset[0]
-                                     + dim1 * dim_offset[1] + dim2 * dim_offset[2] + dim3);
-        }
+        size_t in_offset = in_offset1 + dim2 * in_stride2;
+        memcpy(output + out_offset, input + in_offset, copy_size);
+        out_offset += param->size_[3];
       }
    }
  }
-  return 0;
 }
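DoSliceNoParallel now copies whole innermost runs with memcpy instead of the old element-by-element quadruple loop; because every parameter has been padded to exactly four dimensions, three loops over begin_/end_ ranges plus one memcpy of size_[3] elements cover any supported slice. A worked sketch with hypothetical values:

// after padding: shape_ = {1, 1, 4, 8}, begin_ = {0, 0, 1, 2}, end_ = {1, 1, 4, 6}, size_ = {1, 1, 3, 4}
// the dim2 loop visits 1, 2, 3, and each iteration memcpys 4 floats (16 bytes)
// from input + dim2 * 8 + 2 into the packed output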
@@ -26,9 +26,11 @@ struct SliceParameter {
   int32_t size_[SLICE_SHAPE_MAX_SIZE];
   int32_t shape_[SLICE_SHAPE_MAX_SIZE];
   int32_t param_length_;
+  int32_t thread_id_;
 };
-int DoSlice(const float *input, SliceParameter *param, float *output);
+void PadSliceParameterTo4D(SliceParameter *param);
+void DoSlice(const float *input, float *output, SliceParameter *param);
+void DoSliceNoParallel(const float *input, float *output, SliceParameter *param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_SLICE_H_
@@ -29,6 +29,11 @@ STATUS TfliteArgmaxParser::Parse(const std::unique_ptr<tflite::OperatorT> &tflit
                                  bool quantizedModel) {
   MS_LOG(DEBUG) << "parse TfliteArgmaxParser";
   std::unique_ptr<schema::ArgMaxT> attr(new schema::ArgMaxT());
+  // These are caffe attributes, set to default value.
+  attr->axisType = 1;
+  attr->outMaxValue = false;
+  attr->topK = -1;
+  attr->keepDims = false;
   if (op != nullptr) {
     op->primitive = std::make_unique<schema::PrimitiveT>();
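Setting topK = -1 (with axisType = 1) as the default here keeps TFLite-converted models on the axis-erasing branch of the new ArgMax::InferShape above, since TFLite's ArgMax has no top-k notion of its own.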