| @@ -64,11 +64,17 @@ class MS_API Context { | |||
| /// \brief Destructor of MindSpore Lite Context. | |||
| virtual ~Context(); | |||
| void InferShapeInterrupt() { /* Raises infer_shape_interrupt_; kernel Init() overrides that test this flag return early (after SetNeedReInit()) while running_ is still false. */ | |||
| infer_shape_interrupt_ = true; | |||
| } | |||
| public: | |||
| DeviceContext device_ctx_{DT_CPU}; /**< device configuration; initialised with DT_CPU */ | |||
| int thread_num_ = 2; /**< thread number config for thread pool */ | |||
| std::shared_ptr<Allocator> allocator = nullptr; /**< optional allocator; may be null (ArgMinMax then falls back to malloc) */ | |||
| CpuBindMode cpu_bind_mode_ = MID_CPU; /**< CPU binding mode; defaults to MID_CPU */ | |||
| bool infer_shape_interrupt_ = false; /**< set by InferShapeInterrupt(); checked together with running_ in kernel Init() */ | |||
| bool running_ = false; /**< set to true by LiteSession::RunGraph() */ | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | |||
| @@ -48,8 +48,11 @@ constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution operator. | |||
| /* Tensor error code, range: [-401,-500] */ | |||
| constexpr int RET_FORMAT_ERR = -401; /**< Failed to check tensor format. */ | |||
| /* InferShape error code, range: [-501,-600] */ | |||
| constexpr int RET_INFER_ERR = -501; /**< Failed to infer shape. */ | |||
| constexpr int RET_INFER_INVALID = -502; /**< Shape cannot be inferred before runtime; inference is deferred to runtime. */ | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | |||
| @@ -37,11 +37,6 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten | |||
| kernel::LiteKernelUtil::InitTensorRefCount(kernels); | |||
| for (auto *kernel : kernels) { | |||
| MS_ASSERT(nullptr != kernel); | |||
| auto &outputs = kernel->GetOutputs(); | |||
| for (auto *output : outputs) { | |||
| MS_ASSERT(nullptr != output); | |||
| output->MallocData(); | |||
| } | |||
| session::CallBackParam callbackParam; | |||
| callbackParam.name_callback_param = kernel->Name(); | |||
| callbackParam.type_callback_param = kernel->type_str(); | |||
| @@ -45,7 +45,7 @@ LiteKernel *KernelFactory::GetKernel(const std::vector<tensor::Tensor *> &inputs | |||
| } | |||
| auto creator = KernelRegistry::GetInstance()->GetCreator(key); | |||
| if (creator != nullptr) { | |||
| auto kernel = creator(inputs, outputs, parameter, ctx, key); | |||
| auto kernel = creator(inputs, outputs, parameter, ctx, key, primitive); | |||
| return kernel; | |||
| } | |||
| return nullptr; | |||
| @@ -45,7 +45,6 @@ class KernelRegistry { | |||
| int device_type_length_; | |||
| int data_type_length_; | |||
| int op_type_length_; | |||
| std::mutex lock_; | |||
| }; | |||
| class KernelRegistrar { | |||
| @@ -25,6 +25,7 @@ | |||
| #include "include/context.h" | |||
| #include "src/ir/tensor.h" | |||
| #include "src/ops/ops.h" | |||
| #include "include/errorcode.h" | |||
| #ifdef ENABLE_FP16 | |||
| using FLOAT_t = float16_t; | |||
| @@ -34,6 +35,8 @@ using FLOAT_t = float; | |||
| // using mindspore::kernel::AddressPtr; | |||
| namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; | |||
| struct KernelKey { | |||
| KERNEL_ARCH arch; | |||
| @@ -55,15 +58,30 @@ class LiteKernel { | |||
| public: | |||
| LiteKernel() = default; | |||
| /// \brief Builds a kernel over the given parameter, tensors, context and primitive. | |||
| /// | |||
| /// \param[in] parameter Operator parameter; released with delete in ~LiteKernel(). | |||
| /// \param[in] inputs Input tensor pointers (copied; the tensors are freed by the session, not here). | |||
| /// \param[in] outputs Output tensor pointers (copied; same ownership as inputs). | |||
| /// \param[in] ctx Context stored in context_. | |||
| /// \param[in] primitive Primitive stored in primitive_; Prepare() uses it for deferred shape inference. | |||
| explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
|                     const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
|                     const lite::Primitive *primitive) | |||
|     // Initialisers are listed in member declaration order (the order they actually run in). | |||
|     : opParameter(parameter), | |||
|       primitive_(primitive), | |||
|       context_(ctx), | |||
|       inputs_(inputs), | |||
|       outputs_(outputs), | |||
|       train_mode(false) { | |||
|   // in_kernel_ and out_kernel_ are default-constructed empty, so no explicit clear() is required. | |||
| } | |||
| virtual ~LiteKernel() { delete opParameter; } | |||
| /// \brief Runs deferred shape inference and re-initialisation, then allocates the output tensors. | |||
| /// | |||
| /// \return RET_OK on success; otherwise the error code reported by InferShape() or Init(). | |||
| virtual int Prepare() { | |||
|   // Shapes not inferred ahead of time (infer flag false) are inferred now. | |||
|   if (primitive_ != nullptr && !primitive_->GetInferFlag()) { | |||
|     const int infer_ret = (const_cast<lite::Primitive *>(primitive_))->InferShape(inputs_, outputs_); | |||
|     if (infer_ret != RET_OK) { | |||
|       // Do not allocate outputs from a shape that failed to infer. | |||
|       return infer_ret; | |||
|     } | |||
|   } | |||
|   // A kernel whose Init() bailed out early asked for a re-run through SetNeedReInit(). | |||
|   // NOTE(review): need_reinit is not cleared here, so Init() re-runs on every Prepare() - confirm this is intended. | |||
|   if (need_reinit) { | |||
|     const int init_ret = Init(); | |||
|     if (init_ret != RET_OK) { | |||
|       return init_ret; | |||
|     } | |||
|   } | |||
|   for (auto *output : this->GetOutputs()) { | |||
|     MS_ASSERT(output != nullptr); | |||
|     output->MallocData(); | |||
|   } | |||
|   return RET_OK; | |||
| } | |||
| virtual int Init() { return -1; } | |||
| virtual int ReSize() { return -1; } | |||
| virtual int Run() { return -1; } | |||
| @@ -103,16 +121,23 @@ class LiteKernel { | |||
| void set_desc(const KernelKey kernel_key) { desc = kernel_key; } | |||
| void SetNeedReInit() { /* Sets need_reinit so that Prepare() calls Init() again. */ | |||
| need_reinit = true; | |||
| } | |||
| protected: | |||
| KernelKey desc; | |||
| std::string name; | |||
| OpParameter *opParameter = nullptr; | |||
| // Null/false defaults: LiteKernel() is defaulted, and Prepare() tests primitive_ against nullptr, | |||
| // so these must never hold indeterminate values. | |||
| const lite::Primitive *primitive_ = nullptr; | |||
| const lite::Context *context_ = nullptr; | |||
| // tensor will free in ~lite_session() | |||
| std::vector<lite::tensor::Tensor *> inputs_; | |||
| std::vector<lite::tensor::Tensor *> outputs_; | |||
| std::vector<LiteKernel *> in_kernel_; | |||
| std::vector<LiteKernel *> out_kernel_; | |||
| bool train_mode = false; | |||
| bool need_reinit = false; | |||
| }; | |||
| class SubGraphKernel : public LiteKernel { | |||
| @@ -121,8 +146,9 @@ class SubGraphKernel : public LiteKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| const std::vector<kernel::LiteKernel *> &inKernels, | |||
| const std::vector<kernel::LiteKernel *> &outKernels, | |||
| const std::vector<kernel::LiteKernel *> &nodes) | |||
| : LiteKernel(nullptr, inputs, outputs), | |||
| const std::vector<kernel::LiteKernel *> &nodes, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(nullptr, inputs, outputs, ctx, primitive), | |||
| inputs_(inputs), | |||
| outputs_(outputs), | |||
| inkernels_(inKernels), | |||
| @@ -144,7 +170,7 @@ class SubGraphKernel : public LiteKernel { | |||
| /// \brief Signature of a kernel factory function: builds a LiteKernel from its tensors, | |||
| /// operator parameter, context, kernel key and primitive. | |||
| typedef LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs, | |||
|                                      const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | |||
|                                      const lite::Context *ctx, const KernelKey &desc, | |||
|                                      const lite::Primitive *primitive); | |||
| class LiteKernelUtil { | |||
| public: | |||
| @@ -168,6 +168,7 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { | |||
| int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { | |||
| MS_EXCEPTION_IF_NULL(this->context_); | |||
| SetMaxWokerNum(context_->thread_num_); | |||
| context_->running_ = true; | |||
| Executor executor; | |||
| if (before == nullptr && after == nullptr) { | |||
| return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get()); | |||
| @@ -40,7 +40,7 @@ int Cast::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:: | |||
| MS_LOG(ERROR) << "Unsupport input data type " << input->data_type(); | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (cast_prim->dstT() != kNumberTypeFloat || cast_prim->dstT() != kNumberTypeFloat32) { | |||
| if (cast_prim->dstT() != kNumberTypeFloat && cast_prim->dstT() != kNumberTypeFloat32) { | |||
| MS_LOG(ERROR) << "Invalid output datatype " << cast_prim->dstT(); | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| @@ -45,12 +45,15 @@ class Primitive { | |||
| static Primitive *CreatePrimitive(schema::Primitive *primitive); | |||
| virtual ~Primitive() {} | |||
| const schema::Primitive *Value() const { return this->primitive; } | |||
| /// \brief Returns infer_flag_ (defaults to true); LiteKernel::Prepare() runs InferShape() when it is false. | |||
| bool GetInferFlag() const { return this->infer_flag_; } | |||
| /// \brief Stores \p flag in infer_flag_. | |||
| void SetInferFlag(bool flag) { this->infer_flag_ = flag; } | |||
| schema::PrimitiveType Type() const { return this->primitive->value_type(); } | |||
| const void *Attribute() const { return this->primitive->value(); } | |||
| virtual int InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_); | |||
| protected: | |||
| schema::Primitive *primitive; | |||
| bool infer_flag_ = true; | |||
| }; | |||
| class Conv2D : public Primitive { | |||
| @@ -34,11 +34,11 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_ | |||
| inferIndex = i; | |||
| } else { | |||
| MS_LOG(ERROR) << "output shape should has no more than one dim which need infer"; | |||
| return RET_ERROR; | |||
| return RET_INFER_ERR; | |||
| } | |||
| } else if (out_shape->at(i) < 0) { | |||
| MS_LOG(ERROR) << "output shape dim should be non-negative"; | |||
| return RET_ERROR; | |||
| return RET_INFER_ERR; | |||
| } else if (out_shape->at(i) == 0) { | |||
| out_shape->at(i) = in_tensor->shape().at(i); | |||
| out_shapeSize *= out_shape->at(i); | |||
| @@ -49,7 +49,7 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_ | |||
| if (inferIndex == -1 && out_shapeSize != in_shape_size) { | |||
| MS_LOG(ERROR) << "output shapeSize: " << out_shapeSize << " should be equal to input shapeSize: " << in_shape_size; | |||
| return RET_ERROR; | |||
| return RET_INFER_ERR; | |||
| } | |||
| if (inferIndex != -1) { | |||
| out_shape->at(inferIndex) = in_shape_size / out_shapeSize; | |||
| @@ -88,7 +88,11 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso | |||
| std::vector<int> out_shape; | |||
| if (inputs_.size() == kDoubleNum) { | |||
| auto shape_tensor = inputs_.at(1); | |||
| size_t shape_size = shape_tensor->ElementsNum(); | |||
| if (shape_tensor->Data() == nullptr) { | |||
| MS_LOG(INFO) << "Do infer shape in runtime."; | |||
| return RET_INFER_INVALID; | |||
| } | |||
| size_t shape_size = shape_tensor->shape().size(); | |||
| switch (shape_tensor->data_type()) { | |||
| case kNumberTypeInt8: { | |||
| auto data = reinterpret_cast<int8_t *>(shape_tensor->Data()); | |||
| @@ -108,13 +112,14 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso | |||
| } break; | |||
| default: { | |||
| MS_LOG(ERROR) << "Reshape weight tensor has unsupported dataType: " << shape_tensor->data_type(); | |||
| return RET_ERROR; | |||
| return RET_INFER_ERR; | |||
| } | |||
| } | |||
| } else if (inputs_.size() == kSingleNum) { | |||
| std::copy(reshape_prim->shape()->begin(), reshape_prim->shape()->end(), std::back_inserter(out_shape)); | |||
| } else { | |||
| MS_LOG(ERROR) << "inputs tensor size invalid."; | |||
| return RET_INFER_ERR; | |||
| } | |||
| auto ret = CalNewShape(inputs_.front(), &out_shape); | |||
| @@ -24,14 +24,18 @@ | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_PARAM_INVALID; | |||
| using mindspore::lite::RET_FORMAT_ERR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::lite::RET_PARAM_INVALID; | |||
| using mindspore::schema::PrimitiveType_ArgMax; | |||
| using mindspore::schema::PrimitiveType_ArgMin; | |||
| namespace mindspore::kernel { | |||
| int ArgMinMaxBaseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter); | |||
| switch (opParameter->type_) { | |||
| case PrimitiveType_ArgMax: | |||
| @@ -44,6 +48,7 @@ int ArgMinMaxBaseCPUKernel::Init() { | |||
| MS_LOG(ERROR) << "Unexpected type " << opParameter->type_; | |||
| return RET_ERROR; | |||
| } | |||
| auto in_shape = inputs_.at(0)->shape(); | |||
| auto dims_size = in_shape.size(); | |||
| int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_; | |||
| @@ -56,9 +61,9 @@ int ArgMinMaxBaseCPUKernel::Init() { | |||
| param->topk_ = MSMIN(param->topk_, in_shape[axis]); | |||
| if (param->topk_ > 1) { | |||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||
| param->arg_elements_ | |||
| = reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); | |||
| data_from_allocator_ = true; | |||
| param->arg_elements_ = | |||
| reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); | |||
| data_from_allocator_ = true; | |||
| } else { | |||
| param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis])); | |||
| } | |||
| @@ -98,12 +103,12 @@ void ArgMinMaxBaseCPUKernel::FreeTmpMemory() { | |||
| kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ArgMinMaxInt8CPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -122,12 +127,12 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor | |||
| kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||
| class ArgMinMaxBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), context_(ctx), data_from_allocator_(false) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| @@ -40,7 +41,6 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel { | |||
| void FreeTmpMemory(); | |||
| private: | |||
| const lite::Context *context_; | |||
| bool data_from_allocator_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -46,13 +46,13 @@ int BatchToSpaceBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -71,13 +71,13 @@ kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::ten | |||
| kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class BatchToSpaceBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| @@ -30,21 +30,24 @@ using mindspore::schema::PrimitiveType_Concat; | |||
| namespace mindspore::kernel { | |||
| /// \brief Resolves the concat axis to a non-negative index stored in axis_. | |||
| /// | |||
| /// While infer_shape_interrupt_ is set and the session is not yet running, the work is | |||
| /// postponed: the kernel is flagged through SetNeedReInit() and RET_OK is returned. | |||
| /// | |||
| /// \return RET_OK. | |||
| int ConcatBaseCPUKernel::Init() { | |||
|   // The input shape is read only after this guard, never before it. | |||
|   if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
|     SetNeedReInit(); | |||
|     return RET_OK; | |||
|   } | |||
|   const int axis = concat_param_->axis_; | |||
|   // A negative axis counts back from the rank; do the sum in int rather than size_t. | |||
|   axis_ = axis >= 0 ? axis : static_cast<int>(inputs_.front()->shape().size()) + axis; | |||
|   return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto *kernel = new(std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -60,15 +63,15 @@ kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::T | |||
| } | |||
| kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -84,15 +87,15 @@ kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor:: | |||
| } | |||
| kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -111,4 +114,3 @@ REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, CpuConcatInt8KernelCreat | |||
| REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class ConcatBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| ConcatBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter); | |||
| } | |||
| @@ -41,6 +42,7 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||
| int ReSize() override { return 0; } | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| int axis_; | |||
| @@ -50,4 +52,3 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CONCAT_BASE_H_ | |||
| @@ -37,8 +37,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| conv_param_ = reinterpret_cast<ConvParameter *>(opParameter); | |||
| } | |||
| @@ -31,15 +31,15 @@ namespace mindspore::kernel { | |||
| int CropBaseCPUKernel::Init() { return RET_OK; } /* Performs no setup at this level; always returns RET_OK. */ | |||
| kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | |||
| auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -55,15 +55,15 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Ten | |||
| } | |||
| kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | |||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -79,15 +79,15 @@ kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Te | |||
| } | |||
| kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | |||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class CropBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| ~CropBaseCPUKernel() = default; | |||
| @@ -39,7 +40,6 @@ class CropBaseCPUKernel : public LiteKernel { | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -25,13 +25,17 @@ | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_PARAM_INVALID; | |||
| using mindspore::lite::RET_FORMAT_ERR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::lite::RET_PARAM_INVALID; | |||
| using mindspore::schema::PrimitiveType_DepthToSpace; | |||
| namespace mindspore::kernel { | |||
| int DepthToSpaceBaseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| if (inputs_[0]->GetFormat() != schema::Format_NHWC) { | |||
| MS_LOG(ERROR) << "depth_to_space only support NHWC now!"; | |||
| return RET_FORMAT_ERR; | |||
| @@ -62,13 +66,13 @@ int DepthToSpaceBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -87,13 +91,13 @@ kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::ten | |||
| kernel::LiteKernel *CpuDepthToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new DepthToSpaceCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class DepthToSpaceBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| @@ -35,10 +35,11 @@ int FullconnectionBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -56,10 +57,11 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::t | |||
| kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class FullconnectionBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| fc_param_ = reinterpret_cast<MatMulParameter *>(opParameter); | |||
| } | |||
| ~FullconnectionBaseCPUKernel() = default; | |||
| @@ -28,7 +28,8 @@ using mindspore::schema::PrimitiveType_MatMul; | |||
| namespace mindspore::kernel { | |||
| kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto input_tensor = inputs.at(kInputIndex); | |||
| @@ -37,7 +38,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso | |||
| switch (data_type) { | |||
| case kNumberTypeInt8: | |||
| case kNumberTypeUInt8: { | |||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -46,7 +47,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso | |||
| } | |||
| case kNumberTypeFloat32: { | |||
| kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class MatmulBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| params_ = reinterpret_cast<MatMulParameter *>(opParameter); | |||
| } | |||
| ~MatmulBaseCPUKernel() = default; | |||
| @@ -31,10 +31,10 @@ namespace mindspore::kernel { | |||
| kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | |||
| auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PadCPUKernel failed."; | |||
| return nullptr; | |||
| @@ -52,10 +52,10 @@ kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tens | |||
| kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | |||
| auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PadCPUKernel failed."; | |||
| return nullptr; | |||
| @@ -56,6 +56,10 @@ void PoolingBaseCPUKernel::FreeQuantParam() { | |||
| } | |||
| int PoolingBaseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| MS_ASSERT(inputs_.size() == 1); | |||
| MS_ASSERT(outputs_.size() == 1); | |||
| pooling_param_->thread_num_ = thread_count_; | |||
| @@ -78,13 +82,13 @@ int PoolingBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | |||
| auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PoolingInt8CPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -102,13 +106,13 @@ kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor:: | |||
| kernel::LiteKernel *CpuPoolingFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | |||
| auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PoolingCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -29,8 +29,9 @@ namespace mindspore::kernel { | |||
| class PoolingBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| pooling_param_ = reinterpret_cast<PoolingParameter *>(opParameter); | |||
| } | |||
| ~PoolingBaseCPUKernel() = default; | |||
| @@ -32,13 +32,13 @@ int PreluBaseCPUKernel::Init() {return RET_OK;} | |||
| kernel::LiteKernel *CpuPreluInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Prelu); | |||
| auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PreluCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class PreluBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| PreluBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| prelu_param_ = reinterpret_cast<PreluParameter *>(opParameter); | |||
| } | |||
| @@ -39,6 +39,11 @@ int PriorBoxCPUKernel::Init() { | |||
| MS_LOG(ERROR) << "PriorBoxParameter nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| MS_ASSERT(inputs_.size() == kInputNum); | |||
| MS_ASSERT(outputs_.size() == kOutputNum); | |||
| @@ -164,7 +169,7 @@ int PriorBoxCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| @@ -173,7 +178,7 @@ kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Ten | |||
| MS_LOG(ERROR) << "PriorBox invalid desc type " << desc.type; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new PriorBoxCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class PriorBoxCPUKernel : public LiteKernel { | |||
| public: | |||
| PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(opParameter); | |||
| } | |||
| ~PriorBoxCPUKernel() = default; | |||
| @@ -34,6 +34,10 @@ constexpr int kQuantDTypeCastOutputNum = 1; | |||
| } // namespace | |||
| int QuantDTypeCastCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| if (inputs_.size() != 1) { | |||
| MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given."; | |||
| return RET_ERROR; | |||
| @@ -83,8 +87,8 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { | |||
| ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| } else { | |||
| ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, | |||
| quant_arg.zeroPoint, num_unit_thread); | |||
| ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| @@ -124,12 +128,13 @@ int QuantDTypeCastCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new QuantDTypeCastCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||
| class QuantDTypeCastCPUKernel : public LiteKernel { | |||
| public: | |||
| QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} | |||
| ~QuantDTypeCastCPUKernel() = default; | |||
| int Init() override; | |||
| @@ -36,13 +36,13 @@ int ReshapeBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | |||
| auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ReshapeInt8CPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -60,13 +60,13 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor:: | |||
| kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | |||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -84,13 +84,13 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor: | |||
| kernel::LiteKernel *CpuReshapeFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | |||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class ReshapeBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter); | |||
| } | |||
| ~ReshapeBaseCPUKernel() = default; | |||
| @@ -45,4 +46,3 @@ class ReshapeBaseCPUKernel : public LiteKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ | |||
| @@ -53,13 +53,13 @@ int SoftmaxBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | |||
| auto *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| SoftmaxInt8CPUKernel *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -77,13 +77,13 @@ kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor:: | |||
| kernel::LiteKernel *CpuSoftmaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | |||
| auto *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx); | |||
| SoftmaxCPUKernel *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class SoftmaxBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter); | |||
| } | |||
| @@ -61,13 +61,13 @@ int SplitBaseCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | |||
| auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -85,13 +85,13 @@ kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Te | |||
| kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | |||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -109,13 +109,13 @@ kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::T | |||
| kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | |||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class SplitBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| param = reinterpret_cast<SplitParameter *>(opParameter); | |||
| } | |||
| ~SplitBaseCPUKernel() = default; | |||
| @@ -32,13 +32,13 @@ int SqueezeBaseCPUKernel::Init() { return RET_OK; } | |||
| kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze); | |||
| auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new SqueezeCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class SqueezeBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| @@ -42,12 +42,18 @@ int StridedSliceCPUKernel::Init() { | |||
| int StridedSliceCPUKernel::ReSize() { return 0; } | |||
| int StridedSliceCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto input = inputs_.at(0); | |||
| auto output = outputs_.at(0); | |||
| MS_ASSERT(input); | |||
| MS_ASSERT(output); | |||
| auto ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter)); | |||
| ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter)); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -58,13 +64,13 @@ int StridedSliceCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice); | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "opParameter null pointer dereferencing."; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "New kernel fails."; | |||
| return nullptr; | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class StridedSliceCPUKernel : public LiteKernel { | |||
| public: | |||
| StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} | |||
| ~StridedSliceCPUKernel() override = default; | |||
| int Init() override; | |||
| @@ -183,10 +183,14 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int Convolution3x3FP16CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| @@ -228,7 +232,7 @@ int Convolution3x3FP16CPUKernel::ReSize() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| return ret; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| @@ -256,7 +260,11 @@ int Convolution3x3Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) | |||
| } | |||
| int Convolution3x3FP16CPUKernel::Run() { | |||
| // cast fp32 input data to fp16 | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto input_tensor = inputs_.at(kInputIndex); | |||
| auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | |||
| for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class Convolution3x3FP16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~Convolution3x3FP16CPUKernel() override { | |||
| if (fp16_input_ != nullptr) { | |||
| free(fp16_input_); | |||
| @@ -78,4 +79,3 @@ void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvPara | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_3x3_FP16_H_ | |||
| @@ -85,14 +85,20 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| // init sliding_ window param | |||
| sliding_ = new SlidingWindowParam; | |||
| InitSlidingParam(sliding_, conv_param_, C8NUM); | |||
| auto ret = InitWeightBias(); | |||
| ret = InitWeightBias(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed."; | |||
| return RET_ERROR; | |||
| @@ -138,6 +144,11 @@ int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||
| return RET_ERROR; | |||
| @@ -149,7 +160,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||
| PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | |||
| conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | |||
| auto ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_); | |||
| ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -165,10 +176,10 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | |||
| auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwiseFp16CPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| @@ -154,10 +154,14 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionFP16CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| @@ -193,7 +197,7 @@ int ConvolutionFP16CPUKernel::ReSize() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| return ret; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| @@ -220,7 +224,11 @@ int ConvolutionFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ConvolutionFP16CPUKernel::Run() { | |||
| // cast fp32 input data to fp16 | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto input_tensor = inputs_.at(kInputIndex); | |||
| auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | |||
| for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | |||
| @@ -251,7 +259,7 @@ int ConvolutionFP16CPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | |||
| @@ -267,7 +275,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten | |||
| conv_param->output_w_ = outputs.front()->Width(); | |||
| kernel::LiteKernel *kernel = nullptr; | |||
| if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | |||
| kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| } else { | |||
| bool use_winograd = false; | |||
| int out_unit; | |||
| @@ -275,7 +283,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten | |||
| OutputTransformUnitFunc output_trans_func = nullptr; | |||
| CheckIfUseWinograd(&use_winograd, &out_unit, conv_param, input_trans_func, output_trans_func); | |||
| if (kernel_h != 1 && kernel_w != 1 && !use_winograd) { | |||
| kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| } | |||
| } | |||
| if (kernel == nullptr) { | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionFP16CPUKernel() override { | |||
| if (fp16_input_ != nullptr) { | |||
| free(fp16_input_); | |||
| @@ -99,12 +99,19 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| sliding_ = new SlidingWindowParam; | |||
| InitSlideParam(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| auto ret = InitWeightBias(); | |||
| ret = InitWeightBias(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed."; | |||
| return RET_ERROR; | |||
| @@ -150,6 +157,11 @@ int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||
| return RET_ERROR; | |||
| @@ -161,7 +173,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | |||
| conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | |||
| auto ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_); | |||
| ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -176,10 +188,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); | |||
| auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DeconvolutionDepthwiseFp16CPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| @@ -19,6 +19,7 @@ | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/ops.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| @@ -78,6 +79,11 @@ int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ActivationCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | |||
| @@ -89,10 +95,10 @@ int ActivationCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuActivationFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Activation); | |||
| auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class ActivationCPUKernel : public LiteKernel { | |||
| public: | |||
| ActivationCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(param, inputs, outputs), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||
| type_ = (reinterpret_cast<ActivationParameter *>(param))->type_; | |||
| alpha_ = (reinterpret_cast<ActivationParameter *>(param))->alpha_; | |||
| } | |||
| @@ -20,8 +20,8 @@ | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::ActivationGradType_HSWISH; | |||
| @@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_ActivationGrad; | |||
| namespace mindspore::kernel { | |||
| int ActivationGradCPUKernel::Init() { | |||
| outputs_[0]->set_shape(inputs_[0]->shape()); | |||
| return RET_OK; | |||
| outputs_[0]->set_shape(inputs_[0]->shape()); | |||
| return RET_OK; | |||
| } | |||
| /* Resize hook: performs no work and always returns RET_OK. */ int ActivationGradCPUKernel::ReSize() { return RET_OK; } | |||
| @@ -58,7 +58,7 @@ int ActivationGradCPUKernel::DoActivation(int task_id) { | |||
| error_code = TanhGrad(yt_addr, input_addr, length, output_addr); | |||
| } else if (type_ == schema::ActivationGradType_HSWISH) { | |||
| error_code = HSwishGrad(yt_addr, input_addr, length, output_addr); | |||
| } else if (type_ == schema::ActivationGradType_HSIGMOID) { | |||
| } else if (type_ == schema::ActivationGradType_HSIGMOID) { | |||
| error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr); | |||
| } else { | |||
| MS_LOG(ERROR) << "Activation type error"; | |||
| @@ -90,17 +90,17 @@ int ActivationGradCPUKernel::Run() { | |||
| } | |||
| kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad); | |||
| auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ | |||
| << ", type: " | |||
| MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| } | |||
| return kernel; | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class ActivationGradCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(param, inputs, outputs) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(param, inputs, outputs, ctx, primitive) { | |||
| ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param); | |||
| type_ = param_act_grad->type_; | |||
| alpha_ = param_act_grad->alpha_; | |||
| @@ -36,12 +36,9 @@ int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata); | |||
| return kernel->AddNParallelRun(thread_id); | |||
| } | |||
| } | |||
| } // namespace | |||
| int AddNCPUKernel::Init() { | |||
| elements_num_ = inputs_[0]->ElementsNum(); | |||
| return RET_OK; | |||
| } | |||
| int AddNCPUKernel::Init() { return RET_OK; } | |||
| /* Resize hook: performs no work and always returns RET_OK. */ int AddNCPUKernel::ReSize() { return RET_OK; } | |||
| @@ -58,6 +55,12 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) { | |||
| } | |||
| int AddNCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| elements_num_ = inputs_[0]->ElementsNum(); | |||
| auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data()); | |||
| auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data()); | |||
| auto output_data = reinterpret_cast<float *>(outputs_[0]->Data()); | |||
| @@ -71,7 +74,7 @@ int AddNCPUKernel::Run() { | |||
| in1_addr_ = input0_data; | |||
| in2_addr_ = input1_data; | |||
| out_addr_ = output_data; | |||
| int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); | |||
| ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | |||
| return RET_ERROR; | |||
| @@ -91,7 +94,7 @@ int AddNCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| @@ -102,7 +105,7 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Ten | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_AddN); | |||
| op_parameter->thread_num_ = ctx->thread_num_; | |||
| auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new AddNCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -21,18 +21,20 @@ | |||
| #include "src/lite_kernel.h" | |||
| #include "schema/model_generated.h" | |||
| namespace mindspore::kernel { | |||
| class AddNCPUKernel : public LiteKernel { | |||
| public: | |||
| AddNCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~AddNCPUKernel() = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int AddNParallelRun(int thread_id); | |||
| private: | |||
| float *in1_addr_; | |||
| float *in2_addr_; | |||
| @@ -40,7 +40,12 @@ int ArgMinMaxCPUKernel::Init() { | |||
| } | |||
| int ArgMinMaxCPUKernel::Run() { | |||
| auto ret = ArgMinMaxBaseCPUKernel::Run(); | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = ArgMinMaxBaseCPUKernel::Run(); | |||
| ArgMinMaxBaseCPUKernel::FreeTmpMemory(); | |||
| return ret; | |||
| } | |||
| @@ -23,8 +23,9 @@ namespace mindspore::kernel { | |||
| class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { | |||
| public: | |||
| ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ArgMinMaxCPUKernel() = default; | |||
| @@ -35,4 +36,3 @@ class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_ | |||
| @@ -41,6 +41,10 @@ ArithmeticCPUKernel::~ArithmeticCPUKernel() { | |||
| } | |||
| } | |||
| int ArithmeticCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto element_num = outputs_[0]->ElementsNum(); | |||
| tile_data0_ = new float[element_num]; | |||
| @@ -92,6 +96,11 @@ int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ArithmeticCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| if (arithmeticParameter_->broadcasting_) { | |||
| auto input_data0 = reinterpret_cast<float *>(inputs_[0]->Data()); | |||
| auto input_data1 = reinterpret_cast<float *>(inputs_[1]->Data()); | |||
| @@ -108,9 +117,9 @@ int ArithmeticCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(parameter != nullptr); | |||
| auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx); | |||
| auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | |||
| return nullptr; | |||
| @@ -48,8 +48,9 @@ class ArithmeticCPUKernel : public LiteKernel { | |||
| public: | |||
| ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||
| arithmeticParameter_ = reinterpret_cast<ArithmeticParameter *>(parameter); | |||
| switch (parameter->type_) { | |||
| case PrimitiveType_Mul: | |||
| @@ -261,12 +261,13 @@ int ArithmeticGradCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_EXCEPTION_IF_NULL(opParameter); | |||
| if (opParameter == nullptr) { | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| @@ -37,8 +37,9 @@ class ArithmeticGradCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { | |||
| switch (type()) { | |||
| case PrimitiveType_MulGrad: | |||
| arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape | |||
| @@ -27,6 +27,10 @@ using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| /* Initializes the kernel. When context_->infer_shape_interrupt_ is set and context_->running_ is false, the kernel is only flagged through SetNeedReInit() and RET_OK is returned without calling ReSize(); otherwise the return value of ReSize() is passed back unchanged. */ int ArithmeticSelfCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); /* NOTE(review): presumably defers the real initialization to a later call once shapes are known - confirm against the base-class implementation. */ | |||
| return RET_OK; | |||
| } | |||
| int ret = ReSize(); /* Init does no setup of its own; everything is delegated to ReSize(). */ | |||
| return ret; | |||
| } | |||
| @@ -68,11 +72,16 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) { | |||
| } | |||
| int ArithmeticSelfCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| auto input_tensor = inputs_.at(0); | |||
| auto out_tensor = outputs_.at(0); | |||
| in_ptr_ = reinterpret_cast<float *>(input_tensor->Data()); | |||
| out_ptr_ = reinterpret_cast<float *>(out_tensor->Data()); | |||
| int ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_); | |||
| ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | |||
| return ret; | |||
| @@ -83,13 +92,14 @@ int ArithmeticSelfCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Creator failed, opParameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| @@ -24,9 +24,9 @@ | |||
| #include "schema/model_generated.h" | |||
| #include "include/context.h" | |||
| using mindspore::lite::Context; | |||
| using mindspore::schema::PrimitiveType_Abs; | |||
| using mindspore::schema::PrimitiveType_Ceil; | |||
| using mindspore::schema::PrimitiveType_Cos; | |||
| using mindspore::schema::PrimitiveType_Exp; | |||
| using mindspore::schema::PrimitiveType_Floor; | |||
| @@ -36,7 +36,6 @@ using mindspore::schema::PrimitiveType_Rsqrt; | |||
| using mindspore::schema::PrimitiveType_Sin; | |||
| using mindspore::schema::PrimitiveType_Sqrt; | |||
| using mindspore::schema::PrimitiveType_Square; | |||
| using mindspore::schema::PrimitiveType_Ceil; | |||
| namespace mindspore::kernel { | |||
| class ArithmeticSelfCPUKernel : public LiteKernel { | |||
| @@ -44,8 +43,9 @@ class ArithmeticSelfCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| switch (parameter->type_) { | |||
| case PrimitiveType_Abs: | |||
| arithmeticSelf_run_ = ElementAbs; | |||
| @@ -106,4 +106,3 @@ class ArithmeticSelfCPUKernel : public LiteKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_ | |||
| @@ -28,6 +28,11 @@ int BatchToSpaceCPUKernel::Init() { | |||
| } | |||
| int BatchToSpaceCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = inputs_[0]; | |||
| auto output = outputs_[0]; | |||
| const float *input_data = reinterpret_cast<const float *>(input->Data()); | |||
| @@ -22,8 +22,9 @@ namespace mindspore::kernel { | |||
| class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { | |||
| public: | |||
| BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~BatchToSpaceCPUKernel() = default; | |||
| @@ -34,4 +35,3 @@ class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCH_TO_SPACE_H_ | |||
| @@ -53,6 +53,11 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int BatchnormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data()); | |||
| var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data()); | |||
| @@ -76,10 +81,10 @@ int BatchnormCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm); | |||
| auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchNormCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class BatchnormCPUKernel : public LiteKernel { | |||
| public: | |||
| BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~BatchnormCPUKernel() override { delete batchnorm_param_; } | |||
| @@ -31,6 +31,11 @@ namespace mindspore::kernel { | |||
| /* Resize hook: performs no work and always returns RET_OK. */ int BiasCPUKernel::ReSize() { return RET_OK; } | |||
| int BiasCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| auto bias = reinterpret_cast<float *>(inputs_.at(1)->Data()); | |||
| auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| @@ -44,6 +49,10 @@ int BiasCPUKernel::Run() { | |||
| } | |||
| int BiasCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto dims = inputs_[0]->shape(); | |||
| MS_ASSERT(dims.size() <= 5); | |||
| bias_param_->ndim_ = dims.size(); | |||
| @@ -58,10 +67,11 @@ int BiasCPUKernel::Init() { | |||
| kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | |||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(parameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd); | |||
| auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs); | |||
| auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | |||
| return nullptr; | |||
| @@ -24,9 +24,10 @@ namespace mindspore::kernel { | |||
| class BiasCPUKernel : public LiteKernel { | |||
| public: | |||
| BiasCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| bias_param_ = reinterpret_cast<ArithmeticParameter*>(parameter); | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter); | |||
| } | |||
| ~BiasCPUKernel() override = default; | |||
| @@ -40,4 +41,3 @@ class BiasCPUKernel : public LiteKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_H_ | |||
| @@ -20,12 +20,11 @@ | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_BiasGrad; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_BiasGrad; | |||
| namespace mindspore::kernel { | |||
| int BiasGradCPUKernel::InferShape() { | |||
| @@ -68,10 +67,14 @@ int BiasGradCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int BiasGradCPUKernel::ReSize() { return 0; } | |||
| int BiasGradCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| // size_t data_size = inputs_.at(0)->ElementsNum(); | |||
| @@ -91,14 +94,14 @@ int BiasGradCPUKernel::Run() { | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad); | |||
| auto *kernel = new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); | |||
| auto *kernel = | |||
| new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class BiasGradCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| bias_param = reinterpret_cast<ArithmeticParameter *>(parameter); | |||
| } | |||
| ~BiasGradCPUKernel() override = default; | |||
| @@ -96,12 +96,12 @@ int BNGradInputCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput); | |||
| // parameter->name = opDef.name()->str().data(); | |||
| // parameter->type = opDef.attr_type(); | |||
| auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| if (RET_OK != ret) { | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class BNGradInputCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~BNGradInputCPUKernel() override { delete workspace; } | |||
| int Init() override; | |||
| @@ -27,6 +27,10 @@ using mindspore::schema::PrimitiveType_BroadcastTo; | |||
| namespace mindspore::kernel { | |||
| int BroadcastToCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto input_shape = inputs_[0]->shape(); | |||
| for (size_t i = 0; i < input_shape.size(); ++i) { | |||
| shape_info_.input_shape_[i] = input_shape[i]; | |||
| @@ -42,6 +46,11 @@ int BroadcastToCPUKernel::Init() { | |||
| } | |||
| int BroadcastToCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_data = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| auto output_data = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| @@ -51,13 +60,13 @@ int BroadcastToCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BroadcastTo); | |||
| auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -25,18 +25,18 @@ namespace mindspore::kernel { | |||
| class BroadcastToCPUKernel : public LiteKernel { | |||
| public: | |||
| BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~BroadcastToCPUKernel() = default; | |||
| int Init() override; | |||
| int ReSize() override { | |||
| return 0; | |||
| } | |||
| int ReSize() override { return 0; } | |||
| int Run() override; | |||
| private: | |||
| BroadcastShapeInfo shape_info_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BROADCAST_TO_H_ | |||
| @@ -30,9 +30,6 @@ using mindspore::schema::PrimitiveType_Cast; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| constexpr int kInputNum = 1; | |||
| constexpr int kOutputNum = 1; | |||
| const std::vector<int> kSupportInputDataType = {kNumberTypeUInt8, kNumberTypeInt32}; | |||
| int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| if (cdata == nullptr) { | |||
| MS_LOG(ERROR) << "input cdata is nullptr!"; | |||
| @@ -44,12 +41,16 @@ int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } // namespace | |||
| int CastCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| data_num_ = inputs_[0]->ElementsNum(); | |||
| if (data_num_ == 0) { | |||
| return RET_OK; | |||
| } | |||
| thread_num_ = MSMIN(thread_num_, data_num_); | |||
| stride_ = UP_DIV(data_num_, thread_num_); | |||
| opParameter->thread_num_ = MSMIN(opParameter->thread_num_, data_num_); | |||
| stride_ = UP_DIV(data_num_, opParameter->thread_num_); | |||
| return RET_OK; | |||
| } | |||
| @@ -77,16 +78,21 @@ int CastCPUKernel::DoCast(int thread_id) { | |||
| } | |||
| int CastCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| if (data_num_ == 0) { | |||
| return RET_OK; | |||
| } | |||
| return LiteBackendParallelLaunch(CastRun, this, thread_num_); | |||
| return LiteBackendParallelLaunch(CastRun, this, opParameter->thread_num_); | |||
| } | |||
| kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| if (opParameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | |||
| return nullptr; | |||
| @@ -99,7 +105,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten | |||
| MS_LOG(ERROR) << "context thread num is 0!"; | |||
| return nullptr; | |||
| } | |||
| auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx); | |||
| auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new CastCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -23,27 +23,23 @@ namespace mindspore::kernel { | |||
| class CastCPUKernel : public LiteKernel { | |||
| public: | |||
| CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| if (ctx != nullptr) { | |||
| thread_num_ = ctx->thread_num_; | |||
| } | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| } | |||
| ~CastCPUKernel() = default; | |||
| int Init() override; | |||
| int ReSize() override { | |||
| return 0; | |||
| }; | |||
| int ReSize() override { return 0; }; | |||
| int Run() override; | |||
| int DoCast(int thread_id); | |||
| private: | |||
| uint32_t thread_num_; | |||
| uint32_t stride_; | |||
| uint32_t data_num_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ | |||
| @@ -28,44 +28,54 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Concat; | |||
| namespace mindspore::kernel { | |||
| int ConcatCPUKernel::Init() { | |||
| ConcatBaseCPUKernel::Init(); | |||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||
| bool need_convert_format = false; | |||
| for (size_t i = 1; i < inputs_.size(); ++i) { | |||
| if (inputs_[i]->GetFormat() != input0_format) { | |||
| need_convert_format = true; | |||
| } | |||
| } | |||
| if (!need_convert_format) { | |||
| outputs_[0]->SetFormat(input0_format); | |||
| return RET_OK; | |||
| } | |||
| MS_LOG(ERROR) << "All input format should be the same!"; | |||
| return RET_ERROR; | |||
| int ConcatCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConcatBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||
| bool need_convert_format = false; | |||
| for (size_t i = 1; i < inputs_.size(); ++i) { | |||
| if (inputs_[i]->GetFormat() != input0_format) { | |||
| need_convert_format = true; | |||
| } | |||
| } | |||
| if (!need_convert_format) { | |||
| outputs_[0]->SetFormat(input0_format); | |||
| return RET_OK; | |||
| } | |||
| MS_LOG(ERROR) << "All input format should be the same!"; | |||
| return RET_ERROR; | |||
| } | |||
| int ConcatCPUKernel::ReSize() { return RET_OK; } | |||
| int ConcatCPUKernel::ReSize() { return RET_OK; } | |||
| int ConcatCPUKernel::Run() { | |||
| auto input_num = inputs_.size(); | |||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||
| int ConcatCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_num = inputs_.size(); | |||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||
| std::vector <std::vector<int>> shapes; | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| inputs_addr[i] = inputs_[i]->Data(); | |||
| shapes.push_back(inputs_[i]->shape()); | |||
| inputs_output_shape[i] = shapes[i].data(); | |||
| } | |||
| auto output_shape = outputs_.at(0)->shape(); | |||
| inputs_output_shape[input_num] = output_shape.data(); | |||
| auto output_addr = outputs_.at(0)->Data(); | |||
| std::vector<std::vector<int>> shapes; | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| inputs_addr[i] = inputs_[i]->Data(); | |||
| shapes.push_back(inputs_[i]->shape()); | |||
| inputs_output_shape[i] = shapes[i].data(); | |||
| } | |||
| auto output_shape = outputs_.at(0)->shape(); | |||
| inputs_output_shape[input_num] = output_shape.data(); | |||
| auto output_addr = outputs_.at(0)->Data(); | |||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||
| output_shape.size(), output_addr); | |||
| return RET_OK; | |||
| } | |||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||
| output_shape.size(), output_addr); | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class ConcatCPUKernel : public ConcatBaseCPUKernel { | |||
| public: | |||
| ConcatCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConcatCPUKernel() = default; | |||
| @@ -42,4 +43,3 @@ class ConcatCPUKernel : public ConcatBaseCPUKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONCAT_H_ | |||
| @@ -29,6 +29,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::lite::RET_INFER_INVALID; | |||
| using mindspore::schema::PrimitiveType_Conv2D; | |||
| namespace mindspore::kernel { | |||
| @@ -136,6 +137,10 @@ void ConvolutionCPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| @@ -204,6 +209,11 @@ int ConvolutionImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ConvolutionCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_tensor = inputs_.at(kInputIndex); | |||
| auto ori_input_data = input_tensor->Data(); | |||
| int in_batch = conv_param_->input_batch_; | |||
| @@ -223,7 +233,7 @@ int ConvolutionCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | |||
| @@ -245,20 +255,21 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten | |||
| kernel::LiteKernel *kernel; | |||
| if (kernel_h == 1 && kernel_w == 1) { | |||
| // kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| } else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | |||
| kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| } else if (use_winograd) { | |||
| kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, out_unit); | |||
| kernel = | |||
| new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, primitive, out_unit); | |||
| } else { | |||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| } | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| if (ret != RET_OK && ret != RET_INFER_INVALID) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionCPUKernel() override { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| @@ -136,6 +136,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) { | |||
| } | |||
| int Convolution1x1CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| InitConv1x1MatmulParam(); | |||
| @@ -178,6 +182,11 @@ int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int Convolution1x1CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto src_in = reinterpret_cast<float *>(inputs_[0]->Data()); | |||
| auto src_out = reinterpret_cast<float *>(outputs_[0]->Data()); | |||
| @@ -34,8 +34,9 @@ namespace mindspore::kernel { | |||
| class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| matmul_param_ = new MatMulParameter(); | |||
| } | |||
| ~Convolution1x1CPUKernel(); | |||
| @@ -166,6 +166,10 @@ void Convolution3x3CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int Convolution3x3CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| @@ -237,6 +241,11 @@ int Convolution3x3Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int Convolution3x3CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_tensor = inputs_.at(kInputIndex); | |||
| auto ori_input_data = input_tensor->Data(); | |||
| int in_batch = conv_param_->input_batch_; | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| Convolution3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~Convolution3x3CPUKernel() override { | |||
| if (transformed_filter_addr_ != nullptr) { | |||
| free(transformed_filter_addr_); | |||
| @@ -25,6 +25,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::lite::RET_INFER_INVALID; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| @@ -86,6 +87,10 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() { | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -144,6 +149,11 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||
| return RET_ERROR; | |||
| @@ -164,7 +174,7 @@ int ConvolutionDepthwiseCPUKernel::Run() { | |||
| packed_output_ = output_addr; | |||
| } | |||
| auto ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_); | |||
| ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -180,11 +190,11 @@ int ConvolutionDepthwiseCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | |||
| kernel::LiteKernel *kernel; | |||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | |||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| // auto param = reinterpret_cast<ConvParameter *>(opParameter); | |||
| // if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && | |||
| // param->dilation_h_ == 1 && param->dilation_w_ == 1) { | |||
| @@ -192,12 +202,13 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T | |||
| // } else { | |||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | |||
| // } | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| if (ret != RET_OK && ret != RET_INFER_INVALID) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwiseCPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| @@ -55,4 +56,3 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_H_ | |||
| @@ -100,6 +100,10 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() { | |||
| } | |||
| int ConvolutionDepthwise3x3CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -164,6 +168,11 @@ int ConvDw3x3Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ConvolutionDepthwise3x3CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||
| return RET_ERROR; | |||
| @@ -184,7 +193,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() { | |||
| packed_output_ = output_addr; | |||
| } | |||
| auto ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_); | |||
| ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwise3x3CPUKernel() override { | |||
| free(packed_weight_); | |||
| @@ -135,11 +135,12 @@ int ConvolutionGradFilterCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter); | |||
| auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionGradFilterCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionGradFilterCPUKernel() override { delete workspace; } | |||
| int Init() override; | |||
| @@ -23,9 +23,9 @@ | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_Conv2DGradInput; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Conv2DGradInput; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionGradInputCPUKernel::Init() { | |||
| @@ -115,11 +115,11 @@ int ConvolutionGradInputCPUKernel::Run() { | |||
| kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput); | |||
| auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs); | |||
| auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto ret = kernel->Init(); | |||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionGradInputCPUKernel : public LiteKernel { | |||
| public: | |||
| explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionGradInputCPUKernel() override { delete workspace; } | |||
| int Init() override; | |||
| @@ -247,6 +247,10 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionWinogradCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| @@ -339,6 +343,11 @@ int ConvolutionWinogradImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata | |||
| } | |||
| int ConvolutionWinogradCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_tensor = inputs_.at(kInputIndex); | |||
| auto ori_input_data = input_tensor->Data(); | |||
| int in_batch = conv_param_->input_batch_; | |||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||
| class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||
| public: | |||
| ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, int output_unit) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), output_unit_(output_unit) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive, int output_unit) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {} | |||
| ~ConvolutionWinogradCPUKernel() override { | |||
| if (tmp_data_ != nullptr) { | |||
| free(tmp_data_); | |||
| @@ -40,15 +40,7 @@ int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| } // namespace | |||
| int CropCPUKernel::Init() { | |||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||
| if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) { | |||
| MS_LOG(ERROR) << "Unsupport format " << input0_format; | |||
| return RET_FORMAT_ERR; | |||
| } | |||
| outputs_[0]->SetFormat(input0_format); | |||
| return RET_OK; | |||
| } | |||
| int CropCPUKernel::Init() { return RET_OK; } | |||
| int CropCPUKernel::CropParallelRun(int thread_id) { | |||
| auto input = inputs_[0]; | |||
| @@ -61,6 +53,11 @@ int CropCPUKernel::CropParallelRun(int thread_id) { | |||
| } | |||
| int CropCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = inputs_[0]; | |||
| auto output = outputs_[0]; | |||
| auto param = reinterpret_cast<CropParameter *>(opParameter); | |||
| @@ -71,7 +68,7 @@ int CropCPUKernel::Run() { | |||
| return RET_OK; | |||
| } | |||
| int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); | |||
| auto ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; | |||
| return RET_ERROR; | |||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||
| class CropCPUKernel : public CropBaseCPUKernel { | |||
| public: | |||
| CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : CropBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~CropCPUKernel() = default; | |||
| int Init() override; | |||
| int ReSize() override { return 0; } | |||