| @@ -64,11 +64,17 @@ class MS_API Context { | |||||
| /// \brief Destructor of MindSpore Lite Context. | /// \brief Destructor of MindSpore Lite Context. | ||||
| virtual ~Context(); | virtual ~Context(); | ||||
| void InferShapeInterrupt() { | |||||
| infer_shape_interrupt_ = true; | |||||
| } | |||||
| public: | public: | ||||
| DeviceContext device_ctx_{DT_CPU}; | DeviceContext device_ctx_{DT_CPU}; | ||||
| int thread_num_ = 2; /**< thread number config for thread pool */ | int thread_num_ = 2; /**< thread number config for thread pool */ | ||||
| std::shared_ptr<Allocator> allocator = nullptr; | std::shared_ptr<Allocator> allocator = nullptr; | ||||
| CpuBindMode cpu_bind_mode_ = MID_CPU; | CpuBindMode cpu_bind_mode_ = MID_CPU; | ||||
| bool infer_shape_interrupt_ = false; | |||||
| bool running_ = false; | |||||
| }; | }; | ||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| #endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | #endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | ||||
| @@ -48,8 +48,11 @@ constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution operator. | |||||
| /* Tensor error code, range: [-401,-500] */ | /* Tensor error code, range: [-401,-500] */ | ||||
| constexpr int RET_FORMAT_ERR = -401; /**< Failed to check tensor format. */ | constexpr int RET_FORMAT_ERR = -401; /**< Failed to check tensor format. */ | ||||
| /* InferShape error code, range: [-501,-600] */ | |||||
| constexpr int RET_INFER_ERR = -501; /**< Failed to infer shape. */ | |||||
| constexpr int RET_INFER_INVALID = -502; /**< Shape cannot be inferred before runtime. */ | |||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | #endif // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | ||||
| @@ -37,11 +37,6 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten | |||||
| kernel::LiteKernelUtil::InitTensorRefCount(kernels); | kernel::LiteKernelUtil::InitTensorRefCount(kernels); | ||||
| for (auto *kernel : kernels) { | for (auto *kernel : kernels) { | ||||
| MS_ASSERT(nullptr != kernel); | MS_ASSERT(nullptr != kernel); | ||||
| auto &outputs = kernel->GetOutputs(); | |||||
| for (auto *output : outputs) { | |||||
| MS_ASSERT(nullptr != output); | |||||
| output->MallocData(); | |||||
| } | |||||
| session::CallBackParam callbackParam; | session::CallBackParam callbackParam; | ||||
| callbackParam.name_callback_param = kernel->Name(); | callbackParam.name_callback_param = kernel->Name(); | ||||
| callbackParam.type_callback_param = kernel->type_str(); | callbackParam.type_callback_param = kernel->type_str(); | ||||
| @@ -45,7 +45,7 @@ LiteKernel *KernelFactory::GetKernel(const std::vector<tensor::Tensor *> &inputs | |||||
| } | } | ||||
| auto creator = KernelRegistry::GetInstance()->GetCreator(key); | auto creator = KernelRegistry::GetInstance()->GetCreator(key); | ||||
| if (creator != nullptr) { | if (creator != nullptr) { | ||||
| auto kernel = creator(inputs, outputs, parameter, ctx, key); | |||||
| auto kernel = creator(inputs, outputs, parameter, ctx, key, primitive); | |||||
| return kernel; | return kernel; | ||||
| } | } | ||||
| return nullptr; | return nullptr; | ||||
| @@ -45,7 +45,6 @@ class KernelRegistry { | |||||
| int device_type_length_; | int device_type_length_; | ||||
| int data_type_length_; | int data_type_length_; | ||||
| int op_type_length_; | int op_type_length_; | ||||
| std::mutex lock_; | |||||
| }; | }; | ||||
| class KernelRegistrar { | class KernelRegistrar { | ||||
| @@ -25,6 +25,7 @@ | |||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "src/ir/tensor.h" | #include "src/ir/tensor.h" | ||||
| #include "src/ops/ops.h" | #include "src/ops/ops.h" | ||||
| #include "include/errorcode.h" | |||||
| #ifdef ENABLE_FP16 | #ifdef ENABLE_FP16 | ||||
| using FLOAT_t = float16_t; | using FLOAT_t = float16_t; | ||||
| @@ -34,6 +35,8 @@ using FLOAT_t = float; | |||||
| // using mindspore::kernel::AddressPtr; | // using mindspore::kernel::AddressPtr; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| using mindspore::lite::RET_ERROR; | |||||
| using mindspore::lite::RET_OK; | |||||
| enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; | enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; | ||||
| struct KernelKey { | struct KernelKey { | ||||
| KERNEL_ARCH arch; | KERNEL_ARCH arch; | ||||
| @@ -55,15 +58,30 @@ class LiteKernel { | |||||
| public: | public: | ||||
| LiteKernel() = default; | LiteKernel() = default; | ||||
| explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false), primitive_(primitive), | |||||
| context_(ctx) { | |||||
| this->in_kernel_.clear(); | this->in_kernel_.clear(); | ||||
| this->out_kernel_.clear(); | this->out_kernel_.clear(); | ||||
| } | } | ||||
| virtual ~LiteKernel() { delete opParameter; } | virtual ~LiteKernel() { delete opParameter; } | ||||
| virtual int Prepare() { return -1; } | |||||
| virtual int Prepare() { | |||||
| if (primitive_ != nullptr && !primitive_->GetInferFlag()) { | |||||
| (const_cast<lite::Primitive *>(primitive_))->InferShape(inputs_, outputs_); | |||||
| } | |||||
| if (need_reinit) { | |||||
| Init(); | |||||
| } | |||||
| auto &outputs = this->GetOutputs(); | |||||
| for (auto *output : outputs) { | |||||
| MS_ASSERT(output != nullptr); | |||||
| output->MallocData(); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| virtual int Init() { return -1; } | virtual int Init() { return -1; } | ||||
| virtual int ReSize() { return -1; } | virtual int ReSize() { return -1; } | ||||
| virtual int Run() { return -1; } | virtual int Run() { return -1; } | ||||
| @@ -103,16 +121,23 @@ class LiteKernel { | |||||
| void set_desc(const KernelKey kernel_key) { desc = kernel_key; } | void set_desc(const KernelKey kernel_key) { desc = kernel_key; } | ||||
| void SetNeedReInit() { | |||||
| need_reinit = true; | |||||
| } | |||||
| protected: | protected: | ||||
| KernelKey desc; | KernelKey desc; | ||||
| std::string name; | std::string name; | ||||
| OpParameter *opParameter = nullptr; | OpParameter *opParameter = nullptr; | ||||
| const lite::Primitive *primitive_; | |||||
| const lite::Context *context_; | |||||
| // tensor will free in ~lite_session() | // tensor will free in ~lite_session() | ||||
| std::vector<lite::tensor::Tensor *> inputs_; | std::vector<lite::tensor::Tensor *> inputs_; | ||||
| std::vector<lite::tensor::Tensor *> outputs_; | std::vector<lite::tensor::Tensor *> outputs_; | ||||
| std::vector<LiteKernel *> in_kernel_; | std::vector<LiteKernel *> in_kernel_; | ||||
| std::vector<LiteKernel *> out_kernel_; | std::vector<LiteKernel *> out_kernel_; | ||||
| bool train_mode; | bool train_mode; | ||||
| bool need_reinit = false; | |||||
| }; | }; | ||||
| class SubGraphKernel : public LiteKernel { | class SubGraphKernel : public LiteKernel { | ||||
| @@ -121,8 +146,9 @@ class SubGraphKernel : public LiteKernel { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| const std::vector<kernel::LiteKernel *> &inKernels, | const std::vector<kernel::LiteKernel *> &inKernels, | ||||
| const std::vector<kernel::LiteKernel *> &outKernels, | const std::vector<kernel::LiteKernel *> &outKernels, | ||||
| const std::vector<kernel::LiteKernel *> &nodes) | |||||
| : LiteKernel(nullptr, inputs, outputs), | |||||
| const std::vector<kernel::LiteKernel *> &nodes, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(nullptr, inputs, outputs, ctx, primitive), | |||||
| inputs_(inputs), | inputs_(inputs), | ||||
| outputs_(outputs), | outputs_(outputs), | ||||
| inkernels_(inKernels), | inkernels_(inKernels), | ||||
| @@ -144,7 +170,7 @@ class SubGraphKernel : public LiteKernel { | |||||
| typedef LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs, | typedef LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | ||||
| const lite::Context *ctx, const KernelKey &desc); | |||||
| const lite::Context *ctx, const KernelKey &desc, const lite::Primitive *primitive); | |||||
| class LiteKernelUtil { | class LiteKernelUtil { | ||||
| public: | public: | ||||
| @@ -168,6 +168,7 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { | |||||
| int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { | int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { | ||||
| MS_EXCEPTION_IF_NULL(this->context_); | MS_EXCEPTION_IF_NULL(this->context_); | ||||
| SetMaxWokerNum(context_->thread_num_); | SetMaxWokerNum(context_->thread_num_); | ||||
| context_->running_ = true; | |||||
| Executor executor; | Executor executor; | ||||
| if (before == nullptr && after == nullptr) { | if (before == nullptr && after == nullptr) { | ||||
| return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get()); | return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get()); | ||||
| @@ -40,7 +40,7 @@ int Cast::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:: | |||||
| MS_LOG(ERROR) << "Unsupport input data type " << input->data_type(); | MS_LOG(ERROR) << "Unsupport input data type " << input->data_type(); | ||||
| return RET_INPUT_TENSOR_ERROR; | return RET_INPUT_TENSOR_ERROR; | ||||
| } | } | ||||
| if (cast_prim->dstT() != kNumberTypeFloat || cast_prim->dstT() != kNumberTypeFloat32) { | |||||
| if (cast_prim->dstT() != kNumberTypeFloat && cast_prim->dstT() != kNumberTypeFloat32) { | |||||
| MS_LOG(ERROR) << "Invalid output datatype " << cast_prim->dstT(); | MS_LOG(ERROR) << "Invalid output datatype " << cast_prim->dstT(); | ||||
| return RET_INPUT_TENSOR_ERROR; | return RET_INPUT_TENSOR_ERROR; | ||||
| } | } | ||||
| @@ -45,12 +45,15 @@ class Primitive { | |||||
| static Primitive *CreatePrimitive(schema::Primitive *primitive); | static Primitive *CreatePrimitive(schema::Primitive *primitive); | ||||
| virtual ~Primitive() {} | virtual ~Primitive() {} | ||||
| const schema::Primitive *Value() const { return this->primitive; } | const schema::Primitive *Value() const { return this->primitive; } | ||||
| const bool GetInferFlag() const { return this->infer_flag_; } | |||||
| void SetInferFlag(bool flag) { this->infer_flag_ = flag; } | |||||
| schema::PrimitiveType Type() const { return this->primitive->value_type(); } | schema::PrimitiveType Type() const { return this->primitive->value_type(); } | ||||
| const void *Attribute() const { return this->primitive->value(); } | const void *Attribute() const { return this->primitive->value(); } | ||||
| virtual int InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_); | virtual int InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_); | ||||
| protected: | protected: | ||||
| schema::Primitive *primitive; | schema::Primitive *primitive; | ||||
| bool infer_flag_ = true; | |||||
| }; | }; | ||||
| class Conv2D : public Primitive { | class Conv2D : public Primitive { | ||||
| @@ -34,11 +34,11 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_ | |||||
| inferIndex = i; | inferIndex = i; | ||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "output shape should has no more than one dim which need infer"; | MS_LOG(ERROR) << "output shape should has no more than one dim which need infer"; | ||||
| return RET_ERROR; | |||||
| return RET_INFER_ERR; | |||||
| } | } | ||||
| } else if (out_shape->at(i) < 0) { | } else if (out_shape->at(i) < 0) { | ||||
| MS_LOG(ERROR) << "output shape dim should be non-negative"; | MS_LOG(ERROR) << "output shape dim should be non-negative"; | ||||
| return RET_ERROR; | |||||
| return RET_INFER_ERR; | |||||
| } else if (out_shape->at(i) == 0) { | } else if (out_shape->at(i) == 0) { | ||||
| out_shape->at(i) = in_tensor->shape().at(i); | out_shape->at(i) = in_tensor->shape().at(i); | ||||
| out_shapeSize *= out_shape->at(i); | out_shapeSize *= out_shape->at(i); | ||||
| @@ -49,7 +49,7 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_ | |||||
| if (inferIndex == -1 && out_shapeSize != in_shape_size) { | if (inferIndex == -1 && out_shapeSize != in_shape_size) { | ||||
| MS_LOG(ERROR) << "output shapeSize: " << out_shapeSize << " should be equal to input shapeSize: " << in_shape_size; | MS_LOG(ERROR) << "output shapeSize: " << out_shapeSize << " should be equal to input shapeSize: " << in_shape_size; | ||||
| return RET_ERROR; | |||||
| return RET_INFER_ERR; | |||||
| } | } | ||||
| if (inferIndex != -1) { | if (inferIndex != -1) { | ||||
| out_shape->at(inferIndex) = in_shape_size / out_shapeSize; | out_shape->at(inferIndex) = in_shape_size / out_shapeSize; | ||||
| @@ -88,7 +88,11 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso | |||||
| std::vector<int> out_shape; | std::vector<int> out_shape; | ||||
| if (inputs_.size() == kDoubleNum) { | if (inputs_.size() == kDoubleNum) { | ||||
| auto shape_tensor = inputs_.at(1); | auto shape_tensor = inputs_.at(1); | ||||
| size_t shape_size = shape_tensor->ElementsNum(); | |||||
| if (shape_tensor->Data() == nullptr) { | |||||
| MS_LOG(INFO) << "Do infer shape in runtime."; | |||||
| return RET_INFER_INVALID; | |||||
| } | |||||
| size_t shape_size = shape_tensor->shape().size(); | |||||
| switch (shape_tensor->data_type()) { | switch (shape_tensor->data_type()) { | ||||
| case kNumberTypeInt8: { | case kNumberTypeInt8: { | ||||
| auto data = reinterpret_cast<int8_t *>(shape_tensor->Data()); | auto data = reinterpret_cast<int8_t *>(shape_tensor->Data()); | ||||
| @@ -108,13 +112,14 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso | |||||
| } break; | } break; | ||||
| default: { | default: { | ||||
| MS_LOG(ERROR) << "Reshape weight tensor has unsupported dataType: " << shape_tensor->data_type(); | MS_LOG(ERROR) << "Reshape weight tensor has unsupported dataType: " << shape_tensor->data_type(); | ||||
| return RET_ERROR; | |||||
| return RET_INFER_ERR; | |||||
| } | } | ||||
| } | } | ||||
| } else if (inputs_.size() == kSingleNum) { | } else if (inputs_.size() == kSingleNum) { | ||||
| std::copy(reshape_prim->shape()->begin(), reshape_prim->shape()->end(), std::back_inserter(out_shape)); | std::copy(reshape_prim->shape()->begin(), reshape_prim->shape()->end(), std::back_inserter(out_shape)); | ||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "inputs tensor size invalid."; | MS_LOG(ERROR) << "inputs tensor size invalid."; | ||||
| return RET_INFER_ERR; | |||||
| } | } | ||||
| auto ret = CalNewShape(inputs_.front(), &out_shape); | auto ret = CalNewShape(inputs_.front(), &out_shape); | ||||
| @@ -24,14 +24,18 @@ | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_PARAM_INVALID; | |||||
| using mindspore::lite::RET_FORMAT_ERR; | using mindspore::lite::RET_FORMAT_ERR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::lite::RET_PARAM_INVALID; | |||||
| using mindspore::schema::PrimitiveType_ArgMax; | using mindspore::schema::PrimitiveType_ArgMax; | ||||
| using mindspore::schema::PrimitiveType_ArgMin; | using mindspore::schema::PrimitiveType_ArgMin; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ArgMinMaxBaseCPUKernel::Init() { | int ArgMinMaxBaseCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter); | auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter); | ||||
| switch (opParameter->type_) { | switch (opParameter->type_) { | ||||
| case PrimitiveType_ArgMax: | case PrimitiveType_ArgMax: | ||||
| @@ -44,6 +48,7 @@ int ArgMinMaxBaseCPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "Unexpected type " << opParameter->type_; | MS_LOG(ERROR) << "Unexpected type " << opParameter->type_; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto in_shape = inputs_.at(0)->shape(); | auto in_shape = inputs_.at(0)->shape(); | ||||
| auto dims_size = in_shape.size(); | auto dims_size = in_shape.size(); | ||||
| int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_; | int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_; | ||||
| @@ -56,9 +61,9 @@ int ArgMinMaxBaseCPUKernel::Init() { | |||||
| param->topk_ = MSMIN(param->topk_, in_shape[axis]); | param->topk_ = MSMIN(param->topk_, in_shape[axis]); | ||||
| if (param->topk_ > 1) { | if (param->topk_ > 1) { | ||||
| if (context_ != nullptr && context_->allocator != nullptr) { | if (context_ != nullptr && context_->allocator != nullptr) { | ||||
| param->arg_elements_ | |||||
| = reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); | |||||
| data_from_allocator_ = true; | |||||
| param->arg_elements_ = | |||||
| reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); | |||||
| data_from_allocator_ = true; | |||||
| } else { | } else { | ||||
| param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis])); | param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis])); | ||||
| } | } | ||||
| @@ -98,12 +103,12 @@ void ArgMinMaxBaseCPUKernel::FreeTmpMemory() { | |||||
| kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ArgMinMaxInt8CPUKernel fail!"; | MS_LOG(ERROR) << "new ArgMinMaxInt8CPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -122,12 +127,12 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor | |||||
| kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!"; | MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||||
| class ArgMinMaxBaseCPUKernel : public LiteKernel { | class ArgMinMaxBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), context_(ctx), data_from_allocator_(false) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| } | } | ||||
| @@ -40,7 +41,6 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel { | |||||
| void FreeTmpMemory(); | void FreeTmpMemory(); | ||||
| private: | private: | ||||
| const lite::Context *context_; | |||||
| bool data_from_allocator_; | bool data_from_allocator_; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -46,13 +46,13 @@ int BatchToSpaceBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | ||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -71,13 +71,13 @@ kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::ten | |||||
| kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace); | ||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!"; | MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class BatchToSpaceBaseCPUKernel : public LiteKernel { | class BatchToSpaceBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| } | } | ||||
| @@ -30,21 +30,24 @@ using mindspore::schema::PrimitiveType_Concat; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConcatBaseCPUKernel::Init() { | int ConcatBaseCPUKernel::Init() { | ||||
| auto axis = concat_param_->axis_; | |||||
| axis_ = axis >= 0 ? axis : inputs_.front()->shape().size() + axis; | |||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : inputs_.front()->shape().size() + concat_param_->axis_; | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto *kernel = new(std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -60,15 +63,15 @@ kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::T | |||||
| } | } | ||||
| kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -84,15 +87,15 @@ kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor:: | |||||
| } | } | ||||
| kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | MS_LOG(ERROR) << "new ConcatCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -111,4 +114,3 @@ REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, CpuConcatInt8KernelCreat | |||||
| REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator) | REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator) | ||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator) | REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator) | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class ConcatBaseCPUKernel : public LiteKernel { | class ConcatBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ConcatBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConcatBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter); | concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter); | ||||
| } | } | ||||
| @@ -41,6 +42,7 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||||
| int ReSize() override { return 0; } | int ReSize() override { return 0; } | ||||
| int Run() override { return 0; } | int Run() override { return 0; } | ||||
| protected: | protected: | ||||
| int thread_count_; | int thread_count_; | ||||
| int axis_; | int axis_; | ||||
| @@ -50,4 +52,3 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CONCAT_BASE_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CONCAT_BASE_H_ | ||||
| @@ -37,8 +37,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionBaseCPUKernel : public LiteKernel { | class ConvolutionBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| conv_param_ = reinterpret_cast<ConvParameter *>(opParameter); | conv_param_ = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| } | } | ||||
| @@ -31,15 +31,15 @@ namespace mindspore::kernel { | |||||
| int CropBaseCPUKernel::Init() { return RET_OK; } | int CropBaseCPUKernel::Init() { return RET_OK; } | ||||
| kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | ||||
| auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | MS_LOG(ERROR) << "new CropCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -55,15 +55,15 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Ten | |||||
| } | } | ||||
| kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | ||||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | MS_LOG(ERROR) << "new CropCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -79,15 +79,15 @@ kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Te | |||||
| } | } | ||||
| kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | MS_ASSERT(desc.type == schema::PrimitiveType_Crop); | ||||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new CropCPUKernel fail!"; | MS_LOG(ERROR) << "new CropCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class CropBaseCPUKernel : public LiteKernel { | class CropBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| } | } | ||||
| ~CropBaseCPUKernel() = default; | ~CropBaseCPUKernel() = default; | ||||
| @@ -39,7 +40,6 @@ class CropBaseCPUKernel : public LiteKernel { | |||||
| protected: | protected: | ||||
| int thread_count_; | int thread_count_; | ||||
| const Context *ctx_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -25,13 +25,17 @@ | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_PARAM_INVALID; | |||||
| using mindspore::lite::RET_FORMAT_ERR; | using mindspore::lite::RET_FORMAT_ERR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::lite::RET_PARAM_INVALID; | |||||
| using mindspore::schema::PrimitiveType_DepthToSpace; | using mindspore::schema::PrimitiveType_DepthToSpace; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int DepthToSpaceBaseCPUKernel::Init() { | int DepthToSpaceBaseCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| if (inputs_[0]->GetFormat() != schema::Format_NHWC) { | if (inputs_[0]->GetFormat() != schema::Format_NHWC) { | ||||
| MS_LOG(ERROR) << "depth_to_space only support NHWC now!"; | MS_LOG(ERROR) << "depth_to_space only support NHWC now!"; | ||||
| return RET_FORMAT_ERR; | return RET_FORMAT_ERR; | ||||
| @@ -62,13 +66,13 @@ int DepthToSpaceBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | ||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -87,13 +91,13 @@ kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::ten | |||||
| kernel::LiteKernel *CpuDepthToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuDepthToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); | ||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new DepthToSpaceCPUKernel fail!"; | MS_LOG(ERROR) << "new DepthToSpaceCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class DepthToSpaceBaseCPUKernel : public LiteKernel { | class DepthToSpaceBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| } | } | ||||
| @@ -35,10 +35,11 @@ int FullconnectionBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (!kernel) { | if (!kernel) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -56,10 +57,11 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::t | |||||
| kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (!kernel) { | if (!kernel) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class FullconnectionBaseCPUKernel : public LiteKernel { | class FullconnectionBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| fc_param_ = reinterpret_cast<MatMulParameter *>(opParameter); | fc_param_ = reinterpret_cast<MatMulParameter *>(opParameter); | ||||
| } | } | ||||
| ~FullconnectionBaseCPUKernel() = default; | ~FullconnectionBaseCPUKernel() = default; | ||||
| @@ -28,7 +28,8 @@ using mindspore::schema::PrimitiveType_MatMul; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | ||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | ||||
| auto input_tensor = inputs.at(kInputIndex); | auto input_tensor = inputs.at(kInputIndex); | ||||
| @@ -37,7 +38,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso | |||||
| switch (data_type) { | switch (data_type) { | ||||
| case kNumberTypeInt8: | case kNumberTypeInt8: | ||||
| case kNumberTypeUInt8: { | case kNumberTypeUInt8: { | ||||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (!kernel) { | if (!kernel) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -46,7 +47,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso | |||||
| } | } | ||||
| case kNumberTypeFloat32: { | case kNumberTypeFloat32: { | ||||
| kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (!kernel) { | if (!kernel) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class MatmulBaseCPUKernel : public LiteKernel { | class MatmulBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| params_ = reinterpret_cast<MatMulParameter *>(opParameter); | params_ = reinterpret_cast<MatMulParameter *>(opParameter); | ||||
| } | } | ||||
| ~MatmulBaseCPUKernel() = default; | ~MatmulBaseCPUKernel() = default; | ||||
| @@ -31,10 +31,10 @@ namespace mindspore::kernel { | |||||
| kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | ||||
| auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PadCPUKernel failed."; | MS_LOG(ERROR) << "new PadCPUKernel failed."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -52,10 +52,10 @@ kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tens | |||||
| kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | MS_ASSERT(desc.type == schema::PrimitiveType_Pad); | ||||
| auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PadCPUKernel failed."; | MS_LOG(ERROR) << "new PadCPUKernel failed."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -56,6 +56,10 @@ void PoolingBaseCPUKernel::FreeQuantParam() { | |||||
| } | } | ||||
| int PoolingBaseCPUKernel::Init() { | int PoolingBaseCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| MS_ASSERT(inputs_.size() == 1); | MS_ASSERT(inputs_.size() == 1); | ||||
| MS_ASSERT(outputs_.size() == 1); | MS_ASSERT(outputs_.size() == 1); | ||||
| pooling_param_->thread_num_ = thread_count_; | pooling_param_->thread_num_ = thread_count_; | ||||
| @@ -78,13 +82,13 @@ int PoolingBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | ||||
| auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PoolingInt8CPUKernel fail!"; | MS_LOG(ERROR) << "new PoolingInt8CPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -102,13 +106,13 @@ kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor:: | |||||
| kernel::LiteKernel *CpuPoolingFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPoolingFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | MS_ASSERT(desc.type == schema::PrimitiveType_Pooling); | ||||
| auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PoolingCPUKernel fail!"; | MS_LOG(ERROR) << "new PoolingCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -29,8 +29,9 @@ namespace mindspore::kernel { | |||||
| class PoolingBaseCPUKernel : public LiteKernel { | class PoolingBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| pooling_param_ = reinterpret_cast<PoolingParameter *>(opParameter); | pooling_param_ = reinterpret_cast<PoolingParameter *>(opParameter); | ||||
| } | } | ||||
| ~PoolingBaseCPUKernel() = default; | ~PoolingBaseCPUKernel() = default; | ||||
| @@ -32,13 +32,13 @@ int PreluBaseCPUKernel::Init() {return RET_OK;} | |||||
| kernel::LiteKernel *CpuPreluInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPreluInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Prelu); | MS_ASSERT(desc.type == schema::PrimitiveType_Prelu); | ||||
| auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PreluCPUKernel fail!"; | MS_LOG(ERROR) << "new PreluCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class PreluBaseCPUKernel : public LiteKernel { | class PreluBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| PreluBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | PreluBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| prelu_param_ = reinterpret_cast<PreluParameter *>(opParameter); | prelu_param_ = reinterpret_cast<PreluParameter *>(opParameter); | ||||
| } | } | ||||
| @@ -39,6 +39,11 @@ int PriorBoxCPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "PriorBoxParameter nullptr"; | MS_LOG(ERROR) << "PriorBoxParameter nullptr"; | ||||
| return RET_NULL_PTR; | return RET_NULL_PTR; | ||||
| } | } | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| MS_ASSERT(inputs_.size() == kInputNum); | MS_ASSERT(inputs_.size() == kInputNum); | ||||
| MS_ASSERT(outputs_.size() == kOutputNum); | MS_ASSERT(outputs_.size() == kOutputNum); | ||||
| @@ -164,7 +169,7 @@ int PriorBoxCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -173,7 +178,7 @@ kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Ten | |||||
| MS_LOG(ERROR) << "PriorBox invalid desc type " << desc.type; | MS_LOG(ERROR) << "PriorBox invalid desc type " << desc.type; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new PriorBoxCPUKernel fail!"; | MS_LOG(ERROR) << "new PriorBoxCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class PriorBoxCPUKernel : public LiteKernel { | class PriorBoxCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(opParameter); | prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(opParameter); | ||||
| } | } | ||||
| ~PriorBoxCPUKernel() = default; | ~PriorBoxCPUKernel() = default; | ||||
| @@ -34,6 +34,10 @@ constexpr int kQuantDTypeCastOutputNum = 1; | |||||
| } // namespace | } // namespace | ||||
| int QuantDTypeCastCPUKernel::Init() { | int QuantDTypeCastCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| if (inputs_.size() != 1) { | if (inputs_.size() != 1) { | ||||
| MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given."; | MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -83,8 +87,8 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { | |||||
| ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, | ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, | ||||
| num_unit_thread); | num_unit_thread); | ||||
| } else { | } else { | ||||
| ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, | |||||
| quant_arg.zeroPoint, num_unit_thread); | |||||
| ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, | |||||
| num_unit_thread); | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; | MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; | ||||
| @@ -124,12 +128,13 @@ int QuantDTypeCastCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new QuantDTypeCastCPUKernel fail!"; | MS_LOG(ERROR) << "new QuantDTypeCastCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||||
| class QuantDTypeCastCPUKernel : public LiteKernel { | class QuantDTypeCastCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} | |||||
| ~QuantDTypeCastCPUKernel() = default; | ~QuantDTypeCastCPUKernel() = default; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -36,13 +36,13 @@ int ReshapeBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | ||||
| auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ReshapeInt8CPUKernel fail!"; | MS_LOG(ERROR) << "new ReshapeInt8CPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -60,13 +60,13 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor:: | |||||
| kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | ||||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -84,13 +84,13 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor: | |||||
| kernel::LiteKernel *CpuReshapeFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuReshapeFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | MS_ASSERT(desc.type == schema::PrimitiveType_Reshape); | ||||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | MS_LOG(ERROR) << "new ReshapeCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class ReshapeBaseCPUKernel : public LiteKernel { | class ReshapeBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter); | reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter); | ||||
| } | } | ||||
| ~ReshapeBaseCPUKernel() = default; | ~ReshapeBaseCPUKernel() = default; | ||||
| @@ -45,4 +46,3 @@ class ReshapeBaseCPUKernel : public LiteKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ | ||||
| @@ -53,13 +53,13 @@ int SoftmaxBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | ||||
| auto *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| SoftmaxInt8CPUKernel *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -77,13 +77,13 @@ kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor:: | |||||
| kernel::LiteKernel *CpuSoftmaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSoftmaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax); | ||||
| auto *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| SoftmaxCPUKernel *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class SoftmaxBaseCPUKernel : public LiteKernel { | class SoftmaxBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter); | softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter); | ||||
| } | } | ||||
| @@ -61,13 +61,13 @@ int SplitBaseCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | MS_ASSERT(desc.type == schema::PrimitiveType_Split); | ||||
| auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -85,13 +85,13 @@ kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Te | |||||
| kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | MS_ASSERT(desc.type == schema::PrimitiveType_Split); | ||||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -109,13 +109,13 @@ kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::T | |||||
| kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Split); | MS_ASSERT(desc.type == schema::PrimitiveType_Split); | ||||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | MS_LOG(ERROR) << "new SplitCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class SplitBaseCPUKernel : public LiteKernel { | class SplitBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| param = reinterpret_cast<SplitParameter *>(opParameter); | param = reinterpret_cast<SplitParameter *>(opParameter); | ||||
| } | } | ||||
| ~SplitBaseCPUKernel() = default; | ~SplitBaseCPUKernel() = default; | ||||
| @@ -32,13 +32,13 @@ int SqueezeBaseCPUKernel::Init() { return RET_OK; } | |||||
| kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze); | MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze); | ||||
| auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new SqueezeCPUKernel fail!"; | MS_LOG(ERROR) << "new SqueezeCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class SqueezeBaseCPUKernel : public LiteKernel { | class SqueezeBaseCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | opParameter->thread_num_ = ctx->thread_num_; | ||||
| } | } | ||||
| @@ -42,12 +42,18 @@ int StridedSliceCPUKernel::Init() { | |||||
| int StridedSliceCPUKernel::ReSize() { return 0; } | int StridedSliceCPUKernel::ReSize() { return 0; } | ||||
| int StridedSliceCPUKernel::Run() { | int StridedSliceCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input = inputs_.at(0); | auto input = inputs_.at(0); | ||||
| auto output = outputs_.at(0); | auto output = outputs_.at(0); | ||||
| MS_ASSERT(input); | MS_ASSERT(input); | ||||
| MS_ASSERT(output); | MS_ASSERT(output); | ||||
| auto ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter)); | |||||
| ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter)); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]"; | MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -58,13 +64,13 @@ int StridedSliceCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice); | MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice); | ||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "opParameter null pointer dereferencing."; | MS_LOG(ERROR) << "opParameter null pointer dereferencing."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "New kernel fails."; | MS_LOG(ERROR) << "New kernel fails."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class StridedSliceCPUKernel : public LiteKernel { | class StridedSliceCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} | |||||
| ~StridedSliceCPUKernel() override = default; | ~StridedSliceCPUKernel() override = default; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -183,10 +183,14 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int Convolution3x3FP16CPUKernel::Init() { | int Convolution3x3FP16CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| return RET_ERROR; | |||||
| return ret; | |||||
| } | } | ||||
| ret = InitWeightBias(); | ret = InitWeightBias(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -228,7 +232,7 @@ int Convolution3x3FP16CPUKernel::ReSize() { | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| return RET_ERROR; | |||||
| return ret; | |||||
| } | } | ||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -256,7 +260,11 @@ int Convolution3x3Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) | |||||
| } | } | ||||
| int Convolution3x3FP16CPUKernel::Run() { | int Convolution3x3FP16CPUKernel::Run() { | ||||
| // cast fp32 input data to fp16 | |||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input_tensor = inputs_.at(kInputIndex); | auto input_tensor = inputs_.at(kInputIndex); | ||||
| auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class Convolution3x3FP16CPUKernel : public ConvolutionBaseCPUKernel { | class Convolution3x3FP16CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~Convolution3x3FP16CPUKernel() override { | ~Convolution3x3FP16CPUKernel() override { | ||||
| if (fp16_input_ != nullptr) { | if (fp16_input_ != nullptr) { | ||||
| free(fp16_input_); | free(fp16_input_); | ||||
| @@ -78,4 +79,3 @@ void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvPara | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_3x3_FP16_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_3x3_FP16_H_ | ||||
| @@ -85,14 +85,20 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseFp16CPUKernel::Init() { | int ConvolutionDepthwiseFp16CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| // init sliding_ window param | // init sliding_ window param | ||||
| sliding_ = new SlidingWindowParam; | sliding_ = new SlidingWindowParam; | ||||
| InitSlidingParam(sliding_, conv_param_, C8NUM); | InitSlidingParam(sliding_, conv_param_, C8NUM); | ||||
| auto ret = InitWeightBias(); | |||||
| ret = InitWeightBias(); | |||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed."; | MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -138,6 +144,11 @@ int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseFp16CPUKernel::Run() { | int ConvolutionDepthwiseFp16CPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | if (conv_param_->input_channel_ != conv_param_->output_channel_) { | ||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | MS_LOG(ERROR) << "Only support input channel equals output channel."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -149,7 +160,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | ||||
| conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | ||||
| auto ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -165,10 +176,10 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | ||||
| auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionDepthwiseFp16CPUKernel() override { | ~ConvolutionDepthwiseFp16CPUKernel() override { | ||||
| delete sliding_; | delete sliding_; | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| @@ -154,10 +154,14 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionFP16CPUKernel::Init() { | int ConvolutionFP16CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||||
| return RET_ERROR; | |||||
| MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret; | |||||
| return ret; | |||||
| } | } | ||||
| ret = InitWeightBias(); | ret = InitWeightBias(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -193,7 +197,7 @@ int ConvolutionFP16CPUKernel::ReSize() { | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| return RET_ERROR; | |||||
| return ret; | |||||
| } | } | ||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -220,7 +224,11 @@ int ConvolutionFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionFP16CPUKernel::Run() { | int ConvolutionFP16CPUKernel::Run() { | ||||
| // cast fp32 input data to fp16 | |||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input_tensor = inputs_.at(kInputIndex); | auto input_tensor = inputs_.at(kInputIndex); | ||||
| auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | for (int i = 0; i < input_tensor->ElementsNum(); ++i) { | ||||
| @@ -251,7 +259,7 @@ int ConvolutionFP16CPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | ||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| @@ -267,7 +275,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten | |||||
| conv_param->output_w_ = outputs.front()->Width(); | conv_param->output_w_ = outputs.front()->Width(); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | ||||
| kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } else { | } else { | ||||
| bool use_winograd = false; | bool use_winograd = false; | ||||
| int out_unit; | int out_unit; | ||||
| @@ -275,7 +283,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten | |||||
| OutputTransformUnitFunc output_trans_func = nullptr; | OutputTransformUnitFunc output_trans_func = nullptr; | ||||
| CheckIfUseWinograd(&use_winograd, &out_unit, conv_param, input_trans_func, output_trans_func); | CheckIfUseWinograd(&use_winograd, &out_unit, conv_param, input_trans_func, output_trans_func); | ||||
| if (kernel_h != 1 && kernel_w != 1 && !use_winograd) { | if (kernel_h != 1 && kernel_w != 1 && !use_winograd) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } | } | ||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionFP16CPUKernel() override { | ~ConvolutionFP16CPUKernel() override { | ||||
| if (fp16_input_ != nullptr) { | if (fp16_input_ != nullptr) { | ||||
| free(fp16_input_); | free(fp16_input_); | ||||
| @@ -99,12 +99,19 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseFp16CPUKernel::Init() { | int DeconvolutionDepthwiseFp16CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| sliding_ = new SlidingWindowParam; | sliding_ = new SlidingWindowParam; | ||||
| InitSlideParam(); | InitSlideParam(); | ||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| auto ret = InitWeightBias(); | |||||
| ret = InitWeightBias(); | |||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed."; | MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -150,6 +157,11 @@ int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseFp16CPUKernel::Run() { | int DeconvolutionDepthwiseFp16CPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | if (conv_param_->input_channel_ != conv_param_->output_channel_) { | ||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | MS_LOG(ERROR) << "Only support input channel equals output channel."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -161,7 +173,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_, | ||||
| conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); | ||||
| auto ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -176,10 +188,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); | MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); | ||||
| auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~DeconvolutionDepthwiseFp16CPUKernel() override { | ~DeconvolutionDepthwiseFp16CPUKernel() override { | ||||
| delete sliding_; | delete sliding_; | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/ops/ops.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| @@ -78,6 +79,11 @@ int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ActivationCPUKernel::Run() { | int ActivationCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_); | int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_); | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | ||||
| @@ -89,10 +95,10 @@ int ActivationCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuActivationFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuActivationFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Activation); | MS_ASSERT(desc.type == schema::PrimitiveType_Activation); | ||||
| auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class ActivationCPUKernel : public LiteKernel { | class ActivationCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ActivationCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | ActivationCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(param, inputs, outputs), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||||
| type_ = (reinterpret_cast<ActivationParameter *>(param))->type_; | type_ = (reinterpret_cast<ActivationParameter *>(param))->type_; | ||||
| alpha_ = (reinterpret_cast<ActivationParameter *>(param))->alpha_; | alpha_ = (reinterpret_cast<ActivationParameter *>(param))->alpha_; | ||||
| } | } | ||||
| @@ -20,8 +20,8 @@ | |||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| using mindspore::lite::KernelRegistrar; | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::schema::ActivationGradType_HSWISH; | using mindspore::schema::ActivationGradType_HSWISH; | ||||
| @@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_ActivationGrad; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ActivationGradCPUKernel::Init() { | int ActivationGradCPUKernel::Init() { | ||||
| outputs_[0]->set_shape(inputs_[0]->shape()); | |||||
| return RET_OK; | |||||
| outputs_[0]->set_shape(inputs_[0]->shape()); | |||||
| return RET_OK; | |||||
| } | } | ||||
| int ActivationGradCPUKernel::ReSize() { return RET_OK; } | int ActivationGradCPUKernel::ReSize() { return RET_OK; } | ||||
| @@ -58,7 +58,7 @@ int ActivationGradCPUKernel::DoActivation(int task_id) { | |||||
| error_code = TanhGrad(yt_addr, input_addr, length, output_addr); | error_code = TanhGrad(yt_addr, input_addr, length, output_addr); | ||||
| } else if (type_ == schema::ActivationGradType_HSWISH) { | } else if (type_ == schema::ActivationGradType_HSWISH) { | ||||
| error_code = HSwishGrad(yt_addr, input_addr, length, output_addr); | error_code = HSwishGrad(yt_addr, input_addr, length, output_addr); | ||||
| } else if (type_ == schema::ActivationGradType_HSIGMOID) { | |||||
| } else if (type_ == schema::ActivationGradType_HSIGMOID) { | |||||
| error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr); | error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr); | ||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Activation type error"; | MS_LOG(ERROR) << "Activation type error"; | ||||
| @@ -90,17 +90,17 @@ int ActivationGradCPUKernel::Run() { | |||||
| } | } | ||||
| kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad); | MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad); | ||||
| auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ | |||||
| << ", type: " | |||||
| MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | ||||
| } | } | ||||
| return kernel; | return kernel; | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class ActivationGradCPUKernel : public LiteKernel { | class ActivationGradCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(param, inputs, outputs) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(param, inputs, outputs, ctx, primitive) { | |||||
| ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param); | ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param); | ||||
| type_ = param_act_grad->type_; | type_ = param_act_grad->type_; | ||||
| alpha_ = param_act_grad->alpha_; | alpha_ = param_act_grad->alpha_; | ||||
| @@ -36,12 +36,9 @@ int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata); | auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata); | ||||
| return kernel->AddNParallelRun(thread_id); | return kernel->AddNParallelRun(thread_id); | ||||
| } | } | ||||
| } | |||||
| } // namespace | |||||
| int AddNCPUKernel::Init() { | |||||
| elements_num_ = inputs_[0]->ElementsNum(); | |||||
| return RET_OK; | |||||
| } | |||||
| int AddNCPUKernel::Init() { return RET_OK; } | |||||
| int AddNCPUKernel::ReSize() { return RET_OK; } | int AddNCPUKernel::ReSize() { return RET_OK; } | ||||
| @@ -58,6 +55,12 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) { | |||||
| } | } | ||||
| int AddNCPUKernel::Run() { | int AddNCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| elements_num_ = inputs_[0]->ElementsNum(); | |||||
| auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data()); | auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data()); | ||||
| auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data()); | auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data()); | ||||
| auto output_data = reinterpret_cast<float *>(outputs_[0]->Data()); | auto output_data = reinterpret_cast<float *>(outputs_[0]->Data()); | ||||
| @@ -71,7 +74,7 @@ int AddNCPUKernel::Run() { | |||||
| in1_addr_ = input0_data; | in1_addr_ = input0_data; | ||||
| in2_addr_ = input1_data; | in2_addr_ = input1_data; | ||||
| out_addr_ = output_data; | out_addr_ = output_data; | ||||
| int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -91,7 +94,7 @@ int AddNCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -102,7 +105,7 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Ten | |||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_AddN); | MS_ASSERT(desc.type == schema::PrimitiveType_AddN); | ||||
| op_parameter->thread_num_ = ctx->thread_num_; | op_parameter->thread_num_ = ctx->thread_num_; | ||||
| auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new AddNCPUKernel fail!"; | MS_LOG(ERROR) << "new AddNCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -21,18 +21,20 @@ | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class AddNCPUKernel : public LiteKernel { | class AddNCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| AddNCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | AddNCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~AddNCPUKernel() = default; | ~AddNCPUKernel() = default; | ||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| int AddNParallelRun(int thread_id); | int AddNParallelRun(int thread_id); | ||||
| private: | private: | ||||
| float *in1_addr_; | float *in1_addr_; | ||||
| float *in2_addr_; | float *in2_addr_; | ||||
| @@ -40,7 +40,12 @@ int ArgMinMaxCPUKernel::Init() { | |||||
| } | } | ||||
| int ArgMinMaxCPUKernel::Run() { | int ArgMinMaxCPUKernel::Run() { | ||||
| auto ret = ArgMinMaxBaseCPUKernel::Run(); | |||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||||
| return ret; | |||||
| } | |||||
| ret = ArgMinMaxBaseCPUKernel::Run(); | |||||
| ArgMinMaxBaseCPUKernel::FreeTmpMemory(); | ArgMinMaxBaseCPUKernel::FreeTmpMemory(); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -23,8 +23,9 @@ namespace mindspore::kernel { | |||||
| class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { | class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { | ||||
| public: | public: | ||||
| ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ArgMinMaxCPUKernel() = default; | ~ArgMinMaxCPUKernel() = default; | ||||
| @@ -35,4 +36,3 @@ class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_ | ||||
| @@ -41,6 +41,10 @@ ArithmeticCPUKernel::~ArithmeticCPUKernel() { | |||||
| } | } | ||||
| } | } | ||||
| int ArithmeticCPUKernel::Init() { | int ArithmeticCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto element_num = outputs_[0]->ElementsNum(); | auto element_num = outputs_[0]->ElementsNum(); | ||||
| tile_data0_ = new float[element_num]; | tile_data0_ = new float[element_num]; | ||||
| @@ -92,6 +96,11 @@ int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ArithmeticCPUKernel::Run() { | int ArithmeticCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| if (arithmeticParameter_->broadcasting_) { | if (arithmeticParameter_->broadcasting_) { | ||||
| auto input_data0 = reinterpret_cast<float *>(inputs_[0]->Data()); | auto input_data0 = reinterpret_cast<float *>(inputs_[0]->Data()); | ||||
| auto input_data1 = reinterpret_cast<float *>(inputs_[1]->Data()); | auto input_data1 = reinterpret_cast<float *>(inputs_[1]->Data()); | ||||
| @@ -108,9 +117,9 @@ int ArithmeticCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *parameter, const lite::Context *ctx, | OpParameter *parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(parameter != nullptr); | MS_ASSERT(parameter != nullptr); | ||||
| auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx); | |||||
| auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -48,8 +48,9 @@ class ArithmeticCPUKernel : public LiteKernel { | |||||
| public: | public: | ||||
| ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { | |||||
| arithmeticParameter_ = reinterpret_cast<ArithmeticParameter *>(parameter); | arithmeticParameter_ = reinterpret_cast<ArithmeticParameter *>(parameter); | ||||
| switch (parameter->type_) { | switch (parameter->type_) { | ||||
| case PrimitiveType_Mul: | case PrimitiveType_Mul: | ||||
| @@ -261,12 +261,13 @@ int ArithmeticGradCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_EXCEPTION_IF_NULL(opParameter); | MS_EXCEPTION_IF_NULL(opParameter); | ||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -37,8 +37,9 @@ class ArithmeticGradCPUKernel : public LiteKernel { | |||||
| public: | public: | ||||
| explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { | |||||
| switch (type()) { | switch (type()) { | ||||
| case PrimitiveType_MulGrad: | case PrimitiveType_MulGrad: | ||||
| arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape | arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape | ||||
| @@ -27,6 +27,10 @@ using mindspore::lite::RET_OK; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ArithmeticSelfCPUKernel::Init() { | int ArithmeticSelfCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| int ret = ReSize(); | int ret = ReSize(); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -68,11 +72,16 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) { | |||||
| } | } | ||||
| int ArithmeticSelfCPUKernel::Run() { | int ArithmeticSelfCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| auto input_tensor = inputs_.at(0); | auto input_tensor = inputs_.at(0); | ||||
| auto out_tensor = outputs_.at(0); | auto out_tensor = outputs_.at(0); | ||||
| in_ptr_ = reinterpret_cast<float *>(input_tensor->Data()); | in_ptr_ = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| out_ptr_ = reinterpret_cast<float *>(out_tensor->Data()); | out_ptr_ = reinterpret_cast<float *>(out_tensor->Data()); | ||||
| int ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_); | |||||
| ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -83,13 +92,14 @@ int ArithmeticSelfCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Creator failed, opParameter is nullptr!"; | MS_LOG(ERROR) << "Creator failed, opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -24,9 +24,9 @@ | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| using mindspore::lite::Context; | using mindspore::lite::Context; | ||||
| using mindspore::schema::PrimitiveType_Abs; | using mindspore::schema::PrimitiveType_Abs; | ||||
| using mindspore::schema::PrimitiveType_Ceil; | |||||
| using mindspore::schema::PrimitiveType_Cos; | using mindspore::schema::PrimitiveType_Cos; | ||||
| using mindspore::schema::PrimitiveType_Exp; | using mindspore::schema::PrimitiveType_Exp; | ||||
| using mindspore::schema::PrimitiveType_Floor; | using mindspore::schema::PrimitiveType_Floor; | ||||
| @@ -36,7 +36,6 @@ using mindspore::schema::PrimitiveType_Rsqrt; | |||||
| using mindspore::schema::PrimitiveType_Sin; | using mindspore::schema::PrimitiveType_Sin; | ||||
| using mindspore::schema::PrimitiveType_Sqrt; | using mindspore::schema::PrimitiveType_Sqrt; | ||||
| using mindspore::schema::PrimitiveType_Square; | using mindspore::schema::PrimitiveType_Square; | ||||
| using mindspore::schema::PrimitiveType_Ceil; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ArithmeticSelfCPUKernel : public LiteKernel { | class ArithmeticSelfCPUKernel : public LiteKernel { | ||||
| @@ -44,8 +43,9 @@ class ArithmeticSelfCPUKernel : public LiteKernel { | |||||
| public: | public: | ||||
| explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| switch (parameter->type_) { | switch (parameter->type_) { | ||||
| case PrimitiveType_Abs: | case PrimitiveType_Abs: | ||||
| arithmeticSelf_run_ = ElementAbs; | arithmeticSelf_run_ = ElementAbs; | ||||
| @@ -106,4 +106,3 @@ class ArithmeticSelfCPUKernel : public LiteKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_ | ||||
| @@ -28,6 +28,11 @@ int BatchToSpaceCPUKernel::Init() { | |||||
| } | } | ||||
| int BatchToSpaceCPUKernel::Run() { | int BatchToSpaceCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input = inputs_[0]; | auto input = inputs_[0]; | ||||
| auto output = outputs_[0]; | auto output = outputs_[0]; | ||||
| const float *input_data = reinterpret_cast<const float *>(input->Data()); | const float *input_data = reinterpret_cast<const float *>(input->Data()); | ||||
| @@ -22,8 +22,9 @@ namespace mindspore::kernel { | |||||
| class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { | class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { | ||||
| public: | public: | ||||
| BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~BatchToSpaceCPUKernel() = default; | ~BatchToSpaceCPUKernel() = default; | ||||
| @@ -34,4 +35,3 @@ class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCH_TO_SPACE_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCH_TO_SPACE_H_ | ||||
| @@ -53,6 +53,11 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int BatchnormCPUKernel::Run() { | int BatchnormCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data()); | in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data()); | ||||
| mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data()); | mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data()); | ||||
| var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data()); | var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data()); | ||||
| @@ -76,10 +81,10 @@ int BatchnormCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm); | MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm); | ||||
| auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new BatchNormCPUKernel fail!"; | MS_LOG(ERROR) << "new BatchNormCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class BatchnormCPUKernel : public LiteKernel { | class BatchnormCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { | |||||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | ||||
| } | } | ||||
| ~BatchnormCPUKernel() override { delete batchnorm_param_; } | ~BatchnormCPUKernel() override { delete batchnorm_param_; } | ||||
| @@ -31,6 +31,11 @@ namespace mindspore::kernel { | |||||
| int BiasCPUKernel::ReSize() { return RET_OK; } | int BiasCPUKernel::ReSize() { return RET_OK; } | ||||
| int BiasCPUKernel::Run() { | int BiasCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | ||||
| auto bias = reinterpret_cast<float *>(inputs_.at(1)->Data()); | auto bias = reinterpret_cast<float *>(inputs_.at(1)->Data()); | ||||
| auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | ||||
| @@ -44,6 +49,10 @@ int BiasCPUKernel::Run() { | |||||
| } | } | ||||
| int BiasCPUKernel::Init() { | int BiasCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto dims = inputs_[0]->shape(); | auto dims = inputs_[0]->shape(); | ||||
| MS_ASSERT(dims.size() <= 5); | MS_ASSERT(dims.size() <= 5); | ||||
| bias_param_->ndim_ = dims.size(); | bias_param_->ndim_ = dims.size(); | ||||
| @@ -58,10 +67,11 @@ int BiasCPUKernel::Init() { | |||||
| kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | ||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(parameter != nullptr); | MS_ASSERT(parameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd); | MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd); | ||||
| auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs); | |||||
| auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -24,9 +24,10 @@ namespace mindspore::kernel { | |||||
| class BiasCPUKernel : public LiteKernel { | class BiasCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| BiasCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | BiasCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs) { | |||||
| bias_param_ = reinterpret_cast<ArithmeticParameter*>(parameter); | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter); | |||||
| } | } | ||||
| ~BiasCPUKernel() override = default; | ~BiasCPUKernel() override = default; | ||||
| @@ -40,4 +41,3 @@ class BiasCPUKernel : public LiteKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_H_ | ||||
| @@ -20,12 +20,11 @@ | |||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::schema::PrimitiveType_BiasGrad; | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::schema::PrimitiveType_BiasGrad; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int BiasGradCPUKernel::InferShape() { | int BiasGradCPUKernel::InferShape() { | ||||
| @@ -68,10 +67,14 @@ int BiasGradCPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int BiasGradCPUKernel::ReSize() { return 0; } | int BiasGradCPUKernel::ReSize() { return 0; } | ||||
| int BiasGradCPUKernel::Run() { | int BiasGradCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | auto in = reinterpret_cast<float *>(inputs_.at(0)->Data()); | ||||
| auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | auto out = reinterpret_cast<float *>(outputs_.at(0)->Data()); | ||||
| // size_t data_size = inputs_.at(0)->ElementsNum(); | // size_t data_size = inputs_.at(0)->ElementsNum(); | ||||
| @@ -91,14 +94,14 @@ int BiasGradCPUKernel::Run() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad); | MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad); | ||||
| auto *kernel = new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); | |||||
| auto *kernel = | |||||
| new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class BiasGradCPUKernel : public LiteKernel { | class BiasGradCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| bias_param = reinterpret_cast<ArithmeticParameter *>(parameter); | bias_param = reinterpret_cast<ArithmeticParameter *>(parameter); | ||||
| } | } | ||||
| ~BiasGradCPUKernel() override = default; | ~BiasGradCPUKernel() override = default; | ||||
| @@ -96,12 +96,12 @@ int BNGradInputCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput); | MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput); | ||||
| // parameter->name = opDef.name()->str().data(); | // parameter->name = opDef.name()->str().data(); | ||||
| // parameter->type = opDef.attr_type(); | // parameter->type = opDef.attr_type(); | ||||
| auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (RET_OK != ret) { | if (RET_OK != ret) { | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class BNGradInputCPUKernel : public LiteKernel { | class BNGradInputCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~BNGradInputCPUKernel() override { delete workspace; } | ~BNGradInputCPUKernel() override { delete workspace; } | ||||
| int Init() override; | int Init() override; | ||||
| @@ -27,6 +27,10 @@ using mindspore::schema::PrimitiveType_BroadcastTo; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int BroadcastToCPUKernel::Init() { | int BroadcastToCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto input_shape = inputs_[0]->shape(); | auto input_shape = inputs_[0]->shape(); | ||||
| for (size_t i = 0; i < input_shape.size(); ++i) { | for (size_t i = 0; i < input_shape.size(); ++i) { | ||||
| shape_info_.input_shape_[i] = input_shape[i]; | shape_info_.input_shape_[i] = input_shape[i]; | ||||
| @@ -42,6 +46,11 @@ int BroadcastToCPUKernel::Init() { | |||||
| } | } | ||||
| int BroadcastToCPUKernel::Run() { | int BroadcastToCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input_data = reinterpret_cast<float *>(inputs_.at(0)->Data()); | auto input_data = reinterpret_cast<float *>(inputs_.at(0)->Data()); | ||||
| auto output_data = reinterpret_cast<float *>(outputs_.at(0)->Data()); | auto output_data = reinterpret_cast<float *>(outputs_.at(0)->Data()); | ||||
| @@ -51,13 +60,13 @@ int BroadcastToCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *op_parameter, const lite::Context *ctx, | OpParameter *op_parameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (op_parameter == nullptr) { | if (op_parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | MS_LOG(ERROR) << "Input op_parameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_BroadcastTo); | MS_ASSERT(desc.type == schema::PrimitiveType_BroadcastTo); | ||||
| auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!"; | MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -25,18 +25,18 @@ namespace mindspore::kernel { | |||||
| class BroadcastToCPUKernel : public LiteKernel { | class BroadcastToCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~BroadcastToCPUKernel() = default; | ~BroadcastToCPUKernel() = default; | ||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override { | |||||
| return 0; | |||||
| } | |||||
| int ReSize() override { return 0; } | |||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| BroadcastShapeInfo shape_info_; | BroadcastShapeInfo shape_info_; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BROADCAST_TO_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BROADCAST_TO_H_ | ||||
| @@ -30,9 +30,6 @@ using mindspore::schema::PrimitiveType_Cast; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| namespace { | namespace { | ||||
| constexpr int kInputNum = 1; | |||||
| constexpr int kOutputNum = 1; | |||||
| const std::vector<int> kSupportInputDataType = {kNumberTypeUInt8, kNumberTypeInt32}; | |||||
| int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | ||||
| if (cdata == nullptr) { | if (cdata == nullptr) { | ||||
| MS_LOG(ERROR) << "input cdata is nullptr!"; | MS_LOG(ERROR) << "input cdata is nullptr!"; | ||||
| @@ -44,12 +41,16 @@ int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } // namespace | } // namespace | ||||
| int CastCPUKernel::Init() { | int CastCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| data_num_ = inputs_[0]->ElementsNum(); | data_num_ = inputs_[0]->ElementsNum(); | ||||
| if (data_num_ == 0) { | if (data_num_ == 0) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| thread_num_ = MSMIN(thread_num_, data_num_); | |||||
| stride_ = UP_DIV(data_num_, thread_num_); | |||||
| opParameter->thread_num_ = MSMIN(opParameter->thread_num_, data_num_); | |||||
| stride_ = UP_DIV(data_num_, opParameter->thread_num_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -77,16 +78,21 @@ int CastCPUKernel::DoCast(int thread_id) { | |||||
| } | } | ||||
| int CastCPUKernel::Run() { | int CastCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| if (data_num_ == 0) { | if (data_num_ == 0) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return LiteBackendParallelLaunch(CastRun, this, thread_num_); | |||||
| return LiteBackendParallelLaunch(CastRun, this, opParameter->thread_num_); | |||||
| } | } | ||||
| kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| if (opParameter == nullptr) { | if (opParameter == nullptr) { | ||||
| MS_LOG(ERROR) << "Input opParameter is nullptr!"; | MS_LOG(ERROR) << "Input opParameter is nullptr!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -99,7 +105,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten | |||||
| MS_LOG(ERROR) << "context thread num is 0!"; | MS_LOG(ERROR) << "context thread num is 0!"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new CastCPUKernel fail!"; | MS_LOG(ERROR) << "new CastCPUKernel fail!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -23,27 +23,23 @@ namespace mindspore::kernel { | |||||
| class CastCPUKernel : public LiteKernel { | class CastCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs) { | |||||
| if (ctx != nullptr) { | |||||
| thread_num_ = ctx->thread_num_; | |||||
| } | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| opParameter->thread_num_ = ctx->thread_num_; | |||||
| } | } | ||||
| ~CastCPUKernel() = default; | ~CastCPUKernel() = default; | ||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override { | |||||
| return 0; | |||||
| }; | |||||
| int ReSize() override { return 0; }; | |||||
| int Run() override; | int Run() override; | ||||
| int DoCast(int thread_id); | int DoCast(int thread_id); | ||||
| private: | private: | ||||
| uint32_t thread_num_; | |||||
| uint32_t stride_; | uint32_t stride_; | ||||
| uint32_t data_num_; | uint32_t data_num_; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ | ||||
| @@ -28,44 +28,54 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Concat; | using mindspore::schema::PrimitiveType_Concat; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConcatCPUKernel::Init() { | |||||
| ConcatBaseCPUKernel::Init(); | |||||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||||
| bool need_convert_format = false; | |||||
| for (size_t i = 1; i < inputs_.size(); ++i) { | |||||
| if (inputs_[i]->GetFormat() != input0_format) { | |||||
| need_convert_format = true; | |||||
| } | |||||
| } | |||||
| if (!need_convert_format) { | |||||
| outputs_[0]->SetFormat(input0_format); | |||||
| return RET_OK; | |||||
| } | |||||
| MS_LOG(ERROR) << "All input format should be the same!"; | |||||
| return RET_ERROR; | |||||
| int ConcatCPUKernel::Init() { | |||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConcatBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||||
| bool need_convert_format = false; | |||||
| for (size_t i = 1; i < inputs_.size(); ++i) { | |||||
| if (inputs_[i]->GetFormat() != input0_format) { | |||||
| need_convert_format = true; | |||||
| } | } | ||||
| } | |||||
| if (!need_convert_format) { | |||||
| outputs_[0]->SetFormat(input0_format); | |||||
| return RET_OK; | |||||
| } | |||||
| MS_LOG(ERROR) << "All input format should be the same!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| int ConcatCPUKernel::ReSize() { return RET_OK; } | |||||
| int ConcatCPUKernel::ReSize() { return RET_OK; } | |||||
| int ConcatCPUKernel::Run() { | |||||
| auto input_num = inputs_.size(); | |||||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| int ConcatCPUKernel::Run() { | |||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input_num = inputs_.size(); | |||||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| std::vector <std::vector<int>> shapes; | |||||
| for (size_t i = 0; i < input_num; ++i) { | |||||
| inputs_addr[i] = inputs_[i]->Data(); | |||||
| shapes.push_back(inputs_[i]->shape()); | |||||
| inputs_output_shape[i] = shapes[i].data(); | |||||
| } | |||||
| auto output_shape = outputs_.at(0)->shape(); | |||||
| inputs_output_shape[input_num] = output_shape.data(); | |||||
| auto output_addr = outputs_.at(0)->Data(); | |||||
| std::vector<std::vector<int>> shapes; | |||||
| for (size_t i = 0; i < input_num; ++i) { | |||||
| inputs_addr[i] = inputs_[i]->Data(); | |||||
| shapes.push_back(inputs_[i]->shape()); | |||||
| inputs_output_shape[i] = shapes[i].data(); | |||||
| } | |||||
| auto output_shape = outputs_.at(0)->shape(); | |||||
| inputs_output_shape[input_num] = output_shape.data(); | |||||
| auto output_addr = outputs_.at(0)->Data(); | |||||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||||
| output_shape.size(), output_addr); | |||||
| return RET_OK; | |||||
| } | |||||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||||
| output_shape.size(), output_addr); | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class ConcatCPUKernel : public ConcatBaseCPUKernel { | class ConcatCPUKernel : public ConcatBaseCPUKernel { | ||||
| public: | public: | ||||
| ConcatCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConcatCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConcatCPUKernel() = default; | ~ConcatCPUKernel() = default; | ||||
| @@ -42,4 +43,3 @@ class ConcatCPUKernel : public ConcatBaseCPUKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONCAT_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONCAT_H_ | ||||
| @@ -29,6 +29,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU; | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::lite::RET_INFER_INVALID; | |||||
| using mindspore::schema::PrimitiveType_Conv2D; | using mindspore::schema::PrimitiveType_Conv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -136,6 +137,10 @@ void ConvolutionCPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionCPUKernel::Init() { | int ConvolutionCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| @@ -204,6 +209,11 @@ int ConvolutionImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionCPUKernel::Run() { | int ConvolutionCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input_tensor = inputs_.at(kInputIndex); | auto input_tensor = inputs_.at(kInputIndex); | ||||
| auto ori_input_data = input_tensor->Data(); | auto ori_input_data = input_tensor->Data(); | ||||
| int in_batch = conv_param_->input_batch_; | int in_batch = conv_param_->input_batch_; | ||||
| @@ -223,7 +233,7 @@ int ConvolutionCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); | ||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| @@ -245,20 +255,21 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten | |||||
| kernel::LiteKernel *kernel; | kernel::LiteKernel *kernel; | ||||
| if (kernel_h == 1 && kernel_w == 1) { | if (kernel_h == 1 && kernel_w == 1) { | ||||
| // kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(opParameter, inputs, outputs, ctx); | // kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(opParameter, inputs, outputs, ctx); | ||||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | } else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | ||||
| kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } else if (use_winograd) { | } else if (use_winograd) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, out_unit); | |||||
| kernel = | |||||
| new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, primitive, out_unit); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | |||||
| if (ret != RET_OK && ret != RET_INFER_INVALID) { | |||||
| delete kernel; | delete kernel; | ||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | ||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionCPUKernel() override { | ~ConvolutionCPUKernel() override { | ||||
| if (packed_input_ != nullptr) { | if (packed_input_ != nullptr) { | ||||
| free(packed_input_); | free(packed_input_); | ||||
| @@ -136,6 +136,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) { | |||||
| } | } | ||||
| int Convolution1x1CPUKernel::Init() { | int Convolution1x1CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| InitConv1x1MatmulParam(); | InitConv1x1MatmulParam(); | ||||
| @@ -178,6 +182,11 @@ int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int Convolution1x1CPUKernel::Run() { | int Convolution1x1CPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto src_in = reinterpret_cast<float *>(inputs_[0]->Data()); | auto src_in = reinterpret_cast<float *>(inputs_[0]->Data()); | ||||
| auto src_out = reinterpret_cast<float *>(outputs_[0]->Data()); | auto src_out = reinterpret_cast<float *>(outputs_[0]->Data()); | ||||
| @@ -34,8 +34,9 @@ namespace mindspore::kernel { | |||||
| class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { | class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| matmul_param_ = new MatMulParameter(); | matmul_param_ = new MatMulParameter(); | ||||
| } | } | ||||
| ~Convolution1x1CPUKernel(); | ~Convolution1x1CPUKernel(); | ||||
| @@ -166,6 +166,10 @@ void Convolution3x3CPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int Convolution3x3CPUKernel::Init() { | int Convolution3x3CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| @@ -237,6 +241,11 @@ int Convolution3x3Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int Convolution3x3CPUKernel::Run() { | int Convolution3x3CPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input_tensor = inputs_.at(kInputIndex); | auto input_tensor = inputs_.at(kInputIndex); | ||||
| auto ori_input_data = input_tensor->Data(); | auto ori_input_data = input_tensor->Data(); | ||||
| int in_batch = conv_param_->input_batch_; | int in_batch = conv_param_->input_batch_; | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel { | class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| Convolution3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | Convolution3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~Convolution3x3CPUKernel() override { | ~Convolution3x3CPUKernel() override { | ||||
| if (transformed_filter_addr_ != nullptr) { | if (transformed_filter_addr_ != nullptr) { | ||||
| free(transformed_filter_addr_); | free(transformed_filter_addr_); | ||||
| @@ -25,6 +25,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU; | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::lite::RET_INFER_INVALID; | |||||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | using mindspore::schema::PrimitiveType_DepthwiseConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -86,6 +87,10 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseCPUKernel::Init() { | int ConvolutionDepthwiseCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| @@ -144,6 +149,11 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseCPUKernel::Run() { | int ConvolutionDepthwiseCPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | if (conv_param_->input_channel_ != conv_param_->output_channel_) { | ||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | MS_LOG(ERROR) << "Only support input channel equals output channel."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -164,7 +174,7 @@ int ConvolutionDepthwiseCPUKernel::Run() { | |||||
| packed_output_ = output_addr; | packed_output_ = output_addr; | ||||
| } | } | ||||
| auto ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -180,11 +190,11 @@ int ConvolutionDepthwiseCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const Context *ctx, | OpParameter *opParameter, const Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); | ||||
| kernel::LiteKernel *kernel; | kernel::LiteKernel *kernel; | ||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| // auto param = reinterpret_cast<ConvParameter *>(opParameter); | // auto param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| // if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && | // if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && | ||||
| // param->dilation_h_ == 1 && param->dilation_w_ == 1) { | // param->dilation_h_ == 1 && param->dilation_w_ == 1) { | ||||
| @@ -192,12 +202,13 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T | |||||
| // } else { | // } else { | ||||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | ||||
| // } | // } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | |||||
| if (ret != RET_OK && ret != RET_INFER_INVALID) { | |||||
| delete kernel; | delete kernel; | ||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | ||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionDepthwiseCPUKernel() override { | ~ConvolutionDepthwiseCPUKernel() override { | ||||
| delete sliding_; | delete sliding_; | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| @@ -55,4 +56,3 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_H_ | ||||
| @@ -100,6 +100,10 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int ConvolutionDepthwise3x3CPUKernel::Init() { | int ConvolutionDepthwise3x3CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| @@ -164,6 +168,11 @@ int ConvDw3x3Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwise3x3CPUKernel::Run() { | int ConvolutionDepthwise3x3CPUKernel::Run() { | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare failed."; | |||||
| return ret; | |||||
| } | |||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | if (conv_param_->input_channel_ != conv_param_->output_channel_) { | ||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | MS_LOG(ERROR) << "Only support input channel equals output channel."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -184,7 +193,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() { | |||||
| packed_output_ = output_addr; | packed_output_ = output_addr; | ||||
| } | } | ||||
| auto ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -26,8 +26,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionDepthwise3x3CPUKernel() override { | ~ConvolutionDepthwise3x3CPUKernel() override { | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| @@ -135,11 +135,12 @@ int ConvolutionGradFilterCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, | |||||
| const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter); | ||||
| auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionGradFilterCPUKernel : public LiteKernel { | class ConvolutionGradFilterCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionGradFilterCPUKernel() override { delete workspace; } | ~ConvolutionGradFilterCPUKernel() override { delete workspace; } | ||||
| int Init() override; | int Init() override; | ||||
| @@ -23,9 +23,9 @@ | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::schema::PrimitiveType_Conv2DGradInput; | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::schema::PrimitiveType_Conv2DGradInput; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConvolutionGradInputCPUKernel::Init() { | int ConvolutionGradInputCPUKernel::Init() { | ||||
| @@ -115,11 +115,11 @@ int ConvolutionGradInputCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, | const std::vector<lite::tensor::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::Context *ctx, | OpParameter *opParameter, const lite::Context *ctx, | ||||
| const kernel::KernelKey &desc) { | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput); | ||||
| auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs); | |||||
| auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| MS_ASSERT(kernel != nullptr); | MS_ASSERT(kernel != nullptr); | ||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| @@ -25,8 +25,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionGradInputCPUKernel : public LiteKernel { | class ConvolutionGradInputCPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||||
| : LiteKernel(parameter, inputs, outputs) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionGradInputCPUKernel() override { delete workspace; } | ~ConvolutionGradInputCPUKernel() override { delete workspace; } | ||||
| int Init() override; | int Init() override; | ||||
| @@ -247,6 +247,10 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionWinogradCPUKernel::Init() { | int ConvolutionWinogradCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| SetNeedReInit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| @@ -339,6 +343,11 @@ int ConvolutionWinogradImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata | |||||
| } | } | ||||
| int ConvolutionWinogradCPUKernel::Run() { | int ConvolutionWinogradCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input_tensor = inputs_.at(kInputIndex); | auto input_tensor = inputs_.at(kInputIndex); | ||||
| auto ori_input_data = input_tensor->Data(); | auto ori_input_data = input_tensor->Data(); | ||||
| int in_batch = conv_param_->input_batch_; | int in_batch = conv_param_->input_batch_; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, int output_unit) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), output_unit_(output_unit) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive, int output_unit) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {} | |||||
| ~ConvolutionWinogradCPUKernel() override { | ~ConvolutionWinogradCPUKernel() override { | ||||
| if (tmp_data_ != nullptr) { | if (tmp_data_ != nullptr) { | ||||
| free(tmp_data_); | free(tmp_data_); | ||||
| @@ -40,15 +40,7 @@ int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| int CropCPUKernel::Init() { | |||||
| schema::Format input0_format = inputs_[0]->GetFormat(); | |||||
| if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) { | |||||
| MS_LOG(ERROR) << "Unsupport format " << input0_format; | |||||
| return RET_FORMAT_ERR; | |||||
| } | |||||
| outputs_[0]->SetFormat(input0_format); | |||||
| return RET_OK; | |||||
| } | |||||
| int CropCPUKernel::Init() { return RET_OK; } | |||||
| int CropCPUKernel::CropParallelRun(int thread_id) { | int CropCPUKernel::CropParallelRun(int thread_id) { | ||||
| auto input = inputs_[0]; | auto input = inputs_[0]; | ||||
| @@ -61,6 +53,11 @@ int CropCPUKernel::CropParallelRun(int thread_id) { | |||||
| } | } | ||||
| int CropCPUKernel::Run() { | int CropCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| auto input = inputs_[0]; | auto input = inputs_[0]; | ||||
| auto output = outputs_[0]; | auto output = outputs_[0]; | ||||
| auto param = reinterpret_cast<CropParameter *>(opParameter); | auto param = reinterpret_cast<CropParameter *>(opParameter); | ||||
| @@ -71,7 +68,7 @@ int CropCPUKernel::Run() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); | |||||
| auto ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; | MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -24,8 +24,9 @@ namespace mindspore::kernel { | |||||
| class CropCPUKernel : public CropBaseCPUKernel { | class CropCPUKernel : public CropBaseCPUKernel { | ||||
| public: | public: | ||||
| CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||||
| : CropBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~CropCPUKernel() = default; | ~CropCPUKernel() = default; | ||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override { return 0; } | int ReSize() override { return 0; } | ||||