Merge pull request !3868 from 张学同/to_merge
tags/v0.7.0-beta
@@ -39,7 +39,7 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
   auto &outputs = kernel->GetOutputs();
   for (auto *output : outputs) {
     MS_ASSERT(nullptr != output);
-    output->MallocData(allocator);
+    output->MallocData();
   }
   kernel::CallBackParam callbackParam;
   callbackParam.name_callback_aram = kernel->Name();
@@ -62,7 +62,7 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
   }
   for (auto input_kernel : kernel->GetInKernels()) {
     MS_EXCEPTION_IF_NULL(input_kernel);
-    ret = input_kernel->DecOutTensorRefCount(allocator);
+    ret = input_kernel->DecOutTensorRefCount();
     if (0 != ret) {
       MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->Name() << " failed";
     }
@@ -112,19 +112,24 @@ class Tensor : public mindspore::tensor::MetaTensor {
       return 0;
     }
     size *= (format_ == schema::Format_NC4HW4 || format_ == schema::Format_NHWC4) ? ElementsC4Num()
-                                                                                  : MetaTensor::ElementsNum();
+                                                                                   : MetaTensor::ElementsNum();
     return size;
   }
+  void set_allocator(mindspore::lite::Allocator *allocator) { allocator_ = allocator; }
   int MallocData(mindspore::lite::Allocator *allocator = nullptr) {
     if (nullptr != this->data_) {
       return 0;
     }
-    if (nullptr == allocator) {
+    if (allocator != nullptr) {
+      allocator_ = allocator;
+    }
+    if (allocator_ == nullptr) {
       this->data_ = malloc(this->Size());
     } else {
-      this->data_ = allocator->Malloc(this->Size());
+      this->data_ = allocator_->Malloc(this->Size());
     }
     if (nullptr == this->data_) {
       MS_LOG(ERROR) << "Malloc tensor data failed, size=" << this->Size();
@@ -134,14 +139,14 @@ class Tensor : public mindspore::tensor::MetaTensor {
     return 0;
   }
-  int FreeData(mindspore::lite::Allocator *allocator = nullptr) {
+  int FreeData() {
     if (nullptr == this->data_) {
       return 0;
     }
-    if (nullptr == allocator) {
+    if (nullptr == allocator_) {
       free(this->data_);
     } else {
-      allocator->Free(this->data_);
+      allocator_->Free(this->data_);
       this->data_ = nullptr;
     }
@@ -177,6 +182,7 @@ class Tensor : public mindspore::tensor::MetaTensor {
   schema::Format format_;
   size_t refCount = 0;
   std::vector<tensor::QuantArg> quant_params_;
+  mindspore::lite::Allocator *allocator_ = nullptr;
 };
 class LiteTensor : public mindspore::tensor::MSTensor {
@@ -221,4 +227,3 @@ using TensorPtr = std::shared_ptr<tensor::Tensor>;
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_IR_TENSOR_H_
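
The net effect of the tensor.h hunks above: the allocator is now cached inside the tensor (via `set_allocator()` or the first `MallocData(allocator)` call), so `FreeData()` no longer relies on the caller passing back the matching allocator. A minimal, self-contained sketch of that ownership pattern; the class and member names here are illustrative stand-ins, not MindSpore's real types:

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Stand-in for mindspore::lite::Allocator (illustrative only).
struct Allocator {
  void *Malloc(std::size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

class Tensor {
 public:
  void set_allocator(Allocator *allocator) { allocator_ = allocator; }
  int MallocData(Allocator *allocator = nullptr) {
    if (data_ != nullptr) {
      return 0;  // already allocated
    }
    if (allocator != nullptr) {
      allocator_ = allocator;  // cache it: FreeData() must use the same one
    }
    data_ = (allocator_ == nullptr) ? std::malloc(size_) : allocator_->Malloc(size_);
    return (data_ == nullptr) ? -1 : 0;
  }
  int FreeData() {  // no allocator parameter any more
    if (data_ == nullptr) {
      return 0;
    }
    if (allocator_ == nullptr) {
      std::free(data_);
    } else {
      allocator_->Free(data_);
    }
    data_ = nullptr;
    return 0;
  }

 private:
  void *data_ = nullptr;
  std::size_t size_ = 64;           // fixed size, for the sketch only
  Allocator *allocator_ = nullptr;  // mirrors the new allocator_ member above
};

int main() {
  Allocator alloc;
  Tensor t;
  t.set_allocator(&alloc);  // what Scheduler / SubGraphOpenCLKernel now do up front
  if (t.MallocData() != 0) {
    std::puts("malloc failed");
    return 1;
  }
  t.FreeData();  // routed to the cached allocator; callers carry no allocator state
  return 0;
}
```

This removes a whole class of bugs where a tensor allocated with one allocator could be freed with another (or with plain `free()`), because allocate and free now always consult the same cached pointer.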
@@ -25,11 +25,11 @@ void LiteKernel::InitOutTensorRefCount() {
   }
 }
-int LiteKernel::DecOutTensorRefCount(lite::Allocator *allocator) {
+int LiteKernel::DecOutTensorRefCount() {
   for (auto *tensor : this->outputs_) {
     tensor->decRefCount();
     if (0 >= tensor->RefCount()) {
-      auto ret = tensor->FreeData(allocator);
+      auto ret = tensor->FreeData();
       if (0 != ret) {
         MS_LOG(ERROR) << "Free tensor data failed";
         return ret;
@@ -141,4 +141,3 @@ void LiteKernelUtil::InitTensorRefCount(std::vector<kernel::LiteKernel *> &kerne
 int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector<lite::tensor::Tensor *> inputs) { return -1; }
 }  // namespace mindspore::kernel
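
With `FreeData()` now parameterless, `DecOutTensorRefCount()` above also drops its allocator argument: freeing is driven purely by the reference count reaching zero. A small runnable sketch of that release discipline, with simplified stand-in types rather than the real `LiteKernel`/`Tensor`:

```cpp
#include <cstdio>
#include <vector>

// Simplified stand-in for lite::tensor::Tensor (illustrative only).
struct Tensor {
  int ref_count = 0;
  bool has_data = true;
  void decRefCount() { --ref_count; }
  int FreeData() {  // the tensor's cached allocator handles the real free
    has_data = false;
    return 0;
  }
};

// Mirrors LiteKernel::DecOutTensorRefCount(): after each consumer runs,
// release output tensors whose last consumer has finished.
int DecOutTensorRefCount(std::vector<Tensor *> &outputs) {
  for (auto *tensor : outputs) {
    tensor->decRefCount();
    if (tensor->ref_count <= 0) {
      if (tensor->FreeData() != 0) {
        std::puts("Free tensor data failed");
        return -1;
      }
    }
  }
  return 0;
}

int main() {
  Tensor t;
  t.ref_count = 2;  // two downstream kernels read this output
  std::vector<Tensor *> outs{&t};
  DecOutTensorRefCount(outs);  // first consumer done: data kept
  std::printf("after first consumer:  has_data=%d\n", t.has_data);
  DecOutTensorRefCount(outs);  // second consumer done: data freed
  std::printf("after second consumer: has_data=%d\n", t.has_data);
  return 0;
}
```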
@@ -22,7 +22,6 @@
 #include <arm_neon.h>
 #endif
 #include "src/runtime/kernel/arm/opclib/op_base.h"
-// #include "backend/kernel_compiler/kernel.h"
 #include "include/context.h"
 #include "src/ir/tensor.h"
 #include "src/ops/ops.h"
@@ -60,7 +59,6 @@ struct CallBackParam {
 using KernelCallBack = std::function<bool(std::vector<lite::tensor::Tensor *> inputs,
                                           std::vector<lite::tensor::Tensor *> outputs, const CallBackParam &opInfo)>;
-// class LiteKernel : public KernelMod {
 class LiteKernel {
  public:
   LiteKernel() = default;
@@ -73,17 +71,6 @@ class LiteKernel {
   virtual ~LiteKernel() { delete opParameter; }
-  //  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
-  //              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-  //    return false;
-  //  };
-  //
-  //  const std::vector<size_t> &GetInputSizeList() const override { return {}; }
-  //
-  //  const std::vector<size_t> &GetOutputSizeList() const override { return {}; }
-  //
-  //  const std::vector<size_t> &GetWorkspaceSizeList() const override { return {}; }
   virtual int Prepare() { return -1; }
   virtual int Init() { return -1; }
   virtual int ReSize() { return -1; }
| @@ -115,7 +102,7 @@ class LiteKernel { | |||
| void InitOutTensorRefCount(); | |||
| int DecOutTensorRefCount(lite::Allocator *allocator = nullptr); | |||
| int DecOutTensorRefCount(); | |||
| const KernelKey Desc() const { return desc; } | |||
@@ -134,7 +134,7 @@ int LiteSession::CompileGraph(Model *model) {
   }
   auto ret = ConvertTensors(model);
-  if (0 != ret) {
+  if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
     return ret;
   }
@@ -142,9 +142,9 @@ int LiteSession::CompileGraph(Model *model) {
   InitGraphInOutTensor(model);
   // scheduler kernels
-  Scheduler scheduler(context);
+  Scheduler scheduler(context_);
   ret = scheduler.Schedule(model, &tensors, &kernels);
-  if (0 != ret) {
+  if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
     return ret;
   }
@@ -166,15 +166,15 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() {
 }
 int LiteSession::RunGraph() {
-  MS_EXCEPTION_IF_NULL(this->context);
+  MS_EXCEPTION_IF_NULL(this->context_);
   Executor executor;
-  return executor.Run(this->inputs, this->outputs, this->kernels, this->context->allocator.get());
+  return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get());
 }
 int LiteSession::RunGraph(const kernel::KernelCallBack &before, const kernel::KernelCallBack &after) {
-  MS_EXCEPTION_IF_NULL(this->context);
+  MS_EXCEPTION_IF_NULL(this->context_);
   Executor executor;
-  return executor.Run(this->inputs, this->outputs, this->kernels, this->context->allocator.get(), before, after);
+  return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get(), before, after);
 }
 std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputs() {
@@ -190,30 +190,32 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputs() {
   return ret;
 }
-void LiteSession::Init(Context *context) {
+int LiteSession::Init(Context *context) {
   MS_EXCEPTION_IF_NULL(context);
-  this->context = new Context;
-  this->context->cpuBindMode = context->cpuBindMode;
-  this->context->threadNum = context->threadNum;
-  this->context->deviceCtx.type = context->deviceCtx.type;
-  this->context->allocator = std::make_shared<DefaultAllocator>();
+  this->context_ = new (std::nothrow) Context(context->threadNum, context->allocator, context->deviceCtx);
+  if (this->context_ == nullptr) {
+    MS_LOG(ERROR) << "new context failed";
+    return RET_MEMORY_FAILED;
+  }
+  this->context_->cpuBindMode = context->cpuBindMode;
   ConfigThreadPool(context->cpuBindMode, context->threadNum);
   auto ret = KernelRegistry::GetInstance()->Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "KernelRegistry Init Failed.";
-    return;
+    return ret;
   }
 #if SUPPORT_GPU
-  if (context->deviceCtx.type == DT_GPU) {
+  if (context_->deviceCtx.type == DT_GPU) {
     auto opencl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
     opencl_runtime->Init();
   }
 #endif
+  return RET_OK;
 }
 void LiteSession::BindThread(bool ifBind) {
-  if (this->context->cpuBindMode != NO_BIND) {
-    DoAllThreadBind(ifBind, static_cast<int>(this->context->cpuBindMode));
+  if (this->context_->cpuBindMode != NO_BIND) {
+    DoAllThreadBind(ifBind, static_cast<int>(this->context_->cpuBindMode));
   }
 }
@@ -234,17 +236,18 @@ LiteSession::~LiteSession() {
   }
 }
-std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputsByName(std::string name) {
-  return input_map[name];
-}
-std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(std::string name) {
-  return output_map[name];
-}
+std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputsByName(std::string name) { return input_map[name]; }
+std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(std::string name) { return output_map[name]; }
 }  // namespace lite
 session::LiteSession *session::LiteSession::CreateSession(lite::Context *context) {
   auto session = new lite::LiteSession();
-  session->Init(context);
+  auto ret = session->Init(context);
+  if (ret != mindspore::lite::RET_OK) {
+    MS_LOG(ERROR) << "init session failed";
+    delete session;
+    return nullptr;
+  }
   return session;
 }
 }  // namespace mindspore
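
Since `Init()` can now fail (and `CreateSession()` consequently returns `nullptr` instead of a half-initialized session), callers must check the result. A short usage sketch; the helper name is hypothetical and the header paths are assumed from MindSpore Lite's public API layout:

```cpp
#include "include/context.h"       // assumed public headers
#include "include/lite_session.h"

// Hypothetical helper showing the caller-side contract after this change.
mindspore::session::LiteSession *MakeSession(mindspore::lite::Context *context) {
  auto *session = mindspore::session::LiteSession::CreateSession(context);
  if (session == nullptr) {
    // Init() failed (e.g. RET_MEMORY_FAILED); CreateSession already deleted
    // the session, so there is nothing to clean up here.
    return nullptr;
  }
  return session;
}
```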
@@ -36,7 +36,7 @@ class LiteSession : public session::LiteSession {
   ~LiteSession() override;
-  void Init(Context *context);
+  int Init(Context *context);
   void BindThread(bool ifBind) override;
@@ -60,7 +60,7 @@ class LiteSession : public session::LiteSession {
   void InitGraphInOutTensor(const lite::Model *model);
  protected:
-  Context *context = nullptr;
+  Context *context_ = nullptr;
   std::vector<kernel::LiteKernel *> kernels;
   std::vector<tensor::Tensor *> tensors;
   // graph input tensors
@@ -25,10 +25,10 @@ SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
 int SubGraphOpenCLKernel::Init() {
   allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
   for (const auto tensor : inputs_) {
-    tensor->MallocData(allocator_);
+    tensor->set_allocator(allocator_);
   }
   for (const auto tensor : outputs_) {
-    tensor->MallocData(allocator_);
+    tensor->set_allocator(allocator_);
   }
   // Map buffer for write, it is not necessary for fine-grained
   for (auto &tensor : inputs_) {
@@ -82,4 +82,3 @@ int SubGraphOpenCLKernel::Run() {
 }
 }  // namespace mindspore::kernel
@@ -112,6 +112,11 @@ void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
   for (auto temp_kernels : sub_kernels_list) {
     kernel::KERNEL_ARCH arch = temp_kernels.front()->Desc().arch;
     if (arch == kernel::KERNEL_ARCH::kCPU) {
+      for (auto kernel : temp_kernels) {
+        for (auto tensor : kernel->GetOutputs()) {
+          tensor->set_allocator(context_->allocator.get());
+        }
+      }
       std::copy(temp_kernels.begin(), temp_kernels.end(), std::back_inserter(subgraph_kernels));
     } else {
       auto subgraph_kernel = CreateSubKernel(temp_kernels, arch);
@@ -154,9 +159,9 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
   MS_ASSERT(nullptr != primitive);
   auto data_type = inputs.front()->data_type();
   kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, primitive->Type()};
-  if (context->deviceCtx.type == DT_GPU) {
+  if (context_->deviceCtx.type == DT_GPU) {
     desc.arch = kernel::KERNEL_ARCH::kGPU;
-    auto *kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    auto *kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
     if (nullptr != kernel) {
       kernel->set_desc(desc);
       return kernel;
@@ -168,14 +173,14 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
   if (data_type == kNumberTypeFloat32) {
     // check if support fp16
     kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, key);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, key);
     if (kernel != nullptr) {
       kernel->set_desc(desc);
       return kernel;
     }
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
   } else {
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
   }
   if (kernel != nullptr) {
     kernel->set_desc(desc);
@@ -25,7 +25,7 @@
 namespace mindspore::lite {
 class Scheduler {
  public:
-  explicit Scheduler(const Context *ctx) : context(ctx) {}
+  explicit Scheduler(const Context *ctx) : context_(ctx) {}
   int Schedule(const lite::Model *model, std::vector<tensor::Tensor *> *tensors,
                std::vector<kernel::LiteKernel *> *kernels);
@@ -48,7 +48,7 @@ class Scheduler {
  protected:
   std::vector<std::vector<size_t>> markedKernelGroup;
-  const Context *context = nullptr;
+  const Context *context_ = nullptr;
 };
 }  // namespace mindspore::lite