Merge pull request !3868 from 张学同/to_merge
tags/v0.7.0-beta
@@ -39,7 +39,7 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
   auto &outputs = kernel->GetOutputs();
   for (auto *output : outputs) {
     MS_ASSERT(nullptr != output);
-    output->MallocData(allocator);
+    output->MallocData();
   }
   kernel::CallBackParam callbackParam;
   callbackParam.name_callback_aram = kernel->Name();
@@ -62,7 +62,7 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
   }
   for (auto input_kernel : kernel->GetInKernels()) {
     MS_EXCEPTION_IF_NULL(input_kernel);
-    ret = input_kernel->DecOutTensorRefCount(allocator);
+    ret = input_kernel->DecOutTensorRefCount();
     if (0 != ret) {
       MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->Name() << " failed";
     }
@@ -112,19 +112,24 @@ class Tensor : public mindspore::tensor::MetaTensor {
       return 0;
     }
     size *= (format_ == schema::Format_NC4HW4 || format_ == schema::Format_NHWC4) ? ElementsC4Num()
-                                                                                  : MetaTensor::ElementsNum();
+                                                                                   : MetaTensor::ElementsNum();
     return size;
   }
+  void set_allocator(mindspore::lite::Allocator *allocator) { allocator_ = allocator; }
   int MallocData(mindspore::lite::Allocator *allocator = nullptr) {
     if (nullptr != this->data_) {
       return 0;
     }
-    if (nullptr == allocator) {
+    if (allocator != nullptr) {
+      allocator_ = allocator;
+    }
+    if (allocator_ == nullptr) {
       this->data_ = malloc(this->Size());
     } else {
-      this->data_ = allocator->Malloc(this->Size());
+      this->data_ = allocator_->Malloc(this->Size());
     }
     if (nullptr == this->data_) {
       MS_LOG(ERROR) << "Malloc tensor data failed, size=" << this->Size();
@@ -134,14 +139,14 @@ class Tensor : public mindspore::tensor::MetaTensor {
     return 0;
   }
-  int FreeData(mindspore::lite::Allocator *allocator = nullptr) {
+  int FreeData() {
     if (nullptr == this->data_) {
       return 0;
     }
-    if (nullptr == allocator) {
+    if (nullptr == allocator_) {
       free(this->data_);
     } else {
-      allocator->Free(this->data_);
+      allocator_->Free(this->data_);
       this->data_ = nullptr;
     }
@@ -177,6 +182,7 @@ class Tensor : public mindspore::tensor::MetaTensor {
   schema::Format format_;
   size_t refCount = 0;
   std::vector<tensor::QuantArg> quant_params_;
+  mindspore::lite::Allocator *allocator_ = nullptr;
 };
 class LiteTensor : public mindspore::tensor::MSTensor {
@@ -221,4 +227,3 @@ using TensorPtr = std::shared_ptr<tensor::Tensor>;
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_IR_TENSOR_H_
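
The net effect of the tensor.h hunks above: the allocator is now cached inside the tensor (via `set_allocator()` or the first `MallocData(allocator)` call), so `FreeData()` no longer relies on the caller passing back the matching allocator. A minimal, self-contained sketch of that ownership pattern; the class and member names here are illustrative stand-ins, not MindSpore's real types:

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Stand-in for mindspore::lite::Allocator (illustrative only).
struct Allocator {
  void *Malloc(std::size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

class Tensor {
 public:
  void set_allocator(Allocator *allocator) { allocator_ = allocator; }
  int MallocData(Allocator *allocator = nullptr) {
    if (data_ != nullptr) {
      return 0;  // already allocated
    }
    if (allocator != nullptr) {
      allocator_ = allocator;  // cache it: FreeData() must use the same one
    }
    data_ = (allocator_ == nullptr) ? std::malloc(size_) : allocator_->Malloc(size_);
    return (data_ == nullptr) ? -1 : 0;
  }
  int FreeData() {  // no allocator parameter any more
    if (data_ == nullptr) {
      return 0;
    }
    if (allocator_ == nullptr) {
      std::free(data_);
    } else {
      allocator_->Free(data_);
    }
    data_ = nullptr;
    return 0;
  }

 private:
  void *data_ = nullptr;
  std::size_t size_ = 64;           // fixed size, for the sketch only
  Allocator *allocator_ = nullptr;  // mirrors the new allocator_ member above
};

int main() {
  Allocator alloc;
  Tensor t;
  t.set_allocator(&alloc);  // what Scheduler / SubGraphOpenCLKernel now do up front
  if (t.MallocData() != 0) {
    std::puts("malloc failed");
    return 1;
  }
  t.FreeData();  // routed to the cached allocator; callers carry no allocator state
  return 0;
}
```

This removes a whole class of bugs where a tensor allocated with one allocator could be freed with another (or with plain `free()`), because allocate and free now always consult the same cached pointer.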
@@ -25,11 +25,11 @@ void LiteKernel::InitOutTensorRefCount() {
   }
 }
-int LiteKernel::DecOutTensorRefCount(lite::Allocator *allocator) {
+int LiteKernel::DecOutTensorRefCount() {
   for (auto *tensor : this->outputs_) {
     tensor->decRefCount();
     if (0 >= tensor->RefCount()) {
-      auto ret = tensor->FreeData(allocator);
+      auto ret = tensor->FreeData();
       if (0 != ret) {
         MS_LOG(ERROR) << "Free tensor data failed";
         return ret;
@@ -141,4 +141,3 @@ void LiteKernelUtil::InitTensorRefCount(std::vector<kernel::LiteKernel *> &kerne
 int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector<lite::tensor::Tensor *> inputs) { return -1; }
 }  // namespace mindspore::kernel
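
With `FreeData()` now parameterless, `DecOutTensorRefCount()` above also drops its allocator argument: freeing is driven purely by the reference count reaching zero. A small runnable sketch of that release discipline, with simplified stand-in types rather than the real `LiteKernel`/`Tensor`:

```cpp
#include <cstdio>
#include <vector>

// Simplified stand-in for lite::tensor::Tensor (illustrative only).
struct Tensor {
  int ref_count = 0;
  bool has_data = true;
  void decRefCount() { --ref_count; }
  int FreeData() {  // the tensor's cached allocator handles the real free
    has_data = false;
    return 0;
  }
};

// Mirrors LiteKernel::DecOutTensorRefCount(): after each consumer runs,
// release output tensors whose last consumer has finished.
int DecOutTensorRefCount(std::vector<Tensor *> &outputs) {
  for (auto *tensor : outputs) {
    tensor->decRefCount();
    if (tensor->ref_count <= 0) {
      if (tensor->FreeData() != 0) {
        std::puts("Free tensor data failed");
        return -1;
      }
    }
  }
  return 0;
}

int main() {
  Tensor t;
  t.ref_count = 2;  // two downstream kernels read this output
  std::vector<Tensor *> outs{&t};
  DecOutTensorRefCount(outs);  // first consumer done: data kept
  std::printf("after first consumer:  has_data=%d\n", t.has_data);
  DecOutTensorRefCount(outs);  // second consumer done: data freed
  std::printf("after second consumer: has_data=%d\n", t.has_data);
  return 0;
}
```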
@@ -22,7 +22,6 @@
 #include <arm_neon.h>
 #endif
 #include "src/runtime/kernel/arm/opclib/op_base.h"
-// #include "backend/kernel_compiler/kernel.h"
 #include "include/context.h"
 #include "src/ir/tensor.h"
 #include "src/ops/ops.h"
@@ -60,7 +59,6 @@ struct CallBackParam {
 using KernelCallBack = std::function<bool(std::vector<lite::tensor::Tensor *> inputs,
                                           std::vector<lite::tensor::Tensor *> outputs, const CallBackParam &opInfo)>;
-// class LiteKernel : public KernelMod {
 class LiteKernel {
  public:
   LiteKernel() = default;
@@ -73,17 +71,6 @@ class LiteKernel {
   virtual ~LiteKernel() { delete opParameter; }
-  //  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
-  //              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-  //    return false;
-  //  };
-  //
-  //  const std::vector<size_t> &GetInputSizeList() const override { return {}; }
-  //
-  //  const std::vector<size_t> &GetOutputSizeList() const override { return {}; }
-  //
-  //  const std::vector<size_t> &GetWorkspaceSizeList() const override { return {}; }
   virtual int Prepare() { return -1; }
   virtual int Init() { return -1; }
   virtual int ReSize() { return -1; }
| @@ -115,7 +102,7 @@ class LiteKernel { | |||
| void InitOutTensorRefCount(); | |||
| int DecOutTensorRefCount(lite::Allocator *allocator = nullptr); | |||
| int DecOutTensorRefCount(); | |||
| const KernelKey Desc() const { return desc; } | |||
@@ -134,7 +134,7 @@ int LiteSession::CompileGraph(Model *model) {
   }
   auto ret = ConvertTensors(model);
-  if (0 != ret) {
+  if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
     return ret;
   }
@@ -142,9 +142,9 @@ int LiteSession::CompileGraph(Model *model) {
   InitGraphInOutTensor(model);
   // scheduler kernels
-  Scheduler scheduler(context);
+  Scheduler scheduler(context_);
   ret = scheduler.Schedule(model, &tensors, &kernels);
-  if (0 != ret) {
+  if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
     return ret;
   }
@@ -166,15 +166,15 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() {
 }
 int LiteSession::RunGraph() {
-  MS_EXCEPTION_IF_NULL(this->context);
+  MS_EXCEPTION_IF_NULL(this->context_);
   Executor executor;
-  return executor.Run(this->inputs, this->outputs, this->kernels, this->context->allocator.get());
+  return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get());
 }
 int LiteSession::RunGraph(const kernel::KernelCallBack &before, const kernel::KernelCallBack &after) {
-  MS_EXCEPTION_IF_NULL(this->context);
+  MS_EXCEPTION_IF_NULL(this->context_);
   Executor executor;
-  return executor.Run(this->inputs, this->outputs, this->kernels, this->context->allocator.get(), before, after);
+  return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get(), before, after);
 }
 std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputs() {
@@ -190,30 +190,32 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputs() {
   return ret;
 }
-void LiteSession::Init(Context *context) {
+int LiteSession::Init(Context *context) {
   MS_EXCEPTION_IF_NULL(context);
-  this->context = new Context;
-  this->context->cpuBindMode = context->cpuBindMode;
-  this->context->threadNum = context->threadNum;
-  this->context->deviceCtx.type = context->deviceCtx.type;
-  this->context->allocator = std::make_shared<DefaultAllocator>();
+  this->context_ = new (std::nothrow) Context(context->threadNum, context->allocator, context->deviceCtx);
+  if (this->context_ == nullptr) {
+    MS_LOG(ERROR) << "new context failed";
+    return RET_MEMORY_FAILED;
+  }
+  this->context_->cpuBindMode = context->cpuBindMode;
   ConfigThreadPool(context->cpuBindMode, context->threadNum);
   auto ret = KernelRegistry::GetInstance()->Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "KernelRegistry Init Failed.";
-    return;
+    return ret;
   }
 #if SUPPORT_GPU
-  if (context->deviceCtx.type == DT_GPU) {
+  if (context_->deviceCtx.type == DT_GPU) {
     auto opencl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
     opencl_runtime->Init();
   }
 #endif
+  return RET_OK;
 }
 void LiteSession::BindThread(bool ifBind) {
-  if (this->context->cpuBindMode != NO_BIND) {
-    DoAllThreadBind(ifBind, static_cast<int>(this->context->cpuBindMode));
+  if (this->context_->cpuBindMode != NO_BIND) {
+    DoAllThreadBind(ifBind, static_cast<int>(this->context_->cpuBindMode));
   }
 }
@@ -234,17 +236,18 @@ LiteSession::~LiteSession() {
   }
 }
-std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputsByName(std::string name) {
-  return input_map[name];
-}
-std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(std::string name) {
-  return output_map[name];
-}
+std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputsByName(std::string name) { return input_map[name]; }
+std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(std::string name) { return output_map[name]; }
 }  // namespace lite
 session::LiteSession *session::LiteSession::CreateSession(lite::Context *context) {
   auto session = new lite::LiteSession();
-  session->Init(context);
+  auto ret = session->Init(context);
+  if (ret != mindspore::lite::RET_OK) {
+    MS_LOG(ERROR) << "init session failed";
+    delete session;
+    return nullptr;
+  }
   return session;
 }
 }  // namespace mindspore
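
Since `Init()` can now fail (and `CreateSession()` consequently returns `nullptr` instead of a half-initialized session), callers must check the result. A short usage sketch; the helper name is hypothetical and the header paths are assumed from MindSpore Lite's public API layout:

```cpp
#include "include/context.h"       // assumed public headers
#include "include/lite_session.h"

// Hypothetical helper showing the caller-side contract after this change.
mindspore::session::LiteSession *MakeSession(mindspore::lite::Context *context) {
  auto *session = mindspore::session::LiteSession::CreateSession(context);
  if (session == nullptr) {
    // Init() failed (e.g. RET_MEMORY_FAILED); CreateSession already deleted
    // the session, so there is nothing to clean up here.
    return nullptr;
  }
  return session;
}
```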
@@ -36,7 +36,7 @@ class LiteSession : public session::LiteSession {
   ~LiteSession() override;
-  void Init(Context *context);
+  int Init(Context *context);
   void BindThread(bool ifBind) override;
@@ -60,7 +60,7 @@ class LiteSession : public session::LiteSession {
   void InitGraphInOutTensor(const lite::Model *model);
  protected:
-  Context *context = nullptr;
+  Context *context_ = nullptr;
   std::vector<kernel::LiteKernel *> kernels;
   std::vector<tensor::Tensor *> tensors;
   // graph input tensors
@@ -25,10 +25,10 @@ SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
 int SubGraphOpenCLKernel::Init() {
   allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
   for (const auto tensor : inputs_) {
-    tensor->MallocData(allocator_);
+    tensor->set_allocator(allocator_);
   }
   for (const auto tensor : outputs_) {
-    tensor->MallocData(allocator_);
+    tensor->set_allocator(allocator_);
   }
   // Map buffer for write, it is not necessary for fine-grained
   for (auto &tensor : inputs_) {
@@ -82,4 +82,3 @@ int SubGraphOpenCLKernel::Run() {
 }
 }  // namespace mindspore::kernel
@@ -112,6 +112,11 @@ void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
   for (auto temp_kernels : sub_kernels_list) {
     kernel::KERNEL_ARCH arch = temp_kernels.front()->Desc().arch;
     if (arch == kernel::KERNEL_ARCH::kCPU) {
+      for (auto kernel : temp_kernels) {
+        for (auto tensor : kernel->GetOutputs()) {
+          tensor->set_allocator(context_->allocator.get());
+        }
+      }
       std::copy(temp_kernels.begin(), temp_kernels.end(), std::back_inserter(subgraph_kernels));
     } else {
       auto subgraph_kernel = CreateSubKernel(temp_kernels, arch);
@@ -154,9 +159,9 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
   MS_ASSERT(nullptr != primitive);
   auto data_type = inputs.front()->data_type();
   kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, primitive->Type()};
-  if (context->deviceCtx.type == DT_GPU) {
+  if (context_->deviceCtx.type == DT_GPU) {
     desc.arch = kernel::KERNEL_ARCH::kGPU;
-    auto *kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    auto *kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
     if (nullptr != kernel) {
       kernel->set_desc(desc);
       return kernel;
@@ -168,14 +173,14 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
   if (data_type == kNumberTypeFloat32) {
     // check if support fp16
     kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, key);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, key);
     if (kernel != nullptr) {
       kernel->set_desc(desc);
       return kernel;
     }
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
   } else {
-    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context, desc);
+    kernel = KernelFactory::GetInstance()->GetKernel(inputs, outputs, primitive, context_, desc);
   }
   if (kernel != nullptr) {
     kernel->set_desc(desc);
@@ -25,7 +25,7 @@
 namespace mindspore::lite {
 class Scheduler {
  public:
-  explicit Scheduler(const Context *ctx) : context(ctx) {}
+  explicit Scheduler(const Context *ctx) : context_(ctx) {}
   int Schedule(const lite::Model *model, std::vector<tensor::Tensor *> *tensors,
                std::vector<kernel::LiteKernel *> *kernels);
@@ -48,7 +48,7 @@ class Scheduler {
  protected:
   std::vector<std::vector<size_t>> markedKernelGroup;
-  const Context *context = nullptr;
+  const Context *context_ = nullptr;
 };
 }  // namespace mindspore::lite