executor and subgraph optimize

4 years ago · def805737c
--- a/mindspore/lite/src/common/tensor_util.cc
+++ b/mindspore/lite/src/common/tensor_util.cc
@@ -215,5 +215,26 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite
  return ret;
 }
 // Validates every tensor in `tensors`.
 //
 // A tensor fails validation when:
 //   - the pointer itself is null;
 //   - data_c() is null while data_type() is not kObjectTypeTensorType; or
 //   - shape() contains a negative dimension.
 //
 // Returns RET_ERROR (after logging) on the first failing tensor, RET_OK otherwise.
 // An empty list yields RET_OK.
 int CheckTensorsInvalid(const std::vector<Tensor *> &tensors) {
  for (auto *tensor : tensors) {
    if (tensor == nullptr) {
      MS_LOG(ERROR) << "check tensor is nullptr";
      return RET_ERROR;
    }
    // The data-pointer check is skipped for tensors whose data type is kObjectTypeTensorType.
    const bool is_object_tensor_type = tensor->data_type() == kObjectTypeTensorType;
    if (!is_object_tensor_type && tensor->data_c() == nullptr) {
      MS_LOG(ERROR) << "check tensor data is nullptr " << tensors;
      return RET_ERROR;
    }
    // Bind by const reference so the shape is not copied again for every tensor.
    const auto &shape = tensor->shape();
    // Explicitly std::-qualified: the unqualified call only resolved through ADL on the iterator type.
    const bool has_negative_dim = std::any_of(shape.begin(), shape.end(), [](int dim) { return dim < 0; });
    if (has_negative_dim) {
      MS_LOG(ERROR) << "The shape of tensor contains negative dimension,"
                    << "check the model and assign the input shape with method Resize().";
      return RET_ERROR;
    }
  }
  return RET_OK;
 }
 }  // namespace lite
 }  // namespace mindspore
--- a/mindspore/lite/src/common/tensor_util.h
+++ b/mindspore/lite/src/common/tensor_util.h
@@ -39,6 +39,8 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite
                      std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *in_tensor_c);
 int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
                       std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *out_tensor_c);
 // Checks each tensor for a null pointer, a null data buffer (unless its data type is kObjectTypeTensorType)
 // and negative shape dimensions. Returns RET_OK when all tensors pass, RET_ERROR on the first failure.
 int CheckTensorsInvalid(const std::vector<Tensor *> &tensors);
 }  // namespace lite
 }  // namespace mindspore
--- a/mindspore/lite/src/executor.cc
+++ b/mindspore/lite/src/executor.cc
@@ -17,34 +17,14 @@
 #include "src/executor.h"
 #include <queue>
 #include "include/errorcode.h"
 #include "src/common/tensor_util.h"
 namespace mindspore::lite {
 // Verifies that the graph inputs are usable before execution.
 //
 // Returns RET_ERROR (after logging) as soon as one input is null, has a null data_c() while its data_type()
 // is not kObjectTypeTensorType, or has a negative dimension in its shape(). Returns RET_OK otherwise.
 int Executor::CheckInputs(const std::vector<Tensor *> &in_tensors) {
  for (Tensor *input : in_tensors) {
    if (input == nullptr) {
      MS_LOG(ERROR) << "Graph input tensor is nullptr";
      return RET_ERROR;
    }
    const bool needs_data = input->data_type() != kObjectTypeTensorType;
    if (needs_data && input->data_c() == nullptr) {
      MS_LOG(ERROR) << "Graph input tensor data is nullptr " << in_tensors;
      return RET_ERROR;
    }
    // Reject the tensor on the first negative dimension found.
    for (int dim : input->shape()) {
      if (dim < 0) {
        MS_LOG(ERROR) << "The shape of input tensor contains negative dimension,"
                      << "check the model and assign the input shape with method Resize().";
        return RET_ERROR;
      }
    }
  }
  return RET_OK;
 }
 int Executor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator,
                  const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(nullptr != allocator);
  auto ret = this->CheckInputs(in_tensors);
  auto ret = CheckTensorsInvalid(in_tensors);
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "CheckInputs failed";
    return ret;
@@ -88,42 +68,4 @@ int Executor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Ten
  }
  return RET_OK;
 }
 // Runs `kernels` one after another in the given order.
 //
 // Unless the first kernel is of type PrimitiveType_Merge, `in_tensors` are validated with CheckInputs() first.
 // When built with SUPPORT_TRAIN, the ref count of every tensor in `out_tensors` is incremented beforehand so that
 // Run will not free them. Each kernel then goes through PreProcess(), Run(before, after) and PostProcess(); the
 // first non-RET_OK status is logged and returned immediately.
 //
 // Returns RET_OK when every stage of every kernel succeeds (also when `kernels` is empty).
 int CpuExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                      const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(nullptr != allocator);
  // Inputs are not checked when the graph starts with a Merge kernel (noted as "too hard" by the original author).
  // front() on an empty vector is undefined behaviour, so an empty kernel list is treated as "not Merge".
  const bool starts_with_merge = !kernels.empty() && kernels.front()->Type() == schema::PrimitiveType_Merge;
  if (!starts_with_merge) {
    auto ret = this->CheckInputs(in_tensors);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "CheckInputs failed";
      return ret;
    }
  }
 #ifdef SUPPORT_TRAIN
  for (auto out_tensor : out_tensors) {  // increase RefCount of output tensors, such that Run will not free them
    out_tensor->set_ref_count(out_tensor->ref_count() + 1);
  }
 #endif
  for (auto *kernel : kernels) {
    MS_ASSERT(nullptr != kernel);
    auto ret = kernel->PreProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->Run(before, after);
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->PostProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  return RET_OK;
 }
 }  // namespace mindspore::lite
--- a/mindspore/lite/src/executor.h
+++ b/mindspore/lite/src/executor.h
@@ -33,20 +33,6 @@ class Executor {
  virtual int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator = nullptr,
                  const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
 protected:
  static int CheckInputs(const std::vector<Tensor *> &in_tensors);
 };
 // Executor subclass that supplies its own Run() override; the override is only declared here and is
 // defined outside the class body.
 class CpuExecutor : public Executor {
 public:
  CpuExecutor() = default;
  virtual ~CpuExecutor() = default;
  // Same parameter list and defaults as Executor::Run: `allocator`, `before` and `after` default to nullptr.
  int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
          const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator = nullptr,
          const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override;
 };
 }  // namespace mindspore::lite
 #endif
--- a/mindspore/lite/src/mindrt_executor.cc
+++ b/mindspore/lite/src/mindrt_executor.cc
@@ -18,6 +18,7 @@
 #include "src/mindrt_executor.h"
 #include "src/lite_mindrt.h"
 #include "include/errorcode.h"
 #include "src/common/tensor_util.h"
 namespace mindspore::lite {
@@ -62,7 +63,7 @@ int MindrtExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vect
                        const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(nullptr != allocator);
  if (kernels.front()->Type() != schema::PrimitiveType_Merge) {
    auto ret = this->CheckInputs(in_tensors);
    auto ret = CheckTensorsInvalid(in_tensors);
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "CheckInputs failed";
      return ret;
--- a/mindspore/lite/src/sub_graph_kernel.cc
+++ b/mindspore/lite/src/sub_graph_kernel.cc
@@ -22,6 +22,7 @@
 #endif
 #include "src/common/version_manager.h"
 #include "src/runtime/infer_manager.h"
 #include "src/common/tensor_util.h"
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -70,19 +71,6 @@ std::string SubGraphKernel::ToString() const {
  return oss.str();
 }
 int SubGraphKernel::Run() {
  if (this->executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
    return RET_ERROR;
  }
  auto ret = executor_->Run(this->in_tensors_, this->out_tensors_, this->nodes_, this->context_->allocator.get());
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run sub graph failed: " << ret;
    return ret;
  }
  return RET_OK;
 }
 int SubGraphKernel::Run(const KernelCallBack &before, const KernelCallBack &after) {
  if (this->executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
@@ -165,15 +153,42 @@ int CpuSubGraph::Prepare() {
      tensor->set_allocator(this->context_->allocator.get());
    }
  }
  this->executor_ = new (std::nothrow) mindspore::lite::CpuExecutor;
  if (this->executor_ == nullptr) {
    MS_LOG(ERROR) << "new CpuExecutor failed";
    return RET_ERROR;
  return RET_OK;
 }
 int CpuSubGraph::Run(const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(nullptr != this->context_->allocator.get());
  if (nodes_.front()->Type() != schema::PrimitiveType_Merge) {
    auto ret = CheckTensorsInvalid(in_tensors_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "CheckInputs failed";
      return ret;
    }
  }
  ret = this->executor_->Prepare(this->nodes_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare CpuExecutor failed";
    return ret;
 #ifdef SUPPORT_TRAIN
  for (auto out_tensor : out_tensors_) {  // increase RefCount of output tensors, such that Run will not free them
    out_tensor->set_ref_count(out_tensor->ref_count() + 1);
  }
 #endif
  for (auto *kernel : nodes_) {
    MS_ASSERT(nullptr != kernel);
    auto ret = kernel->PreProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->Run(before, after);
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->PostProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  return RET_OK;
 }
--- a/mindspore/lite/src/sub_graph_kernel.h
+++ b/mindspore/lite/src/sub_graph_kernel.h
@@ -94,7 +94,7 @@ class SubGraphKernel : public LiteKernel {
  // called before Run
  // No pre-processing is done at this level: always reports RET_OK.
  int PreProcess() override { return mindspore::lite::RET_OK; }
  int Run() override;
  // Callback-free entry point: delegates to Run(before, after) with both callbacks null.
  int Run() override { return Run(nullptr, nullptr); }
  int Run(const KernelCallBack &before, const KernelCallBack &after) override;
  // called after Run
@@ -134,10 +134,8 @@ class CpuSubGraph : public SubGraphKernel {
  int Prepare() override;
  // Forwards to SubGraphKernel::Init() and returns its result.
  int Init() override { return SubGraphKernel::Init(); }
  // Forwards to SubGraphKernel::PreProcess() and returns its result.
  int PreProcess() override { return SubGraphKernel::PreProcess(); }
  // Forwards to SubGraphKernel::Run() and returns its result.
  int Run() override { return SubGraphKernel::Run(); }
  // Forwards to the SubGraphKernel implementation, passing both callbacks through unchanged.
  int Run(const KernelCallBack &before, const KernelCallBack &after) override {
    return SubGraphKernel::Run(before, after);
  }
  // Callback-free entry point: delegates to Run(before, after) with both callbacks null.
  int Run() override { return Run(nullptr, nullptr); }
  int Run(const KernelCallBack &before, const KernelCallBack &after) override;
  // Forwards to SubGraphKernel::PostProcess() and returns its result.
  int PostProcess() override { return SubGraphKernel::PostProcess(); }
 };
--- a/mindspore/lite/src/train/train_session.cc
+++ b/mindspore/lite/src/train/train_session.cc
@@ -35,6 +35,7 @@
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/fp32_grad/convolution.h"
 #include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h"
 #include "src/common/tensor_util.h"
 namespace mindspore {
 namespace lite {
@@ -188,17 +189,33 @@ int TrainSession::RunGraph(const KernelCallBack &before, const KernelCallBack &a
    return lite::RET_NULL_PTR;
  }
  auto run_kernel = (train_mode_) ? train_kernels_ : inference_kernels_;
  lite::CpuExecutor executor;
  auto ret = RET_OK;
  if (before == nullptr && after == nullptr) {
    ret = executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get());
  } else {
    ret = executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get(), before, after);
  }
  auto ret = CheckTensorsInvalid(inputs_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "failed to run model";
    MS_LOG(ERROR) << "CheckInputs failed";
    return ret;
  }
  for (auto out_tensor : outputs_) {  // increase RefCount of output tensors, such that Run will not free them
    out_tensor->set_ref_count(out_tensor->ref_count() + 1);
  }
  for (auto *kernel : run_kernel) {
    MS_ASSERT(nullptr != kernel);
    ret = kernel->PreProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->Run(before, after);
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
    ret = kernel->PostProcess();
    if (RET_OK != ret) {
      MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  if (train_mode_ && virtual_batch_multiplier_) {
    virtual_batch_idx_++;