| @@ -25,7 +25,6 @@ | |||
| #include "ir/anf.h" | |||
| #include "ir/dtype.h" | |||
| #include "utils/utils.h" | |||
| #include "frontend/operator/ops.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/session/kernel_graph.h" | |||
| @@ -28,7 +28,7 @@ void CopyActor::RunOpData(OpDataPtr<DeviceTensor> input_data, OpContext<DeviceTe | |||
| // When all the inputs are collected, then allocate memory and callback copy. | |||
| if (CheckCopyCondition(context)) { | |||
| FetchDeviceTensor(context); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -39,17 +39,17 @@ void CopyActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *contex | |||
| // When all the inputs are collected, then allocate memory and callback copy. | |||
| if (CheckCopyCondition(context)) { | |||
| FetchDeviceTensor(context); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| void CopyActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void CopyActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> alloc_list({output_device_tensor_}); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, alloc_list, output_device_context_, context, | |||
| GetAID()); | |||
| } | |||
| void CopyActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void CopyActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> input_free_list({input_device_tensor_}); | |||
| std::vector<DeviceTensor *> output_free_list({output_device_tensor_}); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, input_free_list, input_device_context_, context); | |||
| @@ -67,11 +67,11 @@ void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) { | |||
| // The input is invalid and needs to be erased when finish copy. | |||
| EraseInput(context); | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| } | |||
| @@ -23,7 +23,7 @@ | |||
| #include <utility> | |||
| #include <unordered_map> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| @@ -32,12 +32,12 @@ namespace runtime { | |||
| using mindspore::device::DeviceContext; | |||
| // The copy actor is used to receive the device tensors and control info to copy data between input device tensor and | |||
| // output device tensor. The processing flow is RunOpData/RunOpControl -> CheckCopyCondition -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> Copy -> FreeMemory -> SendOutput. | |||
| class CopyActor : public MemoryInterfaceActor { | |||
| // output device tensor. The processing flow is RunOpData/RunOpControl -> CheckCopyCondition -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> Copy -> SendMemoryFreeReq -> SendOutput. | |||
| class CopyActor : public MemoryAwareActor { | |||
| public: | |||
| CopyActor(const std::string &name, const AID &memory_manager_aid) | |||
| : MemoryInterfaceActor(name), | |||
| : MemoryAwareActor(name), | |||
| memory_manager_aid_(memory_manager_aid), | |||
| input_datas_num_(0), | |||
| input_controls_num_(0), | |||
| @@ -51,8 +51,8 @@ class CopyActor : public MemoryInterfaceActor { | |||
| void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) override; | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| // The copy processing after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| @@ -28,11 +28,11 @@ void DataSourceActor::FetchData(OpContext<DeviceTensor> *context) { | |||
| MS_LOG(INFO) << "Data source actor(" << GetAID().Name() << ") fetches data."; | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (buffers_.size() == buffer_capacity_) { | |||
| // Note that FreeMemory must be before SendOutput, because SendOutput will trigger AllocateMemory of the next actor | |||
| // and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor is | |||
| // before AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure the | |||
| // execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be before SendOutput, because SendOutput will trigger SendMemoryAllocReq of the | |||
| // next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is before SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the other | |||
| // is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| return; | |||
| @@ -45,7 +45,7 @@ void DataSourceActor::FetchData(OpContext<DeviceTensor> *context) { | |||
| } | |||
| // Allocate memory for device tensors. | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| void DataSourceActor::SendOutput(OpContext<DeviceTensor> *context) { | |||
| @@ -88,12 +88,12 @@ void DeviceQueueDataSourceActor::FillDataBuffer() { | |||
| buffers_.push(device_tensors); | |||
| } | |||
| void DeviceQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void DeviceQueueDataSourceActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.back(); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, device_tensors, device_context_, context, GetAID()); | |||
| } | |||
| void DeviceQueueDataSourceActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void DeviceQueueDataSourceActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.front(); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, device_tensors, device_context_, context); | |||
| } | |||
| @@ -122,11 +122,11 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co | |||
| SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); | |||
| } | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| } | |||
| @@ -151,7 +151,7 @@ void HostQueueDataSourceActor::FillDataBuffer() { | |||
| buffers_.push(device_tensors); | |||
| } | |||
| void HostQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void HostQueueDataSourceActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.back(); | |||
| if (IsSameDeviceType()) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, device_tensors, device_contexts_[0], context, | |||
| @@ -165,7 +165,7 @@ void HostQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) | |||
| } | |||
| } | |||
| void HostQueueDataSourceActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void HostQueueDataSourceActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.front(); | |||
| if (IsSameDeviceType()) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, device_tensors, device_contexts_[0], context); | |||
| @@ -205,11 +205,11 @@ void HostQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *cont | |||
| } | |||
| } | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| } | |||
| @@ -24,7 +24,7 @@ | |||
| #include <queue> | |||
| #include <utility> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| #include "runtime/framework/host_tensor_queue.h" | |||
| @@ -35,20 +35,20 @@ namespace runtime { | |||
| using mindspore::device::DeviceContext; | |||
| // The data source actor is used to fetch data from data source and process them into device tensors, | |||
| // and then send them to kernel actor. The processing flow is FetchData -> FillDataBuffer -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> FreeMemory -> SendOutput. | |||
| class DataSourceActor : public MemoryInterfaceActor { | |||
| // and then send them to kernel actor. The processing flow is FetchData -> FillDataBuffer -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> SendMemoryFreeReq -> SendOutput. | |||
| class DataSourceActor : public MemoryAwareActor { | |||
| public: | |||
| DataSourceActor(std::string name, size_t buffer_capacity, const AID memory_manager_aid) | |||
| : MemoryInterfaceActor(name), buffer_capacity_(buffer_capacity), memory_manager_aid_(memory_manager_aid) {} | |||
| : MemoryAwareActor(name), buffer_capacity_(buffer_capacity), memory_manager_aid_(memory_manager_aid) {} | |||
| virtual ~DataSourceActor() = default; | |||
| // The process entry of data processing. | |||
| void FetchData(OpContext<DeviceTensor> *context); | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override{}; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override{}; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override{}; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override{}; | |||
| // Copy data from data source to the device tensor buffer of actor after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override{}; | |||
| @@ -83,8 +83,8 @@ class DeviceQueueDataSourceActor : public DataSourceActor { | |||
| : DataSourceActor(name, buffer_capacity, memory_manager_aid), device_context_(device_context) {} | |||
| ~DeviceQueueDataSourceActor() override = default; | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| protected: | |||
| @@ -108,8 +108,8 @@ class HostQueueDataSourceActor : public DataSourceActor { | |||
| : DataSourceActor(name, buffer_capacity, memory_manager_aid), host_queue_(host_queue) {} | |||
| ~HostQueueDataSourceActor() override = default; | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| size_t FetchDataNodePosition(const AnfNodePtr &data_node) const; | |||
| @@ -121,6 +121,7 @@ class HostQueueDataSourceActor : public DataSourceActor { | |||
| private: | |||
| friend class GraphScheduler; | |||
| // Judge whether all the data_nodes_ are from the same device. | |||
| bool IsSameDeviceType() const; | |||
| HostTensorQueuePtr host_queue_; | |||
| @@ -31,7 +31,7 @@ void KernelActor::RunOpData(OpDataPtr<DeviceTensor> input_data, OpContext<Device | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -44,7 +44,7 @@ void KernelActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *cont | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -60,17 +60,17 @@ void KernelActor::RunOpControlWithInputTensor(AID *input_control, OpContext<Devi | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| void KernelActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void KernelActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> alloc_list(output_device_tensors_); | |||
| alloc_list.insert(alloc_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, alloc_list, device_context_, context, GetAID()); | |||
| } | |||
| void KernelActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void KernelActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> free_list(input_device_tensors_); | |||
| free_list.insert(free_list.end(), output_device_tensors_.begin(), output_device_tensors_.end()); | |||
| free_list.insert(free_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); | |||
| @@ -95,11 +95,11 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) { | |||
| // The input is invalid and needs to be erased when finish kernel launch. | |||
| EraseInput(context); | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| } | |||
| @@ -23,7 +23,7 @@ | |||
| #include <utility> | |||
| #include <unordered_map> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| @@ -38,12 +38,12 @@ using mindspore::kernel::AddressPtr; | |||
| using mindspore::tensor::TensorPtr; | |||
| // The kernel actor is used to receive the device tensors and control info to luanch kernel. | |||
| // The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> LaunchKernel -> FreeMemory -> SendOutput. | |||
| class KernelActor : public MemoryInterfaceActor { | |||
| // The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> LaunchKernel -> SendMemoryFreeReq -> SendOutput. | |||
| class KernelActor : public MemoryAwareActor { | |||
| public: | |||
| KernelActor(std::string name, CNodePtr kernel, const DeviceContext *device_context, const AID memory_manager_aid) | |||
| : MemoryInterfaceActor(name), | |||
| : MemoryAwareActor(name), | |||
| kernel_(kernel), | |||
| device_context_(device_context), | |||
| memory_manager_aid_(memory_manager_aid), | |||
| @@ -60,8 +60,8 @@ class KernelActor : public MemoryInterfaceActor { | |||
| const std::vector<TensorPtr> *input_tensors); | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| // The real kernel launch processing after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| #include <utility> | |||
| #include <string> | |||
| @@ -25,15 +25,15 @@ | |||
| namespace mindspore { | |||
| namespace runtime { | |||
| // The actor represents a set of common memory related operations of actor. | |||
| class MemoryInterfaceActor : public OpActor<DeviceTensor> { | |||
| class MemoryAwareActor : public OpActor<DeviceTensor> { | |||
| public: | |||
| explicit MemoryInterfaceActor(std::string name) : OpActor(name) {} | |||
| virtual ~MemoryInterfaceActor() = default; | |||
| virtual void AllocateMemory(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void FreeMemory(OpContext<DeviceTensor> *context) = 0; | |||
| explicit MemoryAwareActor(std::string name) : OpActor(name) {} | |||
| virtual ~MemoryAwareActor() = default; | |||
| virtual void SendMemoryAllocReq(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void SendMemoryFreeReq(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) = 0; | |||
| }; | |||
| } // namespace runtime | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| @@ -41,7 +41,7 @@ void MemoryManagerActor::AllocateMemory(std::vector<DeviceTensor *> alloc_list, | |||
| } | |||
| // Call back to the from actor to process after memory allocation finished. | |||
| Async(from_aid, &MemoryInterfaceActor::OnMemoryAllocFinish, op_context); | |||
| Async(from_aid, &MemoryAwareActor::OnMemoryAllocFinish, op_context); | |||
| } | |||
| void MemoryManagerActor::FreeMemory(std::vector<DeviceTensor *> free_list, const DeviceContext *device_context, | |||
| @@ -235,7 +235,7 @@ void SwitchActor::SendOutput(OpContext<DeviceTensor> *context) { | |||
| } | |||
| } | |||
| void SwitchActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void SwitchActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, input_device_tensors_, device_context_, context); | |||
| } | |||
| @@ -68,7 +68,7 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> { | |||
| // Fetch the args of switch branch. | |||
| void FetchInputDeviceTensor(OpContext<DeviceTensor> *context); | |||
| void SendOutput(OpContext<DeviceTensor> *context); | |||
| void FreeMemory(OpContext<DeviceTensor> *context); | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context); | |||
| // All inputs of the switch actor, excluding weight and tensor. | |||
| // Used to receive input data, the first input is the condition of switch. | |||
| @@ -1579,8 +1579,10 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil | |||
| if (!AnfAlgo::OutputAddrExist(value_node, 0)) { | |||
| continue; | |||
| } | |||
| ofs << "\t\tdevcie tensor key:" << value_node->fullname_with_scope() << "\n"; | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(value_node.get()); | |||
| const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph); | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get()); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size() | |||
| << "\n"; | |||
| for (const auto &device_tensor : device_tensors) { | |||
| ofs << "\t\t\tdevcie tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr() | |||
| << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count() | |||
| @@ -1594,8 +1596,9 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil | |||
| continue; | |||
| } | |||
| const auto &front_node = FetchFrontNodeByBackendNode(input_node, graph); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\n"; | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get()); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size() | |||
| << "\n"; | |||
| for (const auto &device_tensor : device_tensors) { | |||
| ofs << "\t\t\tdevcie tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr() | |||
| << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count() | |||
| @@ -409,7 +409,7 @@ VectorRef MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef & | |||
| input_tensors.emplace_back(input_tensor); | |||
| } | |||
| // Run actor DAG. | |||
| // Run in the pynative mode. | |||
| VectorRef outputs; | |||
| auto ms_context = MsContext::GetInstance(); | |||
| const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode); | |||
| @@ -419,7 +419,7 @@ VectorRef MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef & | |||
| } | |||
| mindspore::ScopedLongRunning long_running; | |||
| // Run actor DAG. | |||
| const auto &actor_set = runtime::GraphScheduler::GetInstance().Fetch(actor_info); | |||
| MS_EXCEPTION_IF_NULL(actor_set); | |||
| runtime::GraphScheduler::GetInstance().PrepareRun(actor_set, graph_compiler_info, input_tensors); | |||