| @@ -25,7 +25,6 @@ | |||
| #include "ir/anf.h" | |||
| #include "ir/dtype.h" | |||
| #include "utils/utils.h" | |||
| #include "frontend/operator/ops.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/session/kernel_graph.h" | |||
| @@ -28,7 +28,7 @@ void CopyActor::RunOpData(OpDataPtr<DeviceTensor> input_data, OpContext<DeviceTe | |||
| // When all the inputs are collected, then allocate memory and callback copy. | |||
| if (CheckCopyCondition(context)) { | |||
| FetchDeviceTensor(context); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -39,17 +39,17 @@ void CopyActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *contex | |||
| // When all the inputs are collected, then allocate memory and callback copy. | |||
| if (CheckCopyCondition(context)) { | |||
| FetchDeviceTensor(context); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| void CopyActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void CopyActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> alloc_list({output_device_tensor_}); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, alloc_list, output_device_context_, context, | |||
| GetAID()); | |||
| } | |||
| void CopyActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void CopyActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> input_free_list({input_device_tensor_}); | |||
| std::vector<DeviceTensor *> output_free_list({output_device_tensor_}); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, input_free_list, input_device_context_, context); | |||
| @@ -67,11 +67,11 @@ void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) { | |||
| // The input is invalid and needs to be erased when finish copy. | |||
| EraseInput(context); | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| } | |||
| @@ -23,7 +23,7 @@ | |||
| #include <utility> | |||
| #include <unordered_map> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| @@ -32,12 +32,12 @@ namespace runtime { | |||
| using mindspore::device::DeviceContext; | |||
| // The copy actor is used to receive the device tensors and control info to copy data between input device tensor and | |||
| // output device tensor. The processing flow is RunOpData/RunOpControl -> CheckCopyCondition -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> Copy -> FreeMemory -> SendOutput. | |||
| class CopyActor : public MemoryInterfaceActor { | |||
| // output device tensor. The processing flow is RunOpData/RunOpControl -> CheckCopyCondition -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> Copy -> SendMemoryFreeReq -> SendOutput. | |||
| class CopyActor : public MemoryAwareActor { | |||
| public: | |||
| CopyActor(const std::string &name, const AID &memory_manager_aid) | |||
| : MemoryInterfaceActor(name), | |||
| : MemoryAwareActor(name), | |||
| memory_manager_aid_(memory_manager_aid), | |||
| input_datas_num_(0), | |||
| input_controls_num_(0), | |||
| @@ -51,8 +51,8 @@ class CopyActor : public MemoryInterfaceActor { | |||
| void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) override; | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| // The copy processing after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| @@ -28,11 +28,11 @@ void DataSourceActor::FetchData(OpContext<DeviceTensor> *context) { | |||
| MS_LOG(INFO) << "Data source actor(" << GetAID().Name() << ") fetches data."; | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (buffers_.size() == buffer_capacity_) { | |||
| // Note that FreeMemory must be before SendOutput, because SendOutput will trigger AllocateMemory of the next actor | |||
| // and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor is | |||
| // before AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure the | |||
| // execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be before SendOutput, because SendOutput will trigger SendMemoryAllocReq of the | |||
| // next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is before SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the other | |||
| // is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| return; | |||
| @@ -45,7 +45,7 @@ void DataSourceActor::FetchData(OpContext<DeviceTensor> *context) { | |||
| } | |||
| // Allocate memory for device tensors. | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| void DataSourceActor::SendOutput(OpContext<DeviceTensor> *context) { | |||
| @@ -88,12 +88,12 @@ void DeviceQueueDataSourceActor::FillDataBuffer() { | |||
| buffers_.push(device_tensors); | |||
| } | |||
| void DeviceQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void DeviceQueueDataSourceActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.back(); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, device_tensors, device_context_, context, GetAID()); | |||
| } | |||
| void DeviceQueueDataSourceActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void DeviceQueueDataSourceActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.front(); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, device_tensors, device_context_, context); | |||
| } | |||
| @@ -122,11 +122,11 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co | |||
| SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); | |||
| } | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| } | |||
| @@ -151,7 +151,7 @@ void HostQueueDataSourceActor::FillDataBuffer() { | |||
| buffers_.push(device_tensors); | |||
| } | |||
| void HostQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void HostQueueDataSourceActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.back(); | |||
| if (IsSameDeviceType()) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, device_tensors, device_contexts_[0], context, | |||
| @@ -165,7 +165,7 @@ void HostQueueDataSourceActor::AllocateMemory(OpContext<DeviceTensor> *context) | |||
| } | |||
| } | |||
| void HostQueueDataSourceActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void HostQueueDataSourceActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| auto device_tensors = buffers_.front(); | |||
| if (IsSameDeviceType()) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, device_tensors, device_contexts_[0], context); | |||
| @@ -205,11 +205,11 @@ void HostQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *cont | |||
| } | |||
| } | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| buffers_.pop(); | |||
| } | |||
| @@ -24,7 +24,7 @@ | |||
| #include <queue> | |||
| #include <utility> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| #include "runtime/framework/host_tensor_queue.h" | |||
| @@ -35,20 +35,20 @@ namespace runtime { | |||
| using mindspore::device::DeviceContext; | |||
| // The data source actor is used to fetch data from data source and process them into device tensors, | |||
| // and then send them to kernel actor. The processing flow is FetchData -> FillDataBuffer -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> FreeMemory -> SendOutput. | |||
| class DataSourceActor : public MemoryInterfaceActor { | |||
| // and then send them to kernel actor. The processing flow is FetchData -> FillDataBuffer -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> SendMemoryFreeReq -> SendOutput. | |||
| class DataSourceActor : public MemoryAwareActor { | |||
| public: | |||
| DataSourceActor(std::string name, size_t buffer_capacity, const AID memory_manager_aid) | |||
| : MemoryInterfaceActor(name), buffer_capacity_(buffer_capacity), memory_manager_aid_(memory_manager_aid) {} | |||
| : MemoryAwareActor(name), buffer_capacity_(buffer_capacity), memory_manager_aid_(memory_manager_aid) {} | |||
| virtual ~DataSourceActor() = default; | |||
| // The process entry of data processing. | |||
| void FetchData(OpContext<DeviceTensor> *context); | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override{}; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override{}; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override{}; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override{}; | |||
| // Copy data from data source to the device tensor buffer of actor after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override{}; | |||
| @@ -83,8 +83,8 @@ class DeviceQueueDataSourceActor : public DataSourceActor { | |||
| : DataSourceActor(name, buffer_capacity, memory_manager_aid), device_context_(device_context) {} | |||
| ~DeviceQueueDataSourceActor() override = default; | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| protected: | |||
| @@ -108,8 +108,8 @@ class HostQueueDataSourceActor : public DataSourceActor { | |||
| : DataSourceActor(name, buffer_capacity, memory_manager_aid), host_queue_(host_queue) {} | |||
| ~HostQueueDataSourceActor() override = default; | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| size_t FetchDataNodePosition(const AnfNodePtr &data_node) const; | |||
| @@ -121,6 +121,7 @@ class HostQueueDataSourceActor : public DataSourceActor { | |||
| private: | |||
| friend class GraphScheduler; | |||
| // Judge whether all the data_nodes_ are from the same device. | |||
| bool IsSameDeviceType() const; | |||
| HostTensorQueuePtr host_queue_; | |||
| @@ -31,7 +31,7 @@ void KernelActor::RunOpData(OpDataPtr<DeviceTensor> input_data, OpContext<Device | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -44,7 +44,7 @@ void KernelActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *cont | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| @@ -60,17 +60,17 @@ void KernelActor::RunOpControlWithInputTensor(AID *input_control, OpContext<Devi | |||
| FetchInputDeviceTensor(context); | |||
| FetchOutputDeviceTensor(); | |||
| FetchWorkspaceDeviceTensor(); | |||
| AllocateMemory(context); | |||
| SendMemoryAllocReq(context); | |||
| } | |||
| } | |||
| void KernelActor::AllocateMemory(OpContext<DeviceTensor> *context) { | |||
| void KernelActor::SendMemoryAllocReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> alloc_list(output_device_tensors_); | |||
| alloc_list.insert(alloc_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); | |||
| Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, alloc_list, device_context_, context, GetAID()); | |||
| } | |||
| void KernelActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void KernelActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| std::vector<DeviceTensor *> free_list(input_device_tensors_); | |||
| free_list.insert(free_list.end(), output_device_tensors_.begin(), output_device_tensors_.end()); | |||
| free_list.insert(free_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); | |||
| @@ -95,11 +95,11 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) { | |||
| // The input is invalid and needs to be erased when finish kernel launch. | |||
| EraseInput(context); | |||
| // Note that FreeMemory must be in front of SendOutput, because SendOutput will trigger AllocateMemory of the next | |||
| // actor and the actor is asynchronous execution. So it is necessary to ensure that FreeMemory of the current actor | |||
| // is in front of AllocateMemory of the next actor. One is to reuse the memory more fully, the other is to ensure | |||
| // the execution order and avoid the illegal memory timing problem. | |||
| FreeMemory(context); | |||
| // Note that SendMemoryFreeReq must be in front of SendOutput, because SendOutput will trigger SendMemoryAllocReq of | |||
| // the next actor and the actor is asynchronous execution. So it is necessary to ensure that SendMemoryFreeReq of the | |||
| // current actor is in front of SendMemoryAllocReq of the next actor. One is to reuse the memory more fully, the | |||
| // other is to ensure the execution order and avoid the illegal memory timing problem. | |||
| SendMemoryFreeReq(context); | |||
| SendOutput(context); | |||
| } | |||
| @@ -23,7 +23,7 @@ | |||
| #include <utility> | |||
| #include <unordered_map> | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/actor/memory_interface_actor.h" | |||
| #include "runtime/framework/actor/memory_aware_actor.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| @@ -38,12 +38,12 @@ using mindspore::kernel::AddressPtr; | |||
| using mindspore::tensor::TensorPtr; | |||
| // The kernel actor is used to receive the device tensors and control info to luanch kernel. | |||
| // The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> AllocateMemory | |||
| // -> OnMemoryAllocFinish -> LaunchKernel -> FreeMemory -> SendOutput. | |||
| class KernelActor : public MemoryInterfaceActor { | |||
| // The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> SendMemoryAllocReq | |||
| // -> OnMemoryAllocFinish -> LaunchKernel -> SendMemoryFreeReq -> SendOutput. | |||
| class KernelActor : public MemoryAwareActor { | |||
| public: | |||
| KernelActor(std::string name, CNodePtr kernel, const DeviceContext *device_context, const AID memory_manager_aid) | |||
| : MemoryInterfaceActor(name), | |||
| : MemoryAwareActor(name), | |||
| kernel_(kernel), | |||
| device_context_(device_context), | |||
| memory_manager_aid_(memory_manager_aid), | |||
| @@ -60,8 +60,8 @@ class KernelActor : public MemoryInterfaceActor { | |||
| const std::vector<TensorPtr> *input_tensors); | |||
| // The memory related operation interface. | |||
| void AllocateMemory(OpContext<DeviceTensor> *context) override; | |||
| void FreeMemory(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryAllocReq(OpContext<DeviceTensor> *context) override; | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context) override; | |||
| // The real kernel launch processing after memory alloc finished. | |||
| void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) override; | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| #include <utility> | |||
| #include <string> | |||
| @@ -25,15 +25,15 @@ | |||
| namespace mindspore { | |||
| namespace runtime { | |||
| // The actor represents a set of common memory related operations of actor. | |||
| class MemoryInterfaceActor : public OpActor<DeviceTensor> { | |||
| class MemoryAwareActor : public OpActor<DeviceTensor> { | |||
| public: | |||
| explicit MemoryInterfaceActor(std::string name) : OpActor(name) {} | |||
| virtual ~MemoryInterfaceActor() = default; | |||
| virtual void AllocateMemory(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void FreeMemory(OpContext<DeviceTensor> *context) = 0; | |||
| explicit MemoryAwareActor(std::string name) : OpActor(name) {} | |||
| virtual ~MemoryAwareActor() = default; | |||
| virtual void SendMemoryAllocReq(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void SendMemoryFreeReq(OpContext<DeviceTensor> *context) = 0; | |||
| virtual void OnMemoryAllocFinish(OpContext<DeviceTensor> *context) = 0; | |||
| }; | |||
| } // namespace runtime | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ | |||
| #endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_AWARE_ACTOR_H_ | |||
| @@ -41,7 +41,7 @@ void MemoryManagerActor::AllocateMemory(std::vector<DeviceTensor *> alloc_list, | |||
| } | |||
| // Call back to the from actor to process after memory allocation finished. | |||
| Async(from_aid, &MemoryInterfaceActor::OnMemoryAllocFinish, op_context); | |||
| Async(from_aid, &MemoryAwareActor::OnMemoryAllocFinish, op_context); | |||
| } | |||
| void MemoryManagerActor::FreeMemory(std::vector<DeviceTensor *> free_list, const DeviceContext *device_context, | |||
| @@ -235,7 +235,7 @@ void SwitchActor::SendOutput(OpContext<DeviceTensor> *context) { | |||
| } | |||
| } | |||
| void SwitchActor::FreeMemory(OpContext<DeviceTensor> *context) { | |||
| void SwitchActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) { | |||
| Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, input_device_tensors_, device_context_, context); | |||
| } | |||
| @@ -68,7 +68,7 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> { | |||
| // Fetch the args of switch branch. | |||
| void FetchInputDeviceTensor(OpContext<DeviceTensor> *context); | |||
| void SendOutput(OpContext<DeviceTensor> *context); | |||
| void FreeMemory(OpContext<DeviceTensor> *context); | |||
| void SendMemoryFreeReq(OpContext<DeviceTensor> *context); | |||
| // All inputs of the switch actor, excluding weight and tensor. | |||
| // Used to receive input data, the first input is the condition of switch. | |||
| @@ -1579,8 +1579,10 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil | |||
| if (!AnfAlgo::OutputAddrExist(value_node, 0)) { | |||
| continue; | |||
| } | |||
| ofs << "\t\tdevcie tensor key:" << value_node->fullname_with_scope() << "\n"; | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(value_node.get()); | |||
| const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph); | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get()); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size() | |||
| << "\n"; | |||
| for (const auto &device_tensor : device_tensors) { | |||
| ofs << "\t\t\tdevcie tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr() | |||
| << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count() | |||
| @@ -1594,8 +1596,9 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil | |||
| continue; | |||
| } | |||
| const auto &front_node = FetchFrontNodeByBackendNode(input_node, graph); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\n"; | |||
| const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get()); | |||
| ofs << "\t\tdevcie tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size() | |||
| << "\n"; | |||
| for (const auto &device_tensor : device_tensors) { | |||
| ofs << "\t\t\tdevcie tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr() | |||
| << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count() | |||
| @@ -409,7 +409,7 @@ VectorRef MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef & | |||
| input_tensors.emplace_back(input_tensor); | |||
| } | |||
| // Run actor DAG. | |||
| // Run in the pynative mode. | |||
| VectorRef outputs; | |||
| auto ms_context = MsContext::GetInstance(); | |||
| const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode); | |||
| @@ -419,7 +419,7 @@ VectorRef MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef & | |||
| } | |||
| mindspore::ScopedLongRunning long_running; | |||
| // Run actor DAG. | |||
| const auto &actor_set = runtime::GraphScheduler::GetInstance().Fetch(actor_info); | |||
| MS_EXCEPTION_IF_NULL(actor_set); | |||
| runtime::GraphScheduler::GetInstance().PrepareRun(actor_set, graph_compiler_info, input_tensors); | |||