| @@ -47,6 +47,8 @@ class GPUMemoryManager; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| enum class DeviceAddressStatus { kInDevice, kInHost, kInDeviceToHost, kInHostToDevice }; | |||||
| class DeviceAddress { | class DeviceAddress { | ||||
| public: | public: | ||||
| explicit DeviceAddress(void *ptr, size_t size) : ptr_(ptr), size_(size) {} | explicit DeviceAddress(void *ptr, size_t size) : ptr_(ptr), size_(size) {} | ||||
| @@ -60,6 +62,8 @@ class DeviceAddress { | |||||
| size_t GetSize() const { return size_; } | size_t GetSize() const { return size_; } | ||||
| std::string format() const { return format_; } | std::string format() const { return format_; } | ||||
| TypeId type_id() const { return type_id_; } | TypeId type_id() const { return type_id_; } | ||||
| virtual void set_status(DeviceAddressStatus status) {} | |||||
| virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } | |||||
| protected: | protected: | ||||
| const void *ptr() const { return ptr_; } | const void *ptr() const { return ptr_; } | ||||
| @@ -15,9 +15,7 @@ | |||||
| */ | */ | ||||
| #include "device/gpu/cuda_driver.h" | #include "device/gpu/cuda_driver.h" | ||||
| #include <iostream> | #include <iostream> | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| @@ -54,6 +52,27 @@ bool CudaDriver::FreeDeviceMem(const DeviceMemPtr &addr) { | |||||
| return true; | return true; | ||||
| } | } | ||||
| size_t CudaDriver::AllocHostPinnedMem(size_t size, void **addr) { | |||||
| if (size == 0) { | |||||
| MS_LOG(EXCEPTION) << "The memory allocate size is 0"; | |||||
| } | |||||
| auto ret = cudaHostAlloc(addr, size, cudaHostAllocDefault); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return 0; | |||||
| } | |||||
| return size; | |||||
| } | |||||
| void CudaDriver::FreeHostPinnedMem(void *addr) { | |||||
| if (addr) { | |||||
| auto ret = cudaFreeHost(addr); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(EXCEPTION) << "cudaFreeHost failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| } | |||||
| } | |||||
| } | |||||
| bool CudaDriver::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) { | bool CudaDriver::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) { | ||||
| auto ret = cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); | auto ret = cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); | ||||
| if (ret != cudaSuccess) { | if (ret != cudaSuccess) { | ||||
| @@ -72,6 +91,25 @@ bool CudaDriver::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr & | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size, DeviceStream stream) { | |||||
| auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size, | |||||
| DeviceStream stream) { | |||||
| auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| size_t CudaDriver::total_mem_size() { | size_t CudaDriver::total_mem_size() { | ||||
| size_t free; | size_t free; | ||||
| size_t total; | size_t total; | ||||
| @@ -122,6 +160,55 @@ bool CudaDriver::SyncStream(const DeviceStream &stream) { | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool CudaDriver::CreateEvent(DeviceEvent *event, unsigned int flag) { | |||||
| auto ret = cudaEventCreateWithFlags(reinterpret_cast<cudaEvent_t *>(event), flag); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaEventCreateWithFlags failed, ret[" << static_cast<int>(ret) << "], " | |||||
| << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool CudaDriver::DestroyEvent(const DeviceEvent &event) { | |||||
| auto ret = cudaEventDestroy((cudaEvent_t)event); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaEventDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool CudaDriver::RecordEvent(DeviceEvent event, DeviceStream stream) { | |||||
| auto ret = cudaEventRecord((cudaEvent_t)event, (cudaStream_t)stream); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaEventRecord failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool CudaDriver::SyncEvent(const DeviceEvent &event) { | |||||
| auto ret = cudaEventSynchronize((cudaEvent_t)event); | |||||
| if (ret != cudaSuccess) { | |||||
| MS_LOG(ERROR) << "cudaEventSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool CudaDriver::QueryEvent(const DeviceEvent &event) { | |||||
| auto ret = cudaEventQuery((cudaEvent_t)event); | |||||
| if (ret == cudaSuccess) { | |||||
| return true; | |||||
| } else if (ret == cudaErrorNotReady) { | |||||
| return false; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "cudaEventQuery failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| int CudaDriver::device_count() { | int CudaDriver::device_count() { | ||||
| int dev_count; | int dev_count; | ||||
| auto ret = cudaGetDeviceCount(&dev_count); | auto ret = cudaGetDeviceCount(&dev_count); | ||||
| @@ -33,8 +33,16 @@ class CudaDriver { | |||||
| // such as malloc/free and memory copy from host to device and reverse. | // such as malloc/free and memory copy from host to device and reverse. | ||||
| static size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr); | static size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr); | ||||
| static bool FreeDeviceMem(const DeviceMemPtr &addr); | static bool FreeDeviceMem(const DeviceMemPtr &addr); | ||||
| static size_t AllocHostPinnedMem(size_t size, void **addr); | |||||
| static void FreeHostPinnedMem(void *addr); | |||||
| static bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size); | static bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size); | ||||
| static bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size); | static bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size); | ||||
| static bool CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size, DeviceStream stream = 0); | |||||
| static bool CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size, | |||||
| DeviceStream stream = 0); | |||||
| static size_t total_mem_size(); | static size_t total_mem_size(); | ||||
| static size_t free_mem_size(); | static size_t free_mem_size(); | ||||
| @@ -44,6 +52,12 @@ class CudaDriver { | |||||
| static bool DestroyStream(const DeviceStream &stream); | static bool DestroyStream(const DeviceStream &stream); | ||||
| static bool SyncStream(const DeviceStream &stream); | static bool SyncStream(const DeviceStream &stream); | ||||
| static bool CreateEvent(DeviceEvent *event, unsigned int flag = cudaEventDefault); | |||||
| static bool DestroyEvent(const DeviceEvent &event); | |||||
| static bool RecordEvent(DeviceEvent event, DeviceStream stream = 0); | |||||
| static bool SyncEvent(const DeviceEvent &event); | |||||
| static bool QueryEvent(const DeviceEvent &event); | |||||
| // Encapsulate the cuda APIs associated with device management. | // Encapsulate the cuda APIs associated with device management. | ||||
| static int device_count(); | static int device_count(); | ||||
| static bool set_current_device(int index); | static bool set_current_device(int index); | ||||
| @@ -33,6 +33,11 @@ class GPUDeviceAddress : public DeviceAddress { | |||||
| bool SyncDeviceToHost(const std::vector<int> &shape, size_t size, TypeId type, void *host_ptr) const override; | bool SyncDeviceToHost(const std::vector<int> &shape, size_t size, TypeId type, void *host_ptr) const override; | ||||
| bool SyncHostToDevice(const std::vector<int> &shape, size_t size, TypeId type, const void *host_ptr) const override; | bool SyncHostToDevice(const std::vector<int> &shape, size_t size, TypeId type, const void *host_ptr) const override; | ||||
| void set_status(DeviceAddressStatus status) { status_ = status; } | |||||
| DeviceAddressStatus status() const { return status_; } | |||||
| private: | |||||
| DeviceAddressStatus status_{DeviceAddressStatus::kInDevice}; | |||||
| }; | }; | ||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace device | } // namespace device | ||||
| @@ -0,0 +1,131 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "device/gpu/gpu_memory_copy_manager.h" | |||||
| #include "device/gpu/gpu_common.h" | |||||
| #include "device/gpu/gpu_device_manager.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace gpu { | |||||
| void GPUMemCopyManager::Init() { | |||||
| CHECK_OP_RET_WITH_EXCEPT(GPUDeviceManager::GetInstance().CreateStream(&swap_out_stream_), | |||||
| "Failed to create CUDA stream of memory swap out."); | |||||
| CHECK_OP_RET_WITH_EXCEPT(GPUDeviceManager::GetInstance().CreateStream(&swap_in_stream_), | |||||
| "Failed to create CUDA stream of memory swap in."); | |||||
| } | |||||
| void GPUMemCopyManager::AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) { | |||||
| MS_EXCEPTION_IF_NULL(device_address); | |||||
| MS_EXCEPTION_IF_NULL(host_addr.addr); | |||||
| DeviceEvent event = nullptr; | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::CreateEvent(&event, cudaEventDisableTiming), "Failed to create CUDA event."); | |||||
| DeviceMemPtr device_ptr = const_cast<DeviceMemPtr>(device_address->GetPtr()); | |||||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||||
| device_address->set_status(DeviceAddressStatus::kInDeviceToHost); | |||||
| CHECK_OP_RET_WITH_EXCEPT( | |||||
| CudaDriver::CopyDeviceMemToHostAsync(host_addr.addr, device_ptr, host_addr.size, swap_out_stream_), | |||||
| "Failed to copy device memory to host."); | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::RecordEvent(event, swap_out_stream_), | |||||
| "Failed to record CUDA event to swap out stream."); | |||||
| swap_out_queue_.emplace(device_address, event); | |||||
| } | |||||
| void GPUMemCopyManager::AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) { | |||||
| MS_EXCEPTION_IF_NULL(device_address); | |||||
| MS_EXCEPTION_IF_NULL(host_addr.addr); | |||||
| DeviceEvent event = nullptr; | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::CreateEvent(&event, cudaEventDisableTiming), "Failed to create CUDA event."); | |||||
| DeviceMemPtr device_ptr = const_cast<DeviceMemPtr>(device_address->GetPtr()); | |||||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||||
| device_address->set_status(DeviceAddressStatus::kInHostToDevice); | |||||
| CHECK_OP_RET_WITH_EXCEPT( | |||||
| CudaDriver::CopyHostMemToDeviceAsync(device_ptr, host_addr.addr, host_addr.size, swap_in_stream_), | |||||
| "Failed to copy host memory to device."); | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::RecordEvent(event, swap_in_stream_), | |||||
| "Failed to record CUDA event to swap in stream."); | |||||
| swap_in_queue_.emplace(device_address, event); | |||||
| } | |||||
| bool GPUMemCopyManager::SyncMemCopyStream(SwapKind swap_kind) { | |||||
| if (swap_kind == SwapKind::kDeviceToHost) { | |||||
| return GPUDeviceManager::GetInstance().SyncStream(swap_out_stream_); | |||||
| } else { | |||||
| return GPUDeviceManager::GetInstance().SyncStream(swap_in_stream_); | |||||
| } | |||||
| } | |||||
| DeviceAddressPtr GPUMemCopyManager::UpdateSwapOutQueue() { | |||||
| if (swap_out_queue_.empty()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto &task = swap_out_queue_.front(); | |||||
| auto device_address = task.first; | |||||
| auto &event = task.second; | |||||
| bool finish_swap = CudaDriver::QueryEvent(event); | |||||
| if (!finish_swap) { | |||||
| return nullptr; | |||||
| } | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap out."); | |||||
| swap_out_queue_.pop(); | |||||
| return device_address; | |||||
| } | |||||
| DeviceAddressPtr GPUMemCopyManager::UpdateSwapInQueue() { | |||||
| if (swap_in_queue_.empty()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto &task = swap_in_queue_.front(); | |||||
| auto device_address = task.first; | |||||
| auto &event = task.second; | |||||
| bool finish_swap = CudaDriver::QueryEvent(event); | |||||
| if (!finish_swap) { | |||||
| return nullptr; | |||||
| } | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap in."); | |||||
| swap_in_queue_.pop(); | |||||
| return device_address; | |||||
| } | |||||
| bool GPUMemCopyManager::AllocHostPinnedMem(size_t size, void **addr) { | |||||
| auto alloc_size = CudaDriver::AllocHostPinnedMem(size, addr); | |||||
| return alloc_size == size; | |||||
| } | |||||
| void GPUMemCopyManager::FreeHostPinnedMem(void *addr) { CudaDriver::FreeHostPinnedMem(addr); } | |||||
| void GPUMemCopyManager::ClearSwapQueue() { | |||||
| CHECK_OP_RET_WITH_EXCEPT(SyncMemCopyStream(SwapKind::kDeviceToHost), "Failed to sync swap out stream"); | |||||
| CHECK_OP_RET_WITH_EXCEPT(SyncMemCopyStream(SwapKind::kHostToDevice), "Failed to sync swap in stream"); | |||||
| while (!swap_out_queue_.empty()) { | |||||
| auto &event = swap_out_queue_.front().second; | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap out."); | |||||
| swap_out_queue_.pop(); | |||||
| } | |||||
| while (!swap_in_queue_.empty()) { | |||||
| auto &event = swap_in_queue_.front().second; | |||||
| CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap in."); | |||||
| swap_in_queue_.pop(); | |||||
| } | |||||
| } | |||||
| } // namespace gpu | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_ | |||||
| #define MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_ | |||||
| #include <memory> | |||||
| #include <queue> | |||||
| #include <utility> | |||||
| #include "pre_activate/mem_reuse/mem_copy_manager.h" | |||||
| #include "device/device_address.h" | |||||
| #include "device/gpu/cuda_driver.h" | |||||
| #include "kernel/kernel.h" | |||||
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace gpu { | |||||
| using mindspore::device::memswap::MemCopyManager; | |||||
| using mindspore::device::memswap::SwapKind; | |||||
| class GPUMemCopyManager : public MemCopyManager { | |||||
| public: | |||||
| GPUMemCopyManager() = default; | |||||
| ~GPUMemCopyManager() override = default; | |||||
| void Init() override; | |||||
| void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) override; | |||||
| void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) override; | |||||
| bool SyncMemCopyStream(SwapKind swap_kind) override; | |||||
| DeviceAddressPtr UpdateSwapOutQueue() override; | |||||
| DeviceAddressPtr UpdateSwapInQueue() override; | |||||
| bool AllocHostPinnedMem(size_t size, void **addr) override; | |||||
| void FreeHostPinnedMem(void *addr) override; | |||||
| void ClearSwapQueue() override; | |||||
| private: | |||||
| DeviceStream swap_out_stream_{nullptr}; | |||||
| DeviceStream swap_in_stream_{nullptr}; | |||||
| std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_out_queue_; | |||||
| std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_in_queue_; | |||||
| }; | |||||
| using GPUMemCopyManagerPtr = std::shared_ptr<GPUMemCopyManager>; | |||||
| } // namespace gpu | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_ | |||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_ | |||||
| #include <vector> | #include <vector> | ||||
| #include <map> | #include <map> | ||||
| @@ -26,10 +26,6 @@ | |||||
| #include "device/gpu/cuda_driver.h" | #include "device/gpu/cuda_driver.h" | ||||
| #include "kernel/kernel.h" | #include "kernel/kernel.h" | ||||
| using mindspore::device::gpu::DeviceEvent; | |||||
| using mindspore::device::gpu::DeviceMemPtr; | |||||
| using mindspore::device::gpu::DeviceStream; | |||||
| using mindspore::device::gpu::HostMemPtr; | |||||
| using HostAddress = mindspore::kernel::Address; | using HostAddress = mindspore::kernel::Address; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| @@ -74,31 +70,29 @@ class MemCopyManager { | |||||
| public: | public: | ||||
| MemCopyManager() = default; | MemCopyManager() = default; | ||||
| ~MemCopyManager() = default; | |||||
| virtual ~MemCopyManager() = default; | |||||
| void Init(); | |||||
| virtual void Init() {} | |||||
| void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr); | |||||
| virtual void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {} | |||||
| void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr); | |||||
| virtual void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {} | |||||
| bool SyncMemCopyStream(SwapKind swap_kind); | |||||
| virtual bool SyncMemCopyStream(SwapKind swap_kind) { return true; } | |||||
| DeviceAddressPtr UpdateSwapOutQueue(); | |||||
| virtual DeviceAddressPtr UpdateSwapOutQueue() { return nullptr; } | |||||
| DeviceAddressPtr UpdateSwapInQueue(); | |||||
| virtual DeviceAddressPtr UpdateSwapInQueue() { return nullptr; } | |||||
| void ClearSwapQueue(); | |||||
| virtual bool AllocHostPinnedMem(size_t size, void **addr) { return true; } | |||||
| private: | |||||
| DeviceStream swap_out_stream_{nullptr}; | |||||
| DeviceStream swap_in_stream_{nullptr}; | |||||
| std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_out_queue_; | |||||
| std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_in_queue_; | |||||
| virtual void FreeHostPinnedMem(void *addr) {} | |||||
| virtual void ClearSwapQueue() {} | |||||
| }; | }; | ||||
| using MemCopyManagerPtr = std::shared_ptr<MemCopyManager>; | using MemCopyManagerPtr = std::shared_ptr<MemCopyManager>; | ||||
| } // namespace memswap | } // namespace memswap | ||||
| } // namespace device | } // namespace device | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_ | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_ | |||||
| @@ -17,7 +17,9 @@ | |||||
| #define TESTS_UT_STUB_RUNTIME_INCLUDE_CUDA_RUNTIME_API_H_ | #define TESTS_UT_STUB_RUNTIME_INCLUDE_CUDA_RUNTIME_API_H_ | ||||
| #include <cstddef> | #include <cstddef> | ||||
| typedef enum { cudaSuccess = 0 } cudaError_t; | |||||
| typedef enum { cudaSuccess = 0, cudaErrorNotReady = 1 } cudaError_t; | |||||
| unsigned int cudaEventDefault = 0; | |||||
| enum cudaMemcpyKind { | enum cudaMemcpyKind { | ||||
| cudaMemcpyHostToHost = 0, | cudaMemcpyHostToHost = 0, | ||||