Merge pull request !24714 from jjfeing/br_replace_rtmemcpyxx_with_acl_api (tags/v1.6.0)
| @@ -16,6 +16,7 @@ | |||
| #include "backend/kernel_compiler/rts/assign.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| using mindspore::ge::model_runner::MemcpyAsyncTaskInfo; | |||
| using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>; | |||
| @@ -26,7 +27,7 @@ AssignKernel::AssignKernel() {} | |||
| AssignKernel::~AssignKernel() {} | |||
| bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||
| bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /* workspace */, | |||
| const std::vector<AddressPtr> & /*outputs*/, void *stream_ptr) { | |||
| if (inputs.size() != 2) { | |||
| MS_LOG(ERROR) << "inputs size is not two"; | |||
| @@ -39,10 +40,10 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect | |||
| MS_LOG(INFO) << "first addr is same with second addr , no need assign"; | |||
| return true; | |||
| } | |||
| rtError_t status = rtMemcpyAsync(inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| rtError_t status = aclrtMemcpyAsync(inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, | |||
| ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "Assign op rtMemcpyAsync failed!"; | |||
| MS_LOG(ERROR) << "Assign op aclrtMemcpyAsync failed!"; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -59,7 +60,7 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in | |||
| MS_EXCEPTION_IF_NULL(inputs[1]); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, | |||
| inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); | |||
| inputs[1]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, false); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <string> | |||
| #include "abstract/utils.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "common/trans.h" | |||
| #include "utils/ms_context.h" | |||
| @@ -54,16 +55,17 @@ bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std: | |||
| return true; | |||
| } | |||
| if (outputs[0]->size < inputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << outputs[0]->size << " is less than src size " << inputs[0]->size; | |||
| MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax " << outputs[0]->size << " is less than src size " | |||
| << inputs[0]->size; | |||
| } | |||
| // input x -> memcpy_async -> AllReduce | |||
| if (outputs[0]->size > inputs[0]->size) { | |||
| MS_LOG(WARNING) << "rtMemcpyAsync destMax > src size"; | |||
| MS_LOG(WARNING) << "aclrtMemcpyAsync destMax > src size"; | |||
| } | |||
| rtError_t status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| rtError_t status = aclrtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, | |||
| ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!"; | |||
| MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!"; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -117,17 +119,17 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||
| MS_EXCEPTION_IF_NULL(outputs[0]); | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| if (outputs[0]->size < inputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax < src size"; | |||
| MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax < src size"; | |||
| } | |||
| // input x -> memcpy_async -> AllReduce | |||
| if (outputs[0]->size > inputs[0]->size) { | |||
| MS_LOG(WARNING) << "rtMemcpyAsync destMax > src size"; | |||
| MS_LOG(WARNING) << "aclrtMemcpyAsync destMax > src size"; | |||
| } | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, | |||
| inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| inputs[0]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -149,12 +151,12 @@ device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cno | |||
| MS_EXCEPTION_IF_NULL(kernel_outputs[0]); | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs[0]); | |||
| if (kernel_outputs[0]->size < kernel_inputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size " | |||
| MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size " | |||
| << kernel_inputs[0]->size; | |||
| } | |||
| // input x -> memcpy_async -> AllReduce | |||
| if (kernel_outputs[0]->size > kernel_inputs[0]->size) { | |||
| MS_LOG(WARNING) << "Check rtMemcpyAsync destMax > src size"; | |||
| MS_LOG(WARNING) << "Check aclrtMemcpyAsync destMax > src size"; | |||
| } | |||
| return std::make_shared<MemcpyRtsDynamicKernel>(stream_ptr, cnode_ptr, kernel_outputs[0]->addr, | |||
| @@ -24,6 +24,7 @@ | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| #include "common/trans.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/device/kernel_runtime.h" | |||
| #include "utils/ms_context.h" | |||
| @@ -53,16 +54,16 @@ bool TensorCopySlices::Launch(const std::vector<AddressPtr> &inputs, const std:: | |||
| return false; | |||
| } | |||
| auto status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| auto status = aclrtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, | |||
| ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!"; | |||
| MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!"; | |||
| return false; | |||
| } | |||
| status = rtMemcpyAsync(VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| status = aclrtMemcpyAsync(VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_, | |||
| ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!"; | |||
| MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!"; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -151,10 +152,10 @@ std::vector<TaskInfoPtr> TensorCopySlices::GenTask(const std::vector<AddressPtr> | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr1 = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, | |||
| inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| inputs[0]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr2 = std::make_shared<MemcpyAsyncTaskInfo>( | |||
| unique_name_, stream_id, VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| return {task_info_ptr1, task_info_ptr2}; | |||
| } | |||
| @@ -28,6 +28,7 @@ | |||
| #include "proto/attr.pb.h" | |||
| #include "proto/node_def.pb.h" | |||
| #include "runtime/rt.h" | |||
| #include "acl/acl_rt.h" | |||
| using mindspore::kernel::Address; | |||
| using AddressPtr = std::shared_ptr<Address>; | |||
| @@ -198,9 +199,9 @@ bool AscendPsCache::SynchronizeStream() { | |||
| bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) { | |||
| MS_ERROR_IF_NULL(dst); | |||
| MS_ERROR_IF_NULL(src); | |||
| auto ret = rtMemcpyAsync(dst, size, src, size, RT_MEMCPY_HOST_TO_DEVICE, stream_); | |||
| auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE, stream_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtMemcpyAsync failed, the error num is:" << ret; | |||
| MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -209,9 +210,9 @@ bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) | |||
| bool AscendPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) { | |||
| MS_ERROR_IF_NULL(dst); | |||
| MS_ERROR_IF_NULL(src); | |||
| auto ret = rtMemcpyAsync(dst, size, src, size, RT_MEMCPY_DEVICE_TO_HOST, stream_); | |||
| auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_DEVICE_TO_HOST, stream_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtMemcpyAsync failed, the error num is:" << ret; | |||
| MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -109,13 +109,14 @@ void AscendBucket::CopyTensorToContiguousMemory() { | |||
| MS_LOG(DEBUG) << "MemcpyAsync dst size:" << memcpy_output_addrs_[i]->size | |||
| << " src size:" << memcpy_input_addrs_[i]->size; | |||
| if (memcpy_output_addrs_[i]->size < memcpy_input_addrs_[i]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync dst size < src size"; | |||
| MS_LOG(EXCEPTION) << "aclrtMemcpyAsync dst size < src size"; | |||
| } | |||
| auto ret = rtMemcpyAsync(memcpy_output_addrs_[i]->addr, memcpy_output_addrs_[i]->size, memcpy_input_addrs_[i]->addr, | |||
| memcpy_input_addrs_[i]->size, RT_MEMCPY_DEVICE_TO_DEVICE, compute_stream_); | |||
| auto ret = | |||
| aclrtMemcpyAsync(memcpy_output_addrs_[i]->addr, memcpy_output_addrs_[i]->size, memcpy_input_addrs_[i]->addr, | |||
| memcpy_input_addrs_[i]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, compute_stream_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rtMemcpyAsync failed, error code:" << ret; | |||
| MS_LOG(EXCEPTION) << "Call aclrtMemcpyAsync failed, error code:" << ret; | |||
| } | |||
| } | |||
| } | |||
| @@ -21,6 +21,7 @@ | |||
| #include <set> | |||
| #include <algorithm> | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/device/kernel_runtime_manager.h" | |||
| #include "runtime/device/kernel_runtime.h" | |||
| #include "runtime/device/memory_manager.h" | |||
| @@ -74,7 +75,7 @@ const std::set<std::string> kOpNeedTransFormat = { | |||
| kOpFormat_NHWC, kOpFormat_HWCN, kOpFormat_NC1HWC0, kOpFormat_FRAC_Z, kOpFormat_C1HWNCoC0, | |||
| kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D}; | |||
| void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) { | |||
| void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind) { | |||
| if (size == 0) { | |||
| return; | |||
| } | |||
| @@ -86,11 +87,11 @@ void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| runtime_instance->SetContext(); | |||
| // Only apply asynchronous copy in Pynative && RT_MEMCPY_HOST_TO_DEVICE mode | |||
| if (execution_mode != kPynativeMode || kind != RT_MEMCPY_HOST_TO_DEVICE) { | |||
| auto ret_rt_memcpy = rtMemcpy(dst, size, src, size, kind); | |||
| // Only apply asynchronous copy in Pynative && ACL_MEMCPY_HOST_TO_DEVICE mode | |||
| if (execution_mode != kPynativeMode || kind != ACL_MEMCPY_HOST_TO_DEVICE) { | |||
| auto ret_rt_memcpy = aclrtMemcpy(dst, size, src, size, kind); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtMemcpy failed"; | |||
| MS_EXCEPTION(DeviceProcessError) << "aclrtMemcpy failed"; | |||
| } | |||
| } else { | |||
| auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(RT_MEMCPY_HOST_TO_DEVICE_EX)); | |||
| @@ -126,7 +127,7 @@ bool FloatToHalfAndSyncHostToDevice(void *dst, size_t dst_size, const void *src, | |||
| } | |||
| std::vector<float16> half_data(elem_num); | |||
| FloatToHalf(half_data.data(), src, elem_num); | |||
| SyncMemory(dst, half_data.data(), dst_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(dst, half_data.data(), dst_size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| return true; | |||
| } | |||
| @@ -137,7 +138,7 @@ bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *s | |||
| size_t elem_num = dst_size / sizeof(float); | |||
| auto host_tmp = std::vector<float>(elem_num); | |||
| DoubleToFloat(host_tmp.data(), src, elem_num); | |||
| SyncMemory(dst, host_tmp.data(), dst_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(dst, host_tmp.data(), dst_size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| return true; | |||
| } | |||
| @@ -148,7 +149,7 @@ bool SyncDeviceToHostAndHalfToFloat(void *dst, size_t dst_size, const void *src, | |||
| << dst_size << "]"; | |||
| } | |||
| std::vector<float16> half_data(elem_num); | |||
| SyncMemory(half_data.data(), src, src_size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(half_data.data(), src, src_size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| HalfToFloat(dst, half_data.data(), elem_num); | |||
| return true; | |||
| } | |||
| @@ -159,7 +160,7 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s | |||
| } | |||
| size_t elem_num = src_size / sizeof(float); | |||
| auto host_tmp = std::vector<float>(elem_num); | |||
| SyncMemory(host_tmp.data(), src, src_size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host_tmp.data(), src, src_size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| FloatToDouble(dst, host_tmp.data(), elem_num); | |||
| return true; | |||
| } | |||
| @@ -208,14 +209,14 @@ bool AscendDeviceAddress::SyncDeviceToHost(size_t size, void *const host_ptr) co | |||
| MS_EXCEPTION_IF_NULL(host_ptr); | |||
| BindDevice(); | |||
| SyncStream(); | |||
| SyncMemory(host_ptr, ptr_, size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host_ptr, ptr_, size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| return true; | |||
| } | |||
| bool AscendDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const { | |||
| MS_EXCEPTION_IF_NULL(host_ptr); | |||
| BindDevice(); | |||
| SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(ptr_, host_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| return true; | |||
| } | |||
| @@ -236,14 +237,14 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size | |||
| } | |||
| if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NCDHW) { | |||
| if (type_id_ == type) { | |||
| SyncMemory(host_ptr, ptr_, size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host_ptr, ptr_, size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| sync_ok = true; | |||
| } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) { | |||
| sync_ok = SyncDeviceToHostAndFloatToFloat64(host_ptr, size, ptr_, size_); | |||
| } else { | |||
| auto shape_size = abstract::ShapeSize(host_shape); | |||
| auto host = std::vector<uint8_t>(size_); | |||
| SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host.data(), ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size_}; | |||
| sync_ok = trans::TransDataType(type_args, host_ptr); | |||
| if (!sync_ok) { | |||
| @@ -317,10 +318,10 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const | |||
| return false; | |||
| } | |||
| if (type_id_ == type) { | |||
| SyncMemory(host_ptr, output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host_ptr, output_addr_vec[0], size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| } else { | |||
| auto host = std::vector<uint8_t>(size); | |||
| SyncMemory(host.data(), output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host.data(), output_addr_vec[0], size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| auto shape_size = abstract::ShapeSize(host_shape); | |||
| const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size}; | |||
| sync_ok = trans::TransDataType(type_args, host_ptr); | |||
| @@ -356,7 +357,7 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const ShapeVector &sh | |||
| } | |||
| } | |||
| auto host_tmp = std::vector<uint8_t>(size_); | |||
| SyncMemory(host_tmp.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| SyncMemory(host_tmp.data(), ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| auto node_index = GetNodeIndex(); | |||
| if (type_id_ != type) { | |||
| const trans::FormatArgs format_args{host_tmp.data(), size_, kOpFormat_NCHW, format_, | |||
| @@ -403,7 +404,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size | |||
| } | |||
| if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NCDHW || format_ == format) { | |||
| if (type_id_ == type) { | |||
| SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(ptr_, host_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| sync_ok = true; | |||
| } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) { | |||
| sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size); | |||
| @@ -416,7 +417,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size | |||
| MS_LOG(ERROR) << "Trans data type failed."; | |||
| return false; | |||
| } | |||
| SyncMemory(ptr_, host_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(ptr_, host_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| } | |||
| } else { | |||
| auto iter = kOpNeedTransFormat.find(format_); | |||
| @@ -449,7 +450,7 @@ bool AscendDeviceAddress::SyncDeviceToDevice(const ShapeVector &shape, size_t si | |||
| MS_LOG(ERROR) << "src size is greater than det size, src size is: " << size << ", dst size is: " << size_; | |||
| return false; | |||
| } | |||
| auto ret_rt_memcpy = rtMemcpy(ptr_, size, src_ptr, size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
| auto ret_rt_memcpy = aclrtMemcpy(ptr_, size, src_ptr, size, ACL_MEMCPY_DEVICE_TO_DEVICE); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "SyncDeviceToDevice failed, rtMemcpy mem size [" << size << "], ret [" << ret_rt_memcpy << "]"; | |||
| return false; | |||
| @@ -515,7 +516,7 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const ShapeVector &sh | |||
| MS_LOG(ERROR) << "Trans format failed."; | |||
| return false; | |||
| } | |||
| SyncMemory(ptr_, dst_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(ptr_, dst_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| } else { | |||
| const trans::FormatArgs format_args{host_ptr, size_, kOpFormat_NCHW, format_, host_shape, device_shape, type_id_}; | |||
| auto host_tmp = std::vector<uint8_t>(size_); | |||
| @@ -524,7 +525,7 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const ShapeVector &sh | |||
| MS_LOG(ERROR) << "Trans format failed."; | |||
| return false; | |||
| } | |||
| SyncMemory(ptr_, host_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| SyncMemory(ptr_, host_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| } | |||
| return sync_ok; | |||
| } | |||
| @@ -579,9 +580,9 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std:: | |||
| ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size, host_shape, host_type); | |||
| } else { | |||
| auto host_tmp = std::vector<uint8_t>(size_); | |||
| auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| auto ret_rt_memcpy = aclrtMemcpy(host_tmp.data(), size_, ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; | |||
| MS_LOG(ERROR) << "SyncDeviceToHost: aclrtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; | |||
| } | |||
| std::string path = filepath + '.' + format_; | |||
| MS_LOG(INFO) << "E2E Dump path is " << path; | |||
| @@ -28,6 +28,7 @@ | |||
| #include "utils/mpi/mpi_config.h" | |||
| #include "common/trans.h" | |||
| #include "runtime/rt.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/device/ascend/ascend_stream_manager.h" | |||
| #include "runtime/device/ascend/ascend_stream_assign.h" | |||
| #include "runtime/device/ascend/ge_runtime/model_runner.h" | |||
| @@ -17,6 +17,7 @@ | |||
| #include "runtime/device/ascend/ascend_launch_mul.h" | |||
| #include "abstract/utils.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "backend/session/single_kernel_graph.h" | |||
| #include "frontend/parallel/context.h" | |||
| @@ -52,9 +53,9 @@ void AscendLaunchMul::FreeLaunchDeviceMem() { | |||
| } | |||
| void AscendLaunchMul::CopyHostMemToDevice(size_t origin_size, size_t dst_size) { | |||
| auto ret = rtMemcpyAsync(input2_addr_, dst_size, &input2_value_, origin_size, RT_MEMCPY_HOST_TO_DEVICE, stream_); | |||
| auto ret = aclrtMemcpyAsync(input2_addr_, dst_size, &input2_value_, origin_size, ACL_MEMCPY_HOST_TO_DEVICE, stream_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "launch rtMemcpyAsync failed, ret:" << ret; | |||
| MS_LOG(EXCEPTION) << "launch aclrtMemcpyAsync failed, ret:" << ret; | |||
| } | |||
| } | |||
| } // namespace mindspore::device::ascend | |||
| @@ -19,6 +19,7 @@ | |||
| #include "runtime/device/ascend/ascend_memory_adapter.h" | |||
| #include "utils/ms_context.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #ifndef ENABLE_SECURITY | |||
| #include "runtime/device/ascend/profiling/profiling_manager.h" | |||
| #include "profiler/device/ascend/memory_profiling.h" | |||
| @@ -136,14 +137,14 @@ size_t AscendMemoryManager::GetAvailableMemSize() { | |||
| void AscendMemoryManager::SwapIn(const void *host_ptr, void *device_ptr, size_t mem_size, void *stream) { | |||
| if (stream == nullptr) { | |||
| auto ret_rt_memcpy = rtMemcpy(device_ptr, mem_size, host_ptr, mem_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| auto ret_rt_memcpy = aclrtMemcpy(device_ptr, mem_size, host_ptr, mem_size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapIn rtMemcpy failed."; | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapIn aclrtMemcpy failed."; | |||
| } | |||
| } else { | |||
| auto ret_rt_memcpy = rtMemcpyAsync(device_ptr, mem_size, host_ptr, mem_size, RT_MEMCPY_HOST_TO_DEVICE, stream); | |||
| auto ret_rt_memcpy = aclrtMemcpyAsync(device_ptr, mem_size, host_ptr, mem_size, ACL_MEMCPY_HOST_TO_DEVICE, stream); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapIn rtMemcpyAsync failed."; | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapIn aclrtMemcpyAsync failed."; | |||
| } | |||
| if (rtStreamSynchronize(stream) != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; | |||
| @@ -153,14 +154,14 @@ void AscendMemoryManager::SwapIn(const void *host_ptr, void *device_ptr, size_t | |||
| void AscendMemoryManager::SwapOut(const void *device_ptr, void *host_ptr, size_t mem_size, void *stream) { | |||
| if (stream == nullptr) { | |||
| auto ret_rt_memcpy = rtMemcpy(host_ptr, mem_size, device_ptr, mem_size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| auto ret_rt_memcpy = aclrtMemcpy(host_ptr, mem_size, device_ptr, mem_size, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapOut rtMemcpy failed."; | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapOut aclrtMemcpy failed."; | |||
| } | |||
| } else { | |||
| auto ret_rt_memcpy = rtMemcpyAsync(host_ptr, mem_size, device_ptr, mem_size, RT_MEMCPY_DEVICE_TO_HOST, stream); | |||
| auto ret_rt_memcpy = aclrtMemcpyAsync(host_ptr, mem_size, device_ptr, mem_size, ACL_MEMCPY_DEVICE_TO_HOST, stream); | |||
| if (ret_rt_memcpy != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapOut rtMemcpyAsync failed."; | |||
| MS_EXCEPTION(DeviceProcessError) << "SwapOut aclrtMemcpyAsync failed."; | |||
| } | |||
| if (rtStreamSynchronize(stream) != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; | |||
| @@ -24,6 +24,7 @@ | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/convert_utils_base.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/kernel.h" | |||
| #include "runtime/rt_model.h" | |||
| #include "runtime/device/ascend/ge_types_convert.h" | |||
| @@ -335,9 +336,9 @@ void DataDumper::OpDebugRegister() { | |||
| } | |||
| rt_ret = | |||
| rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE); | |||
| aclrtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret; | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call aclrtMemcpy failed, ret = " << rt_ret; | |||
| } | |||
| rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_); | |||
| @@ -381,9 +382,9 @@ void DataDumper::RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, voi | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; | |||
| } | |||
| rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| rt_ret = aclrtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call aclrtMemcpy failed"; | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; | |||
| @@ -29,6 +29,7 @@ | |||
| #include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h" | |||
| #include "common/trans.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| #include "acl/acl_rt.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| @@ -207,14 +208,14 @@ bool AiCoreDynamicKernel::CopyTilingToDevice() { | |||
| } | |||
| if (tiling_data_.empty() || tiling_data_ptr_ == nullptr) { | |||
| MS_LOG(INFO) << "Tiling size is 0, skip rtMemcpyAsync"; | |||
| MS_LOG(INFO) << "Tiling size is 0, skip aclrtMemcpyAsync"; | |||
| return true; | |||
| } | |||
| auto ret = rtMemcpyAsync(tiling_data_ptr_, tiling_data_.size(), tiling_data_.c_str(), tiling_data_.size(), | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream_); | |||
| auto ret = aclrtMemcpyAsync(tiling_data_ptr_, tiling_data_.size(), tiling_data_.c_str(), tiling_data_.size(), | |||
| ACL_MEMCPY_HOST_TO_DEVICE, stream_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Tiling rtMemcpyAsync failed, ret:" << ret; | |||
| MS_LOG(EXCEPTION) << "Tiling aclrtMemcpyAsync failed, ret:" << ret; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -16,6 +16,7 @@ | |||
| #include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/kernel.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| @@ -95,10 +96,10 @@ void AiCpuDynamicKernel::Initialize() { | |||
| } | |||
| ext_info_size_ = ext_info_data_.size(); | |||
| ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_data_.data(), ext_info_data_.size(), | |||
| RT_MEMCPY_HOST_TO_DEVICE); | |||
| ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_data_.data(), ext_info_data_.size(), | |||
| ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rtMemcpy ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope(); | |||
| MS_LOG(EXCEPTION) << "Call aclrtMemcpy ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope(); | |||
| } | |||
| auto aicpu_param_head = reinterpret_cast<kernel::AicpuParamHead *>(args_.data()); | |||
| @@ -169,10 +170,10 @@ bool AiCpuDynamicKernel::UpdateExtInfo() { | |||
| } | |||
| } | |||
| auto ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(), | |||
| ext_info_handler_->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| auto ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(), | |||
| ext_info_handler_->GetExtInfoLen(), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "UpdateExtInfo rtMemcpy failed. Node info: " << cnode->fullname_with_scope(); | |||
| MS_LOG(ERROR) << "UpdateExtInfo aclrtMemcpy failed. Node info: " << cnode->fullname_with_scope(); | |||
| return false; | |||
| } | |||
| @@ -185,14 +186,14 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope(); | |||
| MS_EXCEPTION_IF_NULL(ext_info_handler_); | |||
| auto ret = rtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_, | |||
| ext_info_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| auto ret = aclrtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_, | |||
| ext_info_size_, ACL_MEMCPY_DEVICE_TO_HOST); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtMemcpy output shape failed. Op name: " << cnode->fullname_with_scope(); | |||
| MS_LOG(ERROR) << "aclrtMemcpy output shape failed. Op name: " << cnode->fullname_with_scope(); | |||
| return false; | |||
| } | |||
| MS_LOG(INFO) << "rtMemcpy from device to host success"; | |||
| MS_LOG(INFO) << "aclrtMemcpy from device to host success"; | |||
| std::vector<TypeId> type_ids; | |||
| std::vector<std::vector<size_t>> shapes; | |||
| @@ -17,14 +17,15 @@ | |||
| #include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| void MemcpyRtsDynamicKernel::Execute() { | |||
| auto status = rtMemcpyAsync(dst_, dest_max_, src_, count_, RT_MEMCPY_DEVICE_TO_DEVICE, stream_); | |||
| auto status = aclrtMemcpyAsync(dst_, dest_max_, src_, count_, ACL_MEMCPY_DEVICE_TO_DEVICE, stream_); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "MemCpyAsync op execute rtMemcpyAsync failed!"; | |||
| MS_LOG(EXCEPTION) << "MemCpyAsync op execute aclrtMemcpyAsync failed!"; | |||
| } | |||
| } | |||
| } // namespace ascend | |||
| @@ -17,6 +17,7 @@ | |||
| #include "runtime/device/ascend/ge_runtime/task/aicpu_task.h" | |||
| #include <vector> | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/kernel.h" | |||
| #include "runtime/device/ascend/ge_runtime/task/task_factory.h" | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| @@ -116,8 +117,8 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << flag; | |||
| } | |||
| flag = rtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())), ext_size, | |||
| RT_MEMCPY_HOST_TO_DEVICE); | |||
| flag = aclrtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())), | |||
| ext_size, ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (flag != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << flag; | |||
| } | |||
| @@ -128,8 +129,8 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) { | |||
| } | |||
| // Memcpy AicpuParamHead | |||
| auto rt_ret = rtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head), | |||
| sizeof(aicpu::AicpuParamHead), RT_MEMCPY_HOST_TO_DEVICE); | |||
| auto rt_ret = aclrtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head), | |||
| sizeof(aicpu::AicpuParamHead), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| @@ -138,9 +139,9 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) { | |||
| void AicpuTask::SetInputOutputAddrs(const std::vector<void *> &io_addrs, uint32_t io_addr_offset) { | |||
| // Memcpy io addrs | |||
| if (!io_addrs.empty()) { | |||
| auto rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset), | |||
| static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(), | |||
| static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE); | |||
| auto rt_ret = aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset), | |||
| static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(), | |||
| static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| @@ -151,16 +152,16 @@ void AicpuTask::SetNodeDef(uint32_t node_def_len_offset, uint32_t node_def_addr_ | |||
| // Memcpy node def | |||
| auto size = task_info_->node_def().size(); | |||
| auto rt_ret = | |||
| rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t), | |||
| reinterpret_cast<const void *>(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t), | |||
| reinterpret_cast<const void *>(&size), sizeof(uint32_t), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| // Memcpy node def | |||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset), | |||
| task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()), | |||
| task_info_->node_def().size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| rt_ret = aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset), | |||
| task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()), | |||
| task_info_->node_def().size(), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| @@ -16,6 +16,7 @@ | |||
| #include "runtime/device/ascend/ge_runtime/task/label_goto_task.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/device/ascend/ge_runtime/task/task_factory.h" | |||
| namespace mindspore::ge::model_runner { | |||
| @@ -64,7 +65,7 @@ void LabelGotoTask::Distribute() { | |||
| } | |||
| uint64_t index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE); | |||
| rt_ret = aclrtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| @@ -16,6 +16,7 @@ | |||
| #include "runtime/device/ascend/ge_runtime/task/memcpy_async_task.h" | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/device/ascend/ge_runtime/task/task_factory.h" | |||
| namespace mindspore::ge::model_runner { | |||
| @@ -39,10 +40,10 @@ void MemcpyAsyncTask::Distribute() { | |||
| MS_LOG(INFO) << "MemcpyAsyncTask Distribute start."; | |||
| MS_LOG(INFO) << "dst_max: " << task_info_->dst_max() << ", count: " << task_info_->count() | |||
| << ", kind: " << task_info_->kind(); | |||
| rtError_t rt_ret = rtMemcpyAsync(task_info_->dst(), task_info_->dst_max(), task_info_->src(), task_info_->count(), | |||
| static_cast<rtMemcpyKind_t>(task_info_->kind()), stream_); | |||
| rtError_t rt_ret = aclrtMemcpyAsync(task_info_->dst(), task_info_->dst_max(), task_info_->src(), task_info_->count(), | |||
| static_cast<aclrtMemcpyKind>(task_info_->kind()), stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpyAsync failed, ret: " << rt_ret; | |||
| MS_LOG(EXCEPTION) << "Call rt api aclrtMemcpyAsync failed, ret: " << rt_ret; | |||
| } | |||
| MS_LOG(INFO) << "DistributeTask end"; | |||
| } | |||
| @@ -17,6 +17,7 @@ | |||
| #include "runtime/device/ascend/ge_runtime/task/tbe_task.h" | |||
| #include <vector> | |||
| #include "runtime/mem.h" | |||
| #include "acl/acl_rt.h" | |||
| #include "runtime/kernel.h" | |||
| #include "runtime/device/ascend/ge_runtime/task/task_factory.h" | |||
| @@ -78,8 +79,8 @@ void TbeTask::Distribute() { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << rt_ret << " mem size " << args_size; | |||
| } | |||
| rt_ret = rtMemcpy(args_, args_size, reinterpret_cast<void *>(tensor_device_addrs.data()), args_size, | |||
| RT_MEMCPY_HOST_TO_DEVICE); | |||
| rt_ret = aclrtMemcpy(args_, args_size, reinterpret_cast<void *>(tensor_device_addrs.data()), args_size, | |||
| ACL_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret; | |||
| } | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "acl/acl_rt.h" | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief synchronous memory replication between host and device | |||
| * | |||
| * @param dst [IN] destination address pointer | |||
| * @param destMax [IN] Max length of the destination address memory | |||
| * @param src [IN] source address pointer | |||
| * @param count [IN] the length of byte to copy | |||
| * @param kind [IN] memcpy type | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||
| aclrtMemcpyKind kind) { | |||
| return ACL_ERROR_NONE; | |||
| } | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief Asynchronous memory replication between Host and Device | |||
| * | |||
| * @par Function | |||
| * After calling this interface, | |||
| * be sure to call the aclrtSynchronizeStream interface to ensure that | |||
| * the task of memory replication has been completed | |||
| * | |||
| * @par Restriction | |||
| * @li For on-chip Device-to-Device memory copy, | |||
| * both the source and destination addresses must be 64-byte aligned | |||
| * | |||
| * @param dst [IN] destination address pointer | |||
| * @param destMax [IN] Max length of destination address memory | |||
| * @param src [IN] source address pointer | |||
| * @param count [IN] the number of byte to copy | |||
| * @param kind [IN] memcpy type | |||
| * @param stream [IN] asynchronized task stream | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @see aclrtSynchronizeStream | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||
| aclrtMemcpyKind kind, aclrtStream stream) { | |||
| return ACL_ERROR_NONE; | |||
| } | |||