Browse Source

!24714 replace rtmemcpyxx with acl memcpy

Merge pull request !24714 from jjfeing/br_replace_rtmemcpyxx_with_acl_api
tags/v1.6.0
i-robot Gitee 4 years ago
parent
commit
cfc6ea32ff
18 changed files with 185 additions and 102 deletions
  1. +6
    -5
      mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc
  2. +12
    -10
      mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc
  3. +9
    -8
      mindspore/ccsrc/backend/kernel_compiler/rts/tensor_copy_slices.cc
  4. +5
    -4
      mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc
  5. +5
    -4
      mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
  6. +24
    -23
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
  7. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
  8. +3
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.cc
  9. +9
    -8
      mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
  10. +5
    -4
      mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
  11. +5
    -4
      mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc
  12. +11
    -10
      mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
  13. +3
    -2
      mindspore/ccsrc/runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.cc
  14. +13
    -12
      mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/aicpu_task.cc
  15. +2
    -1
      mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/label_goto_task.cc
  16. +4
    -3
      mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/memcpy_async_task.cc
  17. +3
    -2
      mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/tbe_task.cc
  18. +65
    -0
      tests/ut/cpp/stub/runtime/acl_rt.cc

+ 6
- 5
mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc View File

@@ -16,6 +16,7 @@

#include "backend/kernel_compiler/rts/assign.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"

using mindspore::ge::model_runner::MemcpyAsyncTaskInfo;
using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
@@ -26,7 +27,7 @@ AssignKernel::AssignKernel() {}

AssignKernel::~AssignKernel() {}

bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /* workspace */,
const std::vector<AddressPtr> & /*outputs*/, void *stream_ptr) {
if (inputs.size() != 2) {
MS_LOG(ERROR) << "inputs size is not two";
@@ -39,10 +40,10 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect
MS_LOG(INFO) << "first addr is same with second addr , no need assign";
return true;
}
rtError_t status = rtMemcpyAsync(inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size,
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
rtError_t status = aclrtMemcpyAsync(inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Assign op rtMemcpyAsync failed!";
MS_LOG(ERROR) << "Assign op aclrtMemcpyAsync failed!";
return false;
}
return true;
@@ -59,7 +60,7 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in
MS_EXCEPTION_IF_NULL(inputs[1]);
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr,
inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false);
inputs[1]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, false);
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}


+ 12
- 10
mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc View File

@@ -19,6 +19,7 @@
#include <string>
#include "abstract/utils.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/trans.h"
#include "utils/ms_context.h"
@@ -54,16 +55,17 @@ bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std:
return true;
}
if (outputs[0]->size < inputs[0]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << outputs[0]->size << " is less than src size " << inputs[0]->size;
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax " << outputs[0]->size << " is less than src size "
<< inputs[0]->size;
}
// input x -> memcpy_async -> AllReduce
if (outputs[0]->size > inputs[0]->size) {
MS_LOG(WARNING) << "rtMemcpyAsync destMax > src size";
MS_LOG(WARNING) << "aclrtMemcpyAsync destMax > src size";
}
rtError_t status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size,
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
rtError_t status = aclrtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!";
MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!";
return false;
}
return true;
@@ -117,17 +119,17 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
MS_EXCEPTION_IF_NULL(outputs[0]);
MS_EXCEPTION_IF_NULL(inputs[0]);
if (outputs[0]->size < inputs[0]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax < src size";
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax < src size";
}
// input x -> memcpy_async -> AllReduce
if (outputs[0]->size > inputs[0]->size) {
MS_LOG(WARNING) << "rtMemcpyAsync destMax > src size";
MS_LOG(WARNING) << "aclrtMemcpyAsync destMax > src size";
}

stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr,
inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
inputs[0]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
@@ -149,12 +151,12 @@ device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cno
MS_EXCEPTION_IF_NULL(kernel_outputs[0]);
MS_EXCEPTION_IF_NULL(kernel_inputs[0]);
if (kernel_outputs[0]->size < kernel_inputs[0]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size "
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size "
<< kernel_inputs[0]->size;
}
// input x -> memcpy_async -> AllReduce
if (kernel_outputs[0]->size > kernel_inputs[0]->size) {
MS_LOG(WARNING) << "Check rtMemcpyAsync destMax > src size";
MS_LOG(WARNING) << "Check aclrtMemcpyAsync destMax > src size";
}

return std::make_shared<MemcpyRtsDynamicKernel>(stream_ptr, cnode_ptr, kernel_outputs[0]->addr,


+ 9
- 8
mindspore/ccsrc/backend/kernel_compiler/rts/tensor_copy_slices.cc View File

@@ -24,6 +24,7 @@
#include "backend/kernel_compiler/common_utils.h"
#include "common/trans.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/device/kernel_runtime.h"
#include "utils/ms_context.h"

@@ -53,16 +54,16 @@ bool TensorCopySlices::Launch(const std::vector<AddressPtr> &inputs, const std::
return false;
}

auto status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size,
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
auto status = aclrtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!";
MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!";
return false;
}
status = rtMemcpyAsync(VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_,
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
status = aclrtMemcpyAsync(VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!";
MS_LOG(ERROR) << "MemCpyAsync op aclrtMemcpyAsync failed!";
return false;
}
return true;
@@ -151,10 +152,10 @@ std::vector<TaskInfoPtr> TensorCopySlices::GenTask(const std::vector<AddressPtr>
stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr1 =
std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr,
inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
inputs[0]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr2 = std::make_shared<MemcpyAsyncTaskInfo>(
unique_name_, stream_id, VoidPointerOffset(outputs[0]->addr, offset_), copy_size_, inputs[1]->addr, copy_size_,
RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
ACL_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
return {task_info_ptr1, task_info_ptr2};
}



+ 5
- 4
mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc View File

@@ -28,6 +28,7 @@
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "runtime/rt.h"
#include "acl/acl_rt.h"

using mindspore::kernel::Address;
using AddressPtr = std::shared_ptr<Address>;
@@ -198,9 +199,9 @@ bool AscendPsCache::SynchronizeStream() {
bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
auto ret = rtMemcpyAsync(dst, size, src, size, RT_MEMCPY_HOST_TO_DEVICE, stream_);
auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtMemcpyAsync failed, the error num is:" << ret;
MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
return false;
}
return true;
@@ -209,9 +210,9 @@ bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size)
bool AscendPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
auto ret = rtMemcpyAsync(dst, size, src, size, RT_MEMCPY_DEVICE_TO_HOST, stream_);
auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_DEVICE_TO_HOST, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtMemcpyAsync failed, the error num is:" << ret;
MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
return false;
}
return true;


+ 5
- 4
mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc View File

@@ -109,13 +109,14 @@ void AscendBucket::CopyTensorToContiguousMemory() {
MS_LOG(DEBUG) << "MemcpyAsync dst size:" << memcpy_output_addrs_[i]->size
<< " src size:" << memcpy_input_addrs_[i]->size;
if (memcpy_output_addrs_[i]->size < memcpy_input_addrs_[i]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync dst size < src size";
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync dst size < src size";
}

auto ret = rtMemcpyAsync(memcpy_output_addrs_[i]->addr, memcpy_output_addrs_[i]->size, memcpy_input_addrs_[i]->addr,
memcpy_input_addrs_[i]->size, RT_MEMCPY_DEVICE_TO_DEVICE, compute_stream_);
auto ret =
aclrtMemcpyAsync(memcpy_output_addrs_[i]->addr, memcpy_output_addrs_[i]->size, memcpy_input_addrs_[i]->addr,
memcpy_input_addrs_[i]->size, ACL_MEMCPY_DEVICE_TO_DEVICE, compute_stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rtMemcpyAsync failed, error code:" << ret;
MS_LOG(EXCEPTION) << "Call aclrtMemcpyAsync failed, error code:" << ret;
}
}
}


+ 24
- 23
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc View File

@@ -21,6 +21,7 @@
#include <set>
#include <algorithm>
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/memory_manager.h"
@@ -74,7 +75,7 @@ const std::set<std::string> kOpNeedTransFormat = {
kOpFormat_NHWC, kOpFormat_HWCN, kOpFormat_NC1HWC0, kOpFormat_FRAC_Z, kOpFormat_C1HWNCoC0,
kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D};

void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) {
void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind) {
if (size == 0) {
return;
}
@@ -86,11 +87,11 @@ void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind)
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->SetContext();

// Only apply asynchronous copy in Pynative && RT_MEMCPY_HOST_TO_DEVICE mode
if (execution_mode != kPynativeMode || kind != RT_MEMCPY_HOST_TO_DEVICE) {
auto ret_rt_memcpy = rtMemcpy(dst, size, src, size, kind);
// Only apply asynchronous copy in Pynative && ACL_MEMCPY_HOST_TO_DEVICE mode
if (execution_mode != kPynativeMode || kind != ACL_MEMCPY_HOST_TO_DEVICE) {
auto ret_rt_memcpy = aclrtMemcpy(dst, size, src, size, kind);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "rtMemcpy failed";
MS_EXCEPTION(DeviceProcessError) << "aclrtMemcpy failed";
}
} else {
auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(RT_MEMCPY_HOST_TO_DEVICE_EX));
@@ -126,7 +127,7 @@ bool FloatToHalfAndSyncHostToDevice(void *dst, size_t dst_size, const void *src,
}
std::vector<float16> half_data(elem_num);
FloatToHalf(half_data.data(), src, elem_num);
SyncMemory(dst, half_data.data(), dst_size, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(dst, half_data.data(), dst_size, ACL_MEMCPY_HOST_TO_DEVICE);
return true;
}

@@ -137,7 +138,7 @@ bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *s
size_t elem_num = dst_size / sizeof(float);
auto host_tmp = std::vector<float>(elem_num);
DoubleToFloat(host_tmp.data(), src, elem_num);
SyncMemory(dst, host_tmp.data(), dst_size, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(dst, host_tmp.data(), dst_size, ACL_MEMCPY_HOST_TO_DEVICE);
return true;
}

@@ -148,7 +149,7 @@ bool SyncDeviceToHostAndHalfToFloat(void *dst, size_t dst_size, const void *src,
<< dst_size << "]";
}
std::vector<float16> half_data(elem_num);
SyncMemory(half_data.data(), src, src_size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(half_data.data(), src, src_size, ACL_MEMCPY_DEVICE_TO_HOST);
HalfToFloat(dst, half_data.data(), elem_num);
return true;
}
@@ -159,7 +160,7 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s
}
size_t elem_num = src_size / sizeof(float);
auto host_tmp = std::vector<float>(elem_num);
SyncMemory(host_tmp.data(), src, src_size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_tmp.data(), src, src_size, ACL_MEMCPY_DEVICE_TO_HOST);
FloatToDouble(dst, host_tmp.data(), elem_num);
return true;
}
@@ -208,14 +209,14 @@ bool AscendDeviceAddress::SyncDeviceToHost(size_t size, void *const host_ptr) co
MS_EXCEPTION_IF_NULL(host_ptr);
BindDevice();
SyncStream();
SyncMemory(host_ptr, ptr_, size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_ptr, ptr_, size, ACL_MEMCPY_DEVICE_TO_HOST);
return true;
}

bool AscendDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const {
MS_EXCEPTION_IF_NULL(host_ptr);
BindDevice();
SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, host_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE);
return true;
}

@@ -236,14 +237,14 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size
}
if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NCDHW) {
if (type_id_ == type) {
SyncMemory(host_ptr, ptr_, size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_ptr, ptr_, size, ACL_MEMCPY_DEVICE_TO_HOST);
sync_ok = true;
} else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
sync_ok = SyncDeviceToHostAndFloatToFloat64(host_ptr, size, ptr_, size_);
} else {
auto shape_size = abstract::ShapeSize(host_shape);
auto host = std::vector<uint8_t>(size_);
SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host.data(), ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST);
const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size_};
sync_ok = trans::TransDataType(type_args, host_ptr);
if (!sync_ok) {
@@ -317,10 +318,10 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
return false;
}
if (type_id_ == type) {
SyncMemory(host_ptr, output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_ptr, output_addr_vec[0], size, ACL_MEMCPY_DEVICE_TO_HOST);
} else {
auto host = std::vector<uint8_t>(size);
SyncMemory(host.data(), output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host.data(), output_addr_vec[0], size, ACL_MEMCPY_DEVICE_TO_HOST);
auto shape_size = abstract::ShapeSize(host_shape);
const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size};
sync_ok = trans::TransDataType(type_args, host_ptr);
@@ -356,7 +357,7 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const ShapeVector &sh
}
}
auto host_tmp = std::vector<uint8_t>(size_);
SyncMemory(host_tmp.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_tmp.data(), ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST);
auto node_index = GetNodeIndex();
if (type_id_ != type) {
const trans::FormatArgs format_args{host_tmp.data(), size_, kOpFormat_NCHW, format_,
@@ -403,7 +404,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size
}
if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NCDHW || format_ == format) {
if (type_id_ == type) {
SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, host_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE);
sync_ok = true;
} else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size);
@@ -416,7 +417,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size
MS_LOG(ERROR) << "Trans data type failed.";
return false;
}
SyncMemory(ptr_, host_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, host_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE);
}
} else {
auto iter = kOpNeedTransFormat.find(format_);
@@ -449,7 +450,7 @@ bool AscendDeviceAddress::SyncDeviceToDevice(const ShapeVector &shape, size_t si
MS_LOG(ERROR) << "src size is greater than det size, src size is: " << size << ", dst size is: " << size_;
return false;
}
auto ret_rt_memcpy = rtMemcpy(ptr_, size, src_ptr, size, RT_MEMCPY_DEVICE_TO_DEVICE);
auto ret_rt_memcpy = aclrtMemcpy(ptr_, size, src_ptr, size, ACL_MEMCPY_DEVICE_TO_DEVICE);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_LOG(ERROR) << "SyncDeviceToDevice failed, rtMemcpy mem size [" << size << "], ret [" << ret_rt_memcpy << "]";
return false;
@@ -515,7 +516,7 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const ShapeVector &sh
MS_LOG(ERROR) << "Trans format failed.";
return false;
}
SyncMemory(ptr_, dst_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, dst_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE);
} else {
const trans::FormatArgs format_args{host_ptr, size_, kOpFormat_NCHW, format_, host_shape, device_shape, type_id_};
auto host_tmp = std::vector<uint8_t>(size_);
@@ -524,7 +525,7 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const ShapeVector &sh
MS_LOG(ERROR) << "Trans format failed.";
return false;
}
SyncMemory(ptr_, host_tmp.data(), size_, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, host_tmp.data(), size_, ACL_MEMCPY_HOST_TO_DEVICE);
}
return sync_ok;
}
@@ -579,9 +580,9 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size, host_shape, host_type);
} else {
auto host_tmp = std::vector<uint8_t>(size_);
auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
auto ret_rt_memcpy = aclrtMemcpy(host_tmp.data(), size_, ptr_, size_, ACL_MEMCPY_DEVICE_TO_HOST);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
MS_LOG(ERROR) << "SyncDeviceToHost: aclrtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
}
std::string path = filepath + '.' + format_;
MS_LOG(INFO) << "E2E Dump path is " << path;


+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -28,6 +28,7 @@
#include "utils/mpi/mpi_config.h"
#include "common/trans.h"
#include "runtime/rt.h"
#include "acl/acl_rt.h"
#include "runtime/device/ascend/ascend_stream_manager.h"
#include "runtime/device/ascend/ascend_stream_assign.h"
#include "runtime/device/ascend/ge_runtime/model_runner.h"


+ 3
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.cc View File

@@ -17,6 +17,7 @@
#include "runtime/device/ascend/ascend_launch_mul.h"
#include "abstract/utils.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "backend/session/single_kernel_graph.h"
#include "frontend/parallel/context.h"

@@ -52,9 +53,9 @@ void AscendLaunchMul::FreeLaunchDeviceMem() {
}

void AscendLaunchMul::CopyHostMemToDevice(size_t origin_size, size_t dst_size) {
auto ret = rtMemcpyAsync(input2_addr_, dst_size, &input2_value_, origin_size, RT_MEMCPY_HOST_TO_DEVICE, stream_);
auto ret = aclrtMemcpyAsync(input2_addr_, dst_size, &input2_value_, origin_size, ACL_MEMCPY_HOST_TO_DEVICE, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "launch rtMemcpyAsync failed, ret:" << ret;
MS_LOG(EXCEPTION) << "launch aclrtMemcpyAsync failed, ret:" << ret;
}
}
} // namespace mindspore::device::ascend

+ 9
- 8
mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc View File

@@ -19,6 +19,7 @@
#include "runtime/device/ascend/ascend_memory_adapter.h"
#include "utils/ms_context.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "profiler/device/ascend/memory_profiling.h"
@@ -136,14 +137,14 @@ size_t AscendMemoryManager::GetAvailableMemSize() {

void AscendMemoryManager::SwapIn(const void *host_ptr, void *device_ptr, size_t mem_size, void *stream) {
if (stream == nullptr) {
auto ret_rt_memcpy = rtMemcpy(device_ptr, mem_size, host_ptr, mem_size, RT_MEMCPY_HOST_TO_DEVICE);
auto ret_rt_memcpy = aclrtMemcpy(device_ptr, mem_size, host_ptr, mem_size, ACL_MEMCPY_HOST_TO_DEVICE);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "SwapIn rtMemcpy failed.";
MS_EXCEPTION(DeviceProcessError) << "SwapIn aclrtMemcpy failed.";
}
} else {
auto ret_rt_memcpy = rtMemcpyAsync(device_ptr, mem_size, host_ptr, mem_size, RT_MEMCPY_HOST_TO_DEVICE, stream);
auto ret_rt_memcpy = aclrtMemcpyAsync(device_ptr, mem_size, host_ptr, mem_size, ACL_MEMCPY_HOST_TO_DEVICE, stream);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "SwapIn rtMemcpyAsync failed.";
MS_EXCEPTION(DeviceProcessError) << "SwapIn aclrtMemcpyAsync failed.";
}
if (rtStreamSynchronize(stream) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";
@@ -153,14 +154,14 @@ void AscendMemoryManager::SwapIn(const void *host_ptr, void *device_ptr, size_t

void AscendMemoryManager::SwapOut(const void *device_ptr, void *host_ptr, size_t mem_size, void *stream) {
if (stream == nullptr) {
auto ret_rt_memcpy = rtMemcpy(host_ptr, mem_size, device_ptr, mem_size, RT_MEMCPY_DEVICE_TO_HOST);
auto ret_rt_memcpy = aclrtMemcpy(host_ptr, mem_size, device_ptr, mem_size, ACL_MEMCPY_DEVICE_TO_HOST);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "SwapOut rtMemcpy failed.";
MS_EXCEPTION(DeviceProcessError) << "SwapOut aclrtMemcpy failed.";
}
} else {
auto ret_rt_memcpy = rtMemcpyAsync(host_ptr, mem_size, device_ptr, mem_size, RT_MEMCPY_DEVICE_TO_HOST, stream);
auto ret_rt_memcpy = aclrtMemcpyAsync(host_ptr, mem_size, device_ptr, mem_size, ACL_MEMCPY_DEVICE_TO_HOST, stream);
if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "SwapOut rtMemcpyAsync failed.";
MS_EXCEPTION(DeviceProcessError) << "SwapOut aclrtMemcpyAsync failed.";
}
if (rtStreamSynchronize(stream) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";


+ 5
- 4
mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc View File

@@ -24,6 +24,7 @@
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/convert_utils_base.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/kernel.h"
#include "runtime/rt_model.h"
#include "runtime/device/ascend/ge_types_convert.h"
@@ -335,9 +336,9 @@ void DataDumper::OpDebugRegister() {
}

rt_ret =
rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
aclrtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
MS_LOG(EXCEPTION) << "[DataDump] Call aclrtMemcpy failed, ret = " << rt_ret;
}

rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_);
@@ -381,9 +382,9 @@ void DataDumper::RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, voi
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed";
}
rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
rt_ret = aclrtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed";
MS_LOG(EXCEPTION) << "[DataDump] Call aclrtMemcpy failed";
}

MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";


+ 5
- 4
mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc View File

@@ -29,6 +29,7 @@
#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h"
#include "common/trans.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "acl/acl_rt.h"

namespace mindspore {
namespace device {
@@ -207,14 +208,14 @@ bool AiCoreDynamicKernel::CopyTilingToDevice() {
}

if (tiling_data_.empty() || tiling_data_ptr_ == nullptr) {
MS_LOG(INFO) << "Tiling size is 0, skip rtMemcpyAsync";
MS_LOG(INFO) << "Tiling size is 0, skip aclrtMemcpyAsync";
return true;
}

auto ret = rtMemcpyAsync(tiling_data_ptr_, tiling_data_.size(), tiling_data_.c_str(), tiling_data_.size(),
RT_MEMCPY_HOST_TO_DEVICE_EX, stream_);
auto ret = aclrtMemcpyAsync(tiling_data_ptr_, tiling_data_.size(), tiling_data_.c_str(), tiling_data_.size(),
ACL_MEMCPY_HOST_TO_DEVICE, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Tiling rtMemcpyAsync failed, ret:" << ret;
MS_LOG(EXCEPTION) << "Tiling aclrtMemcpyAsync failed, ret:" << ret;
}
return true;
}


+ 11
- 10
mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc View File

@@ -16,6 +16,7 @@

#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/kernel.h"
#include "utils/utils.h"
#include "backend/session/anf_runtime_algorithm.h"
@@ -95,10 +96,10 @@ void AiCpuDynamicKernel::Initialize() {
}
ext_info_size_ = ext_info_data_.size();

ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_data_.data(), ext_info_data_.size(),
RT_MEMCPY_HOST_TO_DEVICE);
ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_data_.data(), ext_info_data_.size(),
ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rtMemcpy ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope();
MS_LOG(EXCEPTION) << "Call aclrtMemcpy ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope();
}

auto aicpu_param_head = reinterpret_cast<kernel::AicpuParamHead *>(args_.data());
@@ -169,10 +170,10 @@ bool AiCpuDynamicKernel::UpdateExtInfo() {
}
}

auto ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(),
ext_info_handler_->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE);
auto ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(),
ext_info_handler_->GetExtInfoLen(), ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "UpdateExtInfo rtMemcpy failed. Node info: " << cnode->fullname_with_scope();
MS_LOG(ERROR) << "UpdateExtInfo aclrtMemcpy failed. Node info: " << cnode->fullname_with_scope();
return false;
}

@@ -185,14 +186,14 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() {
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope();
MS_EXCEPTION_IF_NULL(ext_info_handler_);
auto ret = rtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
ext_info_size_, RT_MEMCPY_DEVICE_TO_HOST);
auto ret = aclrtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
ext_info_size_, ACL_MEMCPY_DEVICE_TO_HOST);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtMemcpy output shape failed. Op name: " << cnode->fullname_with_scope();
MS_LOG(ERROR) << "aclrtMemcpy output shape failed. Op name: " << cnode->fullname_with_scope();
return false;
}

MS_LOG(INFO) << "rtMemcpy from device to host success";
MS_LOG(INFO) << "aclrtMemcpy from device to host success";
std::vector<TypeId> type_ids;
std::vector<std::vector<size_t>> shapes;



+ 3
- 2
mindspore/ccsrc/runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.cc View File

@@ -17,14 +17,15 @@
#include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h"

#include "runtime/mem.h"
#include "acl/acl_rt.h"

namespace mindspore {
namespace device {
namespace ascend {
void MemcpyRtsDynamicKernel::Execute() {
auto status = rtMemcpyAsync(dst_, dest_max_, src_, count_, RT_MEMCPY_DEVICE_TO_DEVICE, stream_);
auto status = aclrtMemcpyAsync(dst_, dest_max_, src_, count_, ACL_MEMCPY_DEVICE_TO_DEVICE, stream_);
if (status != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "MemCpyAsync op execute rtMemcpyAsync failed!";
MS_LOG(EXCEPTION) << "MemCpyAsync op execute aclrtMemcpyAsync failed!";
}
}
} // namespace ascend


+ 13
- 12
mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/aicpu_task.cc View File

@@ -17,6 +17,7 @@
#include "runtime/device/ascend/ge_runtime/task/aicpu_task.h"
#include <vector>
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/kernel.h"
#include "runtime/device/ascend/ge_runtime/task/task_factory.h"
#include "aicpu/common/aicpu_task_struct.h"
@@ -116,8 +117,8 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) {
MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << flag;
}

flag = rtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())), ext_size,
RT_MEMCPY_HOST_TO_DEVICE);
flag = aclrtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())),
ext_size, ACL_MEMCPY_HOST_TO_DEVICE);
if (flag != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << flag;
}
@@ -128,8 +129,8 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) {
}

// Memcpy AicpuParamHead
auto rt_ret = rtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head),
sizeof(aicpu::AicpuParamHead), RT_MEMCPY_HOST_TO_DEVICE);
auto rt_ret = aclrtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head),
sizeof(aicpu::AicpuParamHead), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}
@@ -138,9 +139,9 @@ void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) {
void AicpuTask::SetInputOutputAddrs(const std::vector<void *> &io_addrs, uint32_t io_addr_offset) {
// Memcpy io addrs
if (!io_addrs.empty()) {
auto rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset),
static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(),
static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
auto rt_ret = aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset),
static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(),
static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}
@@ -151,16 +152,16 @@ void AicpuTask::SetNodeDef(uint32_t node_def_len_offset, uint32_t node_def_addr_
// Memcpy node def
auto size = task_info_->node_def().size();
auto rt_ret =
rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t),
reinterpret_cast<const void *>(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t),
reinterpret_cast<const void *>(&size), sizeof(uint32_t), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}

// Memcpy node def
rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset),
task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()),
task_info_->node_def().size(), RT_MEMCPY_HOST_TO_DEVICE);
rt_ret = aclrtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset),
task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()),
task_info_->node_def().size(), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}


+ 2
- 1
mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/label_goto_task.cc View File

@@ -16,6 +16,7 @@

#include "runtime/device/ascend/ge_runtime/task/label_goto_task.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/device/ascend/ge_runtime/task/task_factory.h"

namespace mindspore::ge::model_runner {
@@ -64,7 +65,7 @@ void LabelGotoTask::Distribute() {
}

uint64_t index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
rt_ret = aclrtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}


+ 4
- 3
mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/memcpy_async_task.cc View File

@@ -16,6 +16,7 @@

#include "runtime/device/ascend/ge_runtime/task/memcpy_async_task.h"
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/device/ascend/ge_runtime/task/task_factory.h"

namespace mindspore::ge::model_runner {
@@ -39,10 +40,10 @@ void MemcpyAsyncTask::Distribute() {
MS_LOG(INFO) << "MemcpyAsyncTask Distribute start.";
MS_LOG(INFO) << "dst_max: " << task_info_->dst_max() << ", count: " << task_info_->count()
<< ", kind: " << task_info_->kind();
rtError_t rt_ret = rtMemcpyAsync(task_info_->dst(), task_info_->dst_max(), task_info_->src(), task_info_->count(),
static_cast<rtMemcpyKind_t>(task_info_->kind()), stream_);
rtError_t rt_ret = aclrtMemcpyAsync(task_info_->dst(), task_info_->dst_max(), task_info_->src(), task_info_->count(),
static_cast<aclrtMemcpyKind>(task_info_->kind()), stream_);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpyAsync failed, ret: " << rt_ret;
MS_LOG(EXCEPTION) << "Call rt api aclrtMemcpyAsync failed, ret: " << rt_ret;
}
MS_LOG(INFO) << "DistributeTask end";
}


+ 3
- 2
mindspore/ccsrc/runtime/device/ascend/ge_runtime/task/tbe_task.cc View File

@@ -17,6 +17,7 @@
#include "runtime/device/ascend/ge_runtime/task/tbe_task.h"
#include <vector>
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "runtime/kernel.h"
#include "runtime/device/ascend/ge_runtime/task/task_factory.h"

@@ -78,8 +79,8 @@ void TbeTask::Distribute() {
MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << rt_ret << " mem size " << args_size;
}

rt_ret = rtMemcpy(args_, args_size, reinterpret_cast<void *>(tensor_device_addrs.data()), args_size,
RT_MEMCPY_HOST_TO_DEVICE);
rt_ret = aclrtMemcpy(args_, args_size, reinterpret_cast<void *>(tensor_device_addrs.data()), args_size,
ACL_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
}


+ 65
- 0
tests/ut/cpp/stub/runtime/acl_rt.cc View File

@@ -0,0 +1,65 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "acl/acl_rt.h"

/**
 * @ingroup AscendCL
 * @brief Unit-test stub standing in for the synchronous ACL memory copy.
 *
 * No data is moved: the stub ignores every argument and unconditionally
 * reports success.
 *
 * @param dst [IN] destination address pointer (ignored by the stub)
 * @param destMax [IN] max length of the destination address memory (ignored by the stub)
 * @param src [IN] source address pointer (ignored by the stub)
 * @param count [IN] number of bytes requested to copy (ignored by the stub)
 * @param kind [IN] memcpy type (ignored by the stub)
 *
 * @retval ACL_ERROR_NONE Always returned by this stub.
 */
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
                                         aclrtMemcpyKind kind) {
  // The arguments are deliberately discarded; nothing is read or written.
  (void)dst;
  (void)destMax;
  (void)src;
  (void)count;
  (void)kind;

  const aclError status = ACL_ERROR_NONE;
  return status;
}


/**
 * @ingroup AscendCL
 * @brief Unit-test stub standing in for the asynchronous ACL memory copy.
 *
 * @par Function
 * No copy task is issued: the stub ignores every argument, including the
 * stream, and unconditionally reports success.
 *
 * @param dst [IN] destination address pointer (ignored by the stub)
 * @param destMax [IN] max length of destination address memory (ignored by the stub)
 * @param src [IN] source address pointer (ignored by the stub)
 * @param count [IN] number of bytes requested to copy (ignored by the stub)
 * @param kind [IN] memcpy type (ignored by the stub)
 * @param stream [IN] asynchronized task stream (ignored by the stub)
 *
 * @retval ACL_ERROR_NONE Always returned by this stub.
 *
 * @see aclrtMemcpy
 */
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
                                              aclrtMemcpyKind kind, aclrtStream stream) {
  // The arguments are deliberately discarded; nothing is read, written or enqueued.
  (void)dst;
  (void)destMax;
  (void)src;
  (void)count;
  (void)kind;
  (void)stream;

  const aclError status = ACL_ERROR_NONE;
  return status;
}

Loading…
Cancel
Save