Browse Source

sync device to device

tags/v1.6.0
hwjiaorui 4 years ago
parent
commit
a8ced77662
11 changed files with 89 additions and 36 deletions
  1. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc
  2. +59
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
  3. +4
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h
  4. +7
    -0
      mindspore/ccsrc/runtime/device/kernel_runtime.cc
  5. +1
    -13
      mindspore/ccsrc/runtime/framework/actor/actor_common.cc
  6. +0
    -2
      mindspore/ccsrc/runtime/framework/actor/actor_common.h
  7. +1
    -1
      mindspore/ccsrc/runtime/framework/actor/control_flow/exit_actor.cc
  8. +3
    -8
      mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
  9. +5
    -3
      mindspore/ccsrc/runtime/framework/actor/output_actor.cc
  10. +3
    -1
      mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc
  11. +4
    -2
      mindspore/core/ir/device_sync.h

+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc View File

@@ -97,8 +97,8 @@ void DynamicReshapeKernel::Execute() {
size_t input_size_byte = LongToSize(arr_prod) * abstract::TypeIdSize(type_x);
auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
MS_EXCEPTION_IF_NULL(output_addr);
if (!output_addr->SyncDeviceToDevice(output_shapes, input_size_byte, address_x->type_id(), address_x->GetPtr(),
address_x->format())) {
if (!output_addr->SyncDeviceToDeviceWithSameFormatType(output_shapes, input_size_byte, address_x->type_id(),
address_x->GetPtr(), address_x->format())) {
MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed.";
}
MS_LOG(INFO) << "Execute host ReshapeKernel End";


+ 59
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc View File

@@ -79,6 +79,12 @@ void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind)
if (size == 0) {
return;
}
if (dst == nullptr) {
MS_LOG(EXCEPTION) << "dst ptr is null, please check the address is set correctly.";
}
if (src == nullptr) {
MS_LOG(EXCEPTION) << "src ptr is null, please check the address is set correctly.";
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
@@ -433,8 +439,8 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size
return sync_ok;
}

bool AscendDeviceAddress::SyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const {
bool AscendDeviceAddress::SyncDeviceToDeviceWithSameFormatType(const ShapeVector &shape, size_t size, TypeId type,
const void *src_ptr, const std::string &format) const {
if (type_id_ > kMonadTypeBegin && type_id_ < kMonadTypeEnd) {
return true;
}
@@ -457,6 +463,57 @@ bool AscendDeviceAddress::SyncDeviceToDevice(const ShapeVector &shape, size_t si
return true;
}

bool AscendDeviceAddress::SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const {
MS_EXCEPTION_IF_NULL(src_device_addr);
if (type_id_ > kMonadTypeBegin && type_id_ < kMonadTypeEnd) {
return true;
}

auto src_device_address = dynamic_cast<const AscendDeviceAddress *>(src_device_addr);
MS_EXCEPTION_IF_NULL(src_device_address);
if (size_ < src_device_address->GetSize()) {
MS_LOG(ERROR) << "Src size is greater than det size, src size is: " << src_device_address->GetSize()
<< ", dst size is: " << size_;
return false;
}
BindDevice();
auto host_shape = src_device_address->host_shape();
if (host_shape.empty()) {
MS_LOG(ERROR) << "host shape is empty, please check whether the host shape of source device address"
<< src_device_address << " is set.";
return false;
}
auto host_tensor = std::make_shared<tensor::Tensor>(src_device_address->type_id(), host_shape);
auto host_tensor_size = LongToSize(host_tensor->data().nbytes());
auto host_tensor_type = host_tensor->data_type();
if (!src_device_address->SyncDeviceToHost(host_shape, host_tensor_size, host_tensor_type, host_tensor->data_c())) {
MS_LOG(ERROR) << "Sync device to device failed at the stage of sync device to intermediate Tensor.";
return false;
}
if (!SyncHostToDevice(host_shape, host_tensor_size, host_tensor_type, host_tensor->data_c(),
host_tensor->device_info().host_format_)) {
MS_LOG(ERROR) << "Sync device to device failed at the stage of sync intermediate tensor to device.";
return false;
}
return true;
}

// Dispatches a device-to-device copy: takes the direct same-format path when the
// source address matches this address's format and type id, otherwise falls back
// to the intermediate-host-tensor copy.
bool AscendDeviceAddress::SyncDeviceToDevice(const DeviceSync *src_device_addr) const {
  MS_EXCEPTION_IF_NULL(src_device_addr);
  auto src_device_address = dynamic_cast<const AscendDeviceAddress *>(src_device_addr);
  // dynamic_cast yields nullptr when the source is not an Ascend address; fail fast
  // instead of dereferencing a null pointer below.
  MS_EXCEPTION_IF_NULL(src_device_address);
  if (format_ == src_device_address->format() && type_id_ == src_device_address->type_id()) {
    return SyncDeviceToDeviceWithSameFormatType(ShapeVector(), src_device_address->GetSize(),
                                                src_device_address->type_id(), src_device_address->GetPtr(),
                                                src_device_address->format());
  } else {
    // NOTE: closing ")" added after the dst fields — the original message left the
    // "dst(format:..." parenthesis unclosed.
    MS_LOG(WARNING) << "Can not copy from device to device directly, format or type is different, src(format:"
                    << src_device_address->format() << ", type_id:" << TypeIdLabel(src_device_address->type_id())
                    << "), dst(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_)
                    << "), use the intermediate Tensor copy instead.";
    return SyncDeviceToDeviceWithDiffFormatType(src_device_addr);
  }
}

bool AscendDeviceAddress::AsyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const {
MS_LOG(INFO) << "AsyncDeviceToDevice, dst(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_)


+ 4
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h View File

@@ -52,8 +52,10 @@ class AscendDeviceAddress : public DeviceAddress {
const std::string &format = "DefaultFormat") const override;
bool AsyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const override;
bool SyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const override;
bool SyncDeviceToDeviceWithSameFormatType(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const override;
bool SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const override;
bool SyncDeviceToDevice(const DeviceSync *src_device_addr) const override;
void ClearDeviceMemory() override;
DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; }
#ifndef ENABLE_SECURITY


+ 7
- 0
mindspore/ccsrc/runtime/device/kernel_runtime.cc View File

@@ -615,6 +615,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(address.addr);
device_address = CreateDeviceAddress(address.addr, address.size, AnfAlgo::GetOutputFormat(item, index),
output_type_id, {item, index});
device_address->set_host_shape(trans::GetRuntimePaddingShape(item, index));
AnfAlgo::SetOutputAddr(device_address, index, item.get());
continue;
}
@@ -644,6 +645,7 @@ void KernelRuntime::GetDeviceAddress(const AnfNodePtr &item,
TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(item, index);
*device_address =
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id, {item, index});
(*device_address)->set_host_shape(trans::GetRuntimePaddingShape(item, index));
MS_LOG(INFO) << "Assign Static Memory for Input node, size:" << tensor_size
<< " node:" << item->fullname_with_scope() << " index: " << index;
if (mem_manager_->MallocMem(kStaticMem, tensor_size, *device_address, graph_id) == nullptr) {
@@ -699,6 +701,9 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph &graph) {
MS_LOG(DEBUG) << "REF address is not same, ref node output need address update";
MS_LOG(DEBUG) << "REF origin op is " << origin_pair.first->DebugString() << ", output index is "
<< origin_pair.second << ", cur op is " << kernel->DebugString() << ", out index is " << i;
if (!cur_node_output_addr->host_shape().empty()) {
origin_node_output_addr->set_host_shape(cur_node_output_addr->host_shape());
}
AnfAlgo::SetOutputAddr(origin_node_output_addr, i, kernel.get());
}
}
@@ -763,6 +768,7 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode
} else {
address->set_ptr(output_ptr);
}
address->set_host_shape(trans::GetRuntimePaddingShape(node, j));
AnfAlgo::SetOutputAddr(address, j, node.get());
output_ptr += align_size_list[j];
}
@@ -944,6 +950,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
auto output_format = AnfAlgo::GetOutputFormat(value_node, output_idx);
DeviceAddressPtr address =
CreateDeviceAddress(nullptr, node_size, output_format, output_type_id, {value_node, output_idx});
address->set_host_shape(trans::GetRuntimePaddingShape(value_node, output_idx));
address->set_from_persistent_mem(true);
MS_EXCEPTION_IF_NULL(address);
if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) &&


+ 1
- 13
mindspore/ccsrc/runtime/framework/actor/actor_common.cc View File

@@ -153,9 +153,7 @@ bool Copy(const DeviceTensor *dst_device_tensor, const DeviceTensor *src_device_
// Other device tensor copy to CPU device tensor.
return src_device_tensor->SyncDeviceToHost(copy_size, dst_device_tensor->GetMutablePtr());
} else if (dst_device_tensor->DeviceType() == src_device_tensor->DeviceType()) {
return dst_device_tensor->SyncDeviceToDevice(ShapeVector(), src_device_tensor->GetSize(),
src_device_tensor->type_id(), src_device_tensor->GetPtr(),
src_device_tensor->format());
return dst_device_tensor->SyncDeviceToDevice(src_device_tensor);
} else {
MS_LOG(ERROR) << "Invalid device type, src device type: " << src_device_tensor->DeviceType()
<< ", dst device type: " << dst_device_tensor->DeviceType();
@@ -292,15 +290,5 @@ std::string FetchActorName(KernelTransformType kernel_type, const std::string &a
}
return actor_name;
}

bool NeedSyncByTensor(const DeviceTensor *dst_device_addr, const DeviceTensor *src_device_addr) {
MS_EXCEPTION_IF_NULL(dst_device_addr);
MS_EXCEPTION_IF_NULL(src_device_addr);
if (src_device_addr->DeviceType() != dst_device_addr->DeviceType()) {
return false;
}
return (src_device_addr->format() != dst_device_addr->format() ||
src_device_addr->type_id() != dst_device_addr->type_id());
}
} // namespace runtime
} // namespace mindspore

+ 0
- 2
mindspore/ccsrc/runtime/framework/actor/actor_common.h View File

@@ -210,8 +210,6 @@ KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const Kerne
GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);
std::string FetchActorName(KernelTransformType kernel_type, const std::string &actor_set_name,
const AnfNodePtr &node = nullptr, const KernelGraphPtr &graph = nullptr);

bool NeedSyncByTensor(const DeviceTensor *dst_device_tensor, const DeviceTensor *src_device_tensor);
} // namespace runtime
} // namespace mindspore



+ 1
- 1
mindspore/ccsrc/runtime/framework/actor/control_flow/exit_actor.cc View File

@@ -166,7 +166,7 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_contexts_[i],
GetAID().Name(), new_device_tensor->GetSize());
}
if (!new_device_tensor->SyncDeviceToDevice(
if (!new_device_tensor->SyncDeviceToDeviceWithSameFormatType(
trans::GetRuntimePaddingShape(node_with_index.first, node_with_index.second),
input_device_tensor->GetSize(), input_device_tensor->type_id(), input_device_tensor->GetPtr(),
input_device_tensor->format())) {


+ 3
- 8
mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc View File

@@ -242,15 +242,10 @@ void HostQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *cons
if (tensor_device_address.get() == device_tensor) {
continue;
}

if (NeedSyncByTensor(device_tensor, tensor_device_address.get())) {
host_tensor->data_sync(false);
} else {
if ((!Copy(device_tensor, tensor_device_address.get()))) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Copy data failed.");
}
continue;
if ((!Copy(device_tensor, tensor_device_address.get()))) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Copy data failed.");
}
continue;
}

// Sync data from host_tensor to device_tensor.


+ 5
- 3
mindspore/ccsrc/runtime/framework/actor/output_actor.cc View File

@@ -56,6 +56,7 @@ void OutputActor::RunOpControl(AID *const, OpContext<DeviceTensor> *const contex
if (outputs_[device_tensor_store_key.first] == nullptr) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR(*context, "Create output tensor failed.");
}
output_nodes_[device_tensor_store_key.first] = {device_tensor_store_key.second, 0};
}

current_outputs_num_ = 0;
@@ -175,6 +176,7 @@ void OutputActor::UpdateOutputDeviceAddress() {
auto node_with_index = device_tensor->GetNodeIndex();
tensor_device_address->SetNodeIndex(node_with_index.first, node_with_index.second);
tensor_device_address->set_from_persistent_mem(device_tensor->from_persistent_mem());
tensor_device_address->set_host_shape(device_tensor->host_shape());
// The outputs may have the same output node, so need skip when the node has been done.
if (device_tensor->GetPtr() == nullptr) {
continue;
@@ -190,9 +192,9 @@ void OutputActor::UpdateOutputDeviceAddress() {
<< output_node->fullname_with_scope() << ", alloc size: " << tensor_device_address->GetSize()
<< "B.";
}
if (!tensor_device_address->SyncDeviceToDevice(trans::GetRuntimePaddingShape(output_node, output_index),
device_tensor->GetSize(), device_tensor->type_id(),
device_tensor->GetPtr(), device_tensor->format())) {
if (!tensor_device_address->SyncDeviceToDeviceWithSameFormatType(
trans::GetRuntimePaddingShape(output_node, output_index), device_tensor->GetSize(),
device_tensor->type_id(), device_tensor->GetPtr(), device_tensor->format())) {
MS_LOG(EXCEPTION) << "Sync device to device failed, device type: " << tensor_device_address->DeviceType();
}
} else {


+ 3
- 1
mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc View File

@@ -258,8 +258,10 @@ void AscendDeviceContext::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_g
if (json_parser.e2e_dump_enabled() || json_parser.async_dump_enabled()) {
std::string root_dir = json_parser.path() + "/rank_" + std::to_string(rank_id_);
std::string target_dir = root_dir + "/graphs";
std::string cst_file_dir = GenerateDumpPath(graph->root_graph_id(), rank_id_, true);
std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir";
DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack);
DumpConstantInfo(graph, cst_file_dir);
DumpIR("trace_code_graph", graph, true, kWholeStack, ir_file_path);
DumpGraphExeOrder("ms_execution_order_graph_" + std::to_string(graph->graph_id()) + ".csv", root_dir,
graph->execution_order());
@@ -453,8 +455,8 @@ void AscendDeviceContext::AssignOutputNopNodeDeviceAddress(const KernelGraphPtr
auto output_type = AnfAlgo::GetOutputDeviceDataType(output, 0);
auto device_address = CreateDeviceAddress(const_cast<void *>(ptr), size, output_format, output_type);
device_address->set_is_ptr_persisted(true);
device_address->set_host_shape(trans::GetRuntimePaddingShape(output, 0));
AnfAlgo::SetOutputAddr(device_address, 0, output.get());

AnfAlgo::SetNodeAttr(kAttrSkipNopOpAddr, MakeValue(false), output);
MS_LOG(INFO) << "Assign device address to output nop node " << output->fullname_with_scope();
}


+ 4
- 2
mindspore/core/ir/device_sync.h View File

@@ -39,10 +39,12 @@ class DeviceSync {
virtual bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const = 0;
virtual bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
const std::string &format = "DefaultFormat") const = 0;
virtual bool SyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const {
virtual bool SyncDeviceToDevice(const DeviceSync *src_device_addr) const { return true; }
virtual bool SyncDeviceToDeviceWithSameFormatType(const ShapeVector &shape, size_t size, TypeId type,
const void *src_ptr, const std::string &format) const {
return true;
}
virtual bool SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const { return true; }
virtual bool AsyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
const std::string &format) const {
return true;


Loading…
Cancel
Save