Browse Source

!30660 Add host_shape to CreateDeviceAddress API Input

Merge pull request !30660 from jiaorui/host_shape
r1.7
i-robot Gitee 4 years ago
parent
commit
2ed51f3cc7
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
17 changed files with 64 additions and 44 deletions
  1. +3
    -3
      mindspore/ccsrc/backend/graph_compiler/backend.cc
  2. +1
    -1
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  3. +2
    -2
      mindspore/ccsrc/debug/debugger/debugger_utils.cc
  4. +12
    -0
      mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc
  5. +2
    -5
      mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.h
  6. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc
  7. +2
    -2
      mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.h
  8. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc
  9. +2
    -2
      mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.h
  10. +3
    -2
      mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc
  11. +6
    -4
      mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc
  12. +5
    -3
      mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc
  13. +3
    -2
      mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc
  14. +3
    -2
      mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc
  15. +11
    -9
      mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc
  16. +6
    -4
      mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc
  17. +1
    -1
      mindspore/ccsrc/runtime/hardware/device_context.h

+ 3
- 3
mindspore/ccsrc/backend/graph_compiler/backend.cc View File

@@ -245,10 +245,10 @@ void ClearGraphDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *d
continue;
}
MS_EXCEPTION_IF_NULL(device_context);
auto new_device_address = device_context->CreateDeviceAddress(
nullptr, device_address->GetSize(), device_address->format(), device_address->type_id());
auto new_device_address =
device_context->CreateDeviceAddress(nullptr, device_address->GetSize(), device_address->format(),
device_address->type_id(), device_address->host_shape());
MS_EXCEPTION_IF_NULL(new_device_address);
new_device_address->set_host_shape(device_address->host_shape());
new_device_address->set_original_ref_count(device_address->original_ref_count());
new_device_address->ResetRefCount();
if (is_gradient_out) {


+ 1
- 1
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -286,7 +286,7 @@ std::shared_ptr<device::DeviceAddress> CreateAscendDeviceAddress(const KernelLau
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({kAscendDevice, device_id});
auto format = kOpFormat_DEFAULT;
MS_EXCEPTION_IF_NULL(addr_ptr);
return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type);
return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type, ShapeVector());
}

void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,


+ 2
- 2
mindspore/ccsrc/debug/debugger/debugger_utils.cc View File

@@ -86,7 +86,7 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
}

auto format = kOpFormat_DEFAULT;
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type, ShapeVector());
string input_tensor_name = input_kernel_name + ':' + "0";
ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
auto ret = device_addr->LoadMemToHost(input_tensor_name, UintToInt(exec_order), format, int_shapes, type, 0, true,
@@ -122,7 +122,7 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
}

auto format = kOpFormat_DEFAULT;
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type, ShapeVector());
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
auto ret =


+ 12
- 0
mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc View File

@@ -934,6 +934,18 @@ void AscendDeviceContext::InsertEventBeforeRunTask(const KernelGraphPtr &graph)
graph_event_[graph->graph_id()] = compute_event;
}

// Create an AscendDeviceAddress for this device context and record the host shape on it.
//
// Parameters:
//   device_ptr   - forwarded unchanged to the AscendDeviceAddress constructor.
//   device_size  - forwarded unchanged to the AscendDeviceAddress constructor.
//   format       - format string stored on the address.
//   type_id      - data type id stored on the address.
//   shape        - host shape stored on the address via set_host_shape(); may be empty.
// Returns:
//   The newly created address as a DeviceAddressPtr.
DeviceAddressPtr AscendDeviceContext::CreateDeviceAddress(void *const device_ptr, size_t device_size,
                                                          const string &format, TypeId type_id,
                                                          const ShapeVector &shape) const {
  auto device_address = std::make_shared<AscendDeviceAddress>(
    device_ptr, device_size, format, type_id, device_context_key_.device_name_, device_context_key_.device_id_);
  // An empty shape is an expected input: it is the declared default argument and several callers pass
  // ShapeVector() on purpose (e.g. workspace addresses). Report it at DEBUG level so normal runs do not
  // flood the log with warnings.
  if (shape.empty()) {
    MS_LOG(DEBUG) << "shape size is empty.";
  }
  device_address->set_host_shape(shape);
  return device_address;
}

MS_REGISTER_DEVICE(kAscendDevice, AscendDeviceContext);
} // namespace ascend
} // namespace device


+ 2
- 5
mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.h View File

@@ -91,11 +91,8 @@ class AscendDeviceContext : public DeviceContext {
const std::vector<size_t> &size_list) const override;

// Create concrete device address according different device type.
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const override {
return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id,
device_context_key_.device_name_, device_context_key_.device_id_);
}
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
const ShapeVector &shape = ShapeVector()) const override;

// Get device address type according different device type, such GPU, Ascend.
DeviceAddressType GetDeviceAddressType() const override { return DeviceAddressType::kAscend; }


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc View File

@@ -119,7 +119,7 @@ void CPUDeviceContext::FreeMemory(void *const ptr) const {
}

// Create a CPUDeviceAddress constructed from device_ptr, device_size, format and type_id, tagged with this
// context's device name and device id.
// NOTE(review): the `shape` parameter is not used in this body, whereas the Ascend implementation stores it
// with set_host_shape(). Confirm whether the host shape should also be recorded on CPU addresses, since
// callers now rely on CreateDeviceAddress to receive it.
DeviceAddressPtr CPUDeviceContext::CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const {
TypeId type_id, const ShapeVector &shape) const {
return std::make_shared<CPUDeviceAddress>(device_ptr, device_size, format, type_id, device_context_key_.device_name_,
device_context_key_.device_id_);
}


+ 2
- 2
mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.h View File

@@ -43,8 +43,8 @@ class CPUDeviceContext : public DeviceContext {
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *const ptr) const override;

DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const override;
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
const ShapeVector &shape = ShapeVector()) const override;
DeviceAddressType GetDeviceAddressType() const override { return DeviceAddressType::kCPU; }

void OptimizeGraph(const KernelGraphPtr &graph) const override;


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc View File

@@ -233,7 +233,7 @@ void GPUDeviceContext::FreeMemory(void *const ptr) const {
}

// Create a GPUDeviceAddress constructed from device_ptr, device_size, format and type_id, tagged with this
// context's device name and device id.
// NOTE(review): the `shape` parameter is not used in this body, whereas the Ascend implementation stores it
// with set_host_shape(). Confirm whether the host shape should also be recorded on GPU addresses, since
// callers now rely on CreateDeviceAddress to receive it.
DeviceAddressPtr GPUDeviceContext::CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const {
TypeId type_id, const ShapeVector &shape) const {
return std::make_shared<GPUDeviceAddress>(device_ptr, device_size, format, type_id, device_context_key_.device_name_,
device_context_key_.device_id_);
}


+ 2
- 2
mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.h View File

@@ -49,8 +49,8 @@ class GPUDeviceContext : public DeviceContext {
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *const ptr) const override;

DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const override;
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
const ShapeVector &shape = ShapeVector()) const override;
DeviceAddressType GetDeviceAddressType() const override { return DeviceAddressType::kGPU; }

// Optimize the kernel graph for graph mode.


+ 3
- 2
mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc View File

@@ -172,8 +172,9 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
}
MS_EXCEPTION_IF_NULL(device_contexts_[i]);
// Create the new device tensor to take over the input_device_tensors which are the outputs of kernel graphs.
auto new_device_tensor = device_contexts_[i]->CreateDeviceAddress(
nullptr, input_device_tensor->GetSize(), input_device_tensor->format(), input_device_tensor->type_id());
auto new_device_tensor =
device_contexts_[i]->CreateDeviceAddress(nullptr, input_device_tensor->GetSize(), input_device_tensor->format(),
input_device_tensor->type_id(), input_device_tensor->host_shape());
MS_EXCEPTION_IF_NULL(new_device_tensor);
(void)created_device_tensors_.emplace_back(new_device_tensor);
(void)new_device_tensors.emplace_back(new_device_tensor.get());


+ 6
- 4
mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc View File

@@ -467,8 +467,9 @@ void DataPrepareActor::PrepareDataForStepMode(const std::vector<std::vector<Tens
output_type_id = common::AnfAlgo::GetOutputInferDataType(input_node, 0);
}
size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(input_node, 0);
auto device_address = device_context->CreateDeviceAddress(
nullptr, tensor_size, AnfAlgo::GetOutputFormat(input_node, 0), output_type_id);
auto device_address =
device_context->CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(input_node, 0),
output_type_id, trans::GetRuntimePaddingShape(input_node, 0));
MS_EXCEPTION_IF_NULL(device_address);
AnfAlgo::SetOutputAddr(device_address, 0, input_node.get());
device_address->SetNodeIndex(input_node, 0);
@@ -668,8 +669,9 @@ void DataPrepareActor::PrepareDataForWeightNode(const AnfNodePtr &backend_node,
// The step mode can't reuse the device tensor, because other actors may use the device tensor in step mode.
if ((strategy_ == GraphExecutionStrategy::kStep) ||
(device_tensor->DeviceType() != device_context->GetDeviceAddressType())) {
host_tensor_address = device_context->CreateDeviceAddress(nullptr, device_tensor->GetSize(),
device_tensor->format(), device_tensor->type_id());
host_tensor_address =
device_context->CreateDeviceAddress(nullptr, device_tensor->GetSize(), device_tensor->format(),
device_tensor->type_id(), device_tensor->host_shape());
host_tensor_address->set_from_persistent_mem(tensor->is_parameter());
} else {
host_tensor_address = device_tensor;


+ 5
- 3
mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc View File

@@ -127,7 +127,8 @@ void KernelActor::FetchWorkspaceDeviceTensor() {
launch_info_.workspaces_.erase(launch_info_.workspaces_.end() - size, launch_info_.workspaces_.end());
} else if (launch_info_.workspaces_.size() < workspace_sizes.size()) {
for (size_t i = launch_info_.workspaces_.size(); i < workspace_sizes.size(); ++i) {
auto device_address = device_contexts_[0]->CreateDeviceAddress(nullptr, workspace_sizes[i], "", kTypeUnknown);
auto device_address =
device_contexts_[0]->CreateDeviceAddress(nullptr, workspace_sizes[i], "", kTypeUnknown, ShapeVector());
MS_LOG(DEBUG) << "Create addr for node:" << common::AnfAlgo::GetNodeDebugString(kernel_)
<< " addr:" << device_address;
AnfAlgo::SetWorkspaceAddr(device_address, i, kernel_.get()); // set to kernel_info
@@ -309,8 +310,9 @@ void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,
SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(strategy_, *context, "The input index is of range.");
}
if (copy_input_device_tensors_[input_data->index_] == nullptr) {
copy_input_device_tensors_[input_data->index_] = device_contexts_[0]->CreateDeviceAddress(
nullptr, device_tensor->GetSize(), device_tensor->format(), device_tensor->type_id());
copy_input_device_tensors_[input_data->index_] =
device_contexts_[0]->CreateDeviceAddress(nullptr, device_tensor->GetSize(), device_tensor->format(),
device_tensor->type_id(), device_tensor->host_shape());
}
auto &new_device_tensor = copy_input_device_tensors_[input_data->index_];
MS_EXCEPTION_IF_NULL(new_device_tensor);


+ 3
- 2
mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc View File

@@ -149,8 +149,9 @@ TensorPtr OutputActor::CreateOutputTensor(const AnfNodePtr &output_node, size_t
if (output_node_to_tensor_device_address_.count({output_node, output_index}) > 0) {
tensor->set_device_address(output_node_to_tensor_device_address_[{output_node, output_index}]);
} else {
auto tensor_device_address = device_context->CreateDeviceAddress(nullptr, device_tensor->GetSize(),
device_tensor->format(), device_tensor->type_id());
auto tensor_device_address =
device_context->CreateDeviceAddress(nullptr, device_tensor->GetSize(), device_tensor->format(),
device_tensor->type_id(), device_tensor->host_shape());
MS_EXCEPTION_IF_NULL(tensor_device_address);
tensor->set_device_address(tensor_device_address);
output_node_to_tensor_device_address_[{output_node, output_index}] = tensor_device_address;


+ 3
- 2
mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc View File

@@ -283,7 +283,7 @@ void CreateDeviceTensorForValueNode(const KernelWithIndex &front_node_with_index
// Create device tensor.
std::string output_format = AnfAlgo::GetOutputFormat(backend_node, 0);
device::DeviceAddressPtr address =
device_context->CreateDeviceAddress(nullptr, tensor_size, output_format, output_type_id);
device_context->CreateDeviceAddress(nullptr, tensor_size, output_format, output_type_id, ShapeVector());
MS_EXCEPTION_IF_NULL(address);
MS_LOG(DEBUG) << "Create address for node:" << common::AnfAlgo::GetNodeDebugString(front_node) << " addr:" << address
<< " size:" << tensor_size;
@@ -311,7 +311,8 @@ void CreateDeviceTensorForFrontNode(const KernelWithIndex &front_node_with_index
size_t size = AnfAlgo::GetOutputTensorMemSize(node, 0);

// Create device tensor.
device::DeviceAddressPtr address = device_context->CreateDeviceAddress(nullptr, size, kOpFormat_DEFAULT, type_id);
device::DeviceAddressPtr address =
device_context->CreateDeviceAddress(nullptr, size, kOpFormat_DEFAULT, type_id, ShapeVector());
MS_EXCEPTION_IF_NULL(address);
MS_LOG(INFO) << "Create address for node that has no corresponding backend node:"
<< common::AnfAlgo::GetNodeDebugString(node) << " addr:" << address << " size:" << size


+ 11
- 9
mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc View File

@@ -100,10 +100,10 @@ void CreateParameterDeviceAddress(const DeviceContext *device_context, const Ker
}

size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(item, index);
auto device_address = device_context->CreateDeviceAddress(nullptr, tensor_size,
AnfAlgo::GetOutputFormat(item, index), output_type_id);
auto device_address =
device_context->CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id,
trans::GetRuntimePaddingShape(item, index));
device_address->set_from_persistent_mem(item->isa<Parameter>());
device_address->set_host_shape(trans::GetRuntimePaddingShape(item, index));
MS_LOG(DEBUG) << "Create addr for node:" << common::AnfAlgo::GetNodeDebugString(item)
<< " addr:" << device_address;
AnfAlgo::SetOutputAddr(device_address, index, item.get());
@@ -144,8 +144,8 @@ void CreateDeviceAddressForTensorValue(const DeviceContext *device_context, cons
}
std::string output_format = AnfAlgo::GetOutputFormat(value_node, output_idx);

device::DeviceAddressPtr address =
device_context->CreateDeviceAddress(nullptr, tensor_size, output_format, output_type_id);
device::DeviceAddressPtr address = device_context->CreateDeviceAddress(
nullptr, tensor_size, output_format, output_type_id, trans::GetRuntimePaddingShape(value_node, output_idx));
MS_LOG(DEBUG) << "Create addr for node:" << common::AnfAlgo::GetNodeDebugString(value_node) << " addr:" << address;
MS_EXCEPTION_IF_NULL(address);
address->set_from_persistent_mem(true);
@@ -169,7 +169,8 @@ void CreateValueNodeDeviceAddress(const DeviceContext *device_context, const Ker
} else if (node_value->isa<StringImm>()) {
auto value = GetValue<std::string>(node_value);
size_t tensor_size = value.size();
auto address = device_context->CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8);
auto address =
device_context->CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8, ShapeVector());
MS_EXCEPTION_IF_NULL(address);
address->set_from_persistent_mem(true);
MS_LOG(DEBUG) << "Create addr for node:" << common::AnfAlgo::GetNodeDebugString(value_node)
@@ -199,8 +200,8 @@ void CreateKernelOutputDeviceAddress(const DeviceContext *device_context, const
auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
auto address_size = AnfAlgo::GetOutputTensorMemSize(kernel, i);
auto device_address = device_context->CreateDeviceAddress(nullptr, address_size, output_format, output_type);
device_address->set_host_shape(trans::GetRuntimePaddingShape(kernel, i));
auto device_address = device_context->CreateDeviceAddress(nullptr, address_size, output_format, output_type,
trans::GetRuntimePaddingShape(kernel, i));
if (is_gradient_out) {
device_address->set_from_persistent_mem(true);
}
@@ -227,7 +228,8 @@ void CreateKernelWorkspaceDeviceAddress(const DeviceContext *device_context, con
if (AnfAlgo::WorkspaceAddrExist(kernel, i)) {
break;
}
auto device_address = device_context->CreateDeviceAddress(nullptr, workspace_sizes[i], "", kTypeUnknown);
auto device_address =
device_context->CreateDeviceAddress(nullptr, workspace_sizes[i], "", kTypeUnknown, ShapeVector());
MS_LOG(DEBUG) << "Create addr for node:" << common::AnfAlgo::GetNodeDebugString(kernel)
<< " addr:" << device_address;
AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());


+ 6
- 4
mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc View File

@@ -2046,8 +2046,9 @@ void GraphScheduler::PersistDeviceTensor(const GraphCompilerInfo &graph_compiler
if (DeviceTensorStore::GetInstance().Fetch(front_node.get(), device_context->GetDeviceAddressType()) == nullptr) {
MS_LOG(WARNING) << "Fetch no device tensor store by:" << front_node->fullname_with_scope()
<< ", type:" << device_context->GetDeviceAddressType();
auto other_type_device_tensor = device_context->CreateDeviceAddress(
nullptr, device_tensor->GetSize(), device_tensor->format(), device_tensor->type_id());
auto other_type_device_tensor =
device_context->CreateDeviceAddress(nullptr, device_tensor->GetSize(), device_tensor->format(),
device_tensor->type_id(), device_tensor->host_shape());
other_type_device_tensor->SetNodeIndex(input_node, 0);
other_type_device_tensor->set_from_persistent_mem(input_node->isa<Parameter>());
AddDeviceTensorStore(front_node.get(), other_type_device_tensor);
@@ -2089,8 +2090,9 @@ void GraphScheduler::PersistDeviceTensorForRootGraphControlNode(const GraphCompi
auto sub_device_tensor = AnfAlgo::GetMutableOutputAddr(backend_node, 0, false);
MS_EXCEPTION_IF_NULL(sub_device_tensor);

auto new_device_tensor = device_context->CreateDeviceAddress(
nullptr, sub_device_tensor->GetSize(), sub_device_tensor->format(), sub_device_tensor->type_id());
auto new_device_tensor =
device_context->CreateDeviceAddress(nullptr, sub_device_tensor->GetSize(), sub_device_tensor->format(),
sub_device_tensor->type_id(), sub_device_tensor->host_shape());
MS_EXCEPTION_IF_NULL(new_device_tensor);
new_device_tensor->SetNodeIndex(backend_node, 0);
new_device_tensor->set_is_ptr_persisted(sub_device_tensor->is_ptr_persisted());


+ 1
- 1
mindspore/ccsrc/runtime/hardware/device_context.h View File

@@ -88,7 +88,7 @@ class DeviceContext {

// Create concrete device address according different device type.
virtual DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const = 0;
TypeId type_id, const ShapeVector &shape) const = 0;

// Get device address type according different device type, such GPU, Ascend.
virtual DeviceAddressType GetDeviceAddressType() const = 0;


Loading…
Cancel
Save