Merge pull request !1513 from zyli2020/refine_data_copy_in_multi_graph — tags/v0.5.0-beta
| @@ -25,6 +25,7 @@ | |||||
| #include "device/kernel_runtime_manager.h" | #include "device/kernel_runtime_manager.h" | ||||
| #include "predict/predict.h" | #include "predict/predict.h" | ||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #include "common/trans.h" | |||||
| #include "utils/context/ms_context.h" | #include "utils/context/ms_context.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -83,6 +84,49 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input | |||||
| runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); | runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); | ||||
| } | } | ||||
| void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | |||||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto input_nodes = kernel_graph->inputs(); | |||||
| auto ms_context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(ms_context); | |||||
| for (size_t i = 0; i < inputs.size(); ++i) { | |||||
| auto tensor = inputs[i]; | |||||
| MS_EXCEPTION_IF_NULL(tensor); | |||||
| auto input_node = input_nodes[i]; | |||||
| MS_EXCEPTION_IF_NULL(input_node); | |||||
| if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) { | |||||
| auto pk_node = input_node->cast<ParameterPtr>(); | |||||
| auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); | |||||
| bool need_sync = false; | |||||
| if (ms_context->enable_pynative_infer()) { | |||||
| if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) { | |||||
| need_sync = true; | |||||
| } | |||||
| } else { | |||||
| if (tensor->is_dirty()) { | |||||
| need_sync = true; | |||||
| } else if (tensor->device_address() != device_address) { | |||||
| AnfAlgo::SetOutputAddr(tensor->device_address(), 0, pk_node.get()); | |||||
| need_sync = false; | |||||
| } | |||||
| } | |||||
| if (need_sync) { | |||||
| tensor->set_device_address(device_address); | |||||
| MS_EXCEPTION_IF_NULL(device_address); | |||||
| if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), | |||||
| LongToSize(tensor->data().nbytes()), tensor->data_type(), | |||||
| tensor->data_c(false))) { | |||||
| MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; | |||||
| } | |||||
| } | |||||
| } | |||||
| tensor->set_dirty(false); | |||||
| } | |||||
| } | |||||
| void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | ||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | MS_EXCEPTION_IF_NULL(runtime_instance); | ||||
| @@ -59,6 +59,9 @@ class GPUSession : public SessionBasic { | |||||
| void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; | void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; | ||||
| void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||||
| const std::vector<tensor::TensorPtr> &inputs_const) const override; | |||||
| void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| }; | }; | ||||
| using GPUSessionPtr = std::shared_ptr<GPUSession>; | using GPUSessionPtr = std::shared_ptr<GPUSession>; | ||||
| @@ -89,7 +89,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne | |||||
| // In PyNative mode, data is only copied to the host when the user wants to print it | // In PyNative mode, data is only copied to the host when the user wants to print it | ||||
| auto ms_context = MsContext::GetInstance(); | auto ms_context = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(ms_context); | MS_EXCEPTION_IF_NULL(ms_context); | ||||
| if (ms_context->execution_mode() == kPynativeMode) { | |||||
| if (ms_context->execution_mode() == kPynativeMode || ms_context->device_target() == kGPUDevice) { | |||||
| tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index)); | tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index)); | ||||
| tensor->set_dirty(false); | tensor->set_dirty(false); | ||||
| } else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index), | } else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index), | ||||
| @@ -216,7 +216,7 @@ bool ValueToBool(const ValuePtr &v, bool *value) { | |||||
| } else if (v->isa<tensor::Tensor>()) { | } else if (v->isa<tensor::Tensor>()) { | ||||
| auto tensor = v->cast<tensor::TensorPtr>(); | auto tensor = v->cast<tensor::TensorPtr>(); | ||||
| MS_EXCEPTION_IF_NULL(tensor); | MS_EXCEPTION_IF_NULL(tensor); | ||||
| (void)tensor->data_sync(); | |||||
| bool *tensor_data = static_cast<bool *>(tensor->data_c()); | bool *tensor_data = static_cast<bool *>(tensor->data_c()); | ||||
| // maybe need to support if tensor is a bool array | // maybe need to support if tensor is a bool array | ||||
| auto vb = tensor_data[0]; | auto vb = tensor_data[0]; | ||||
| @@ -70,13 +70,15 @@ x3 = np.array([[1, 2], [3, 4], [5.0, 88.0]]).astype(np.float32) | |||||
| def test_status(): | def test_status(): | ||||
| ms_status = Net() | ms_status = Net() | ||||
| output1 = ms_status(Tensor(x1)) | output1 = ms_status(Tensor(x1)) | ||||
| output2 = ms_status(Tensor(x2)) | |||||
| output3 = ms_status(Tensor(x3)) | |||||
| expect1 = 1 | expect1 = 1 | ||||
| expect2 = 1 | |||||
| expect3 = 0 | |||||
| assert output1.asnumpy()[0] == expect1 | assert output1.asnumpy()[0] == expect1 | ||||
| output2 = ms_status(Tensor(x2)) | |||||
| expect2 = 1 | |||||
| assert output2.asnumpy()[0] == expect2 | assert output2.asnumpy()[0] == expect2 | ||||
| output3 = ms_status(Tensor(x3)) | |||||
| expect3 = 0 | |||||
| assert output3.asnumpy()[0] == expect3 | assert output3.asnumpy()[0] == expect3 | ||||
| @@ -86,13 +88,15 @@ def test_status(): | |||||
| def test_nan(): | def test_nan(): | ||||
| ms_isnan = Netnan() | ms_isnan = Netnan() | ||||
| output1 = ms_isnan(Tensor(x1)) | output1 = ms_isnan(Tensor(x1)) | ||||
| output2 = ms_isnan(Tensor(x2)) | |||||
| output3 = ms_isnan(Tensor(x3)) | |||||
| expect1 = [[False, False, True, False]] | expect1 = [[False, False, True, False]] | ||||
| expect2 = [[False, False, False, False]] | |||||
| expect3 = [[False, False], [False, False], [False, False]] | |||||
| assert (output1.asnumpy() == expect1).all() | assert (output1.asnumpy() == expect1).all() | ||||
| output2 = ms_isnan(Tensor(x2)) | |||||
| expect2 = [[False, False, False, False]] | |||||
| assert (output2.asnumpy() == expect2).all() | assert (output2.asnumpy() == expect2).all() | ||||
| output3 = ms_isnan(Tensor(x3)) | |||||
| expect3 = [[False, False], [False, False], [False, False]] | |||||
| assert (output3.asnumpy() == expect3).all() | assert (output3.asnumpy() == expect3).all() | ||||
| @@ -102,13 +106,15 @@ def test_nan(): | |||||
| def test_inf(): | def test_inf(): | ||||
| ms_isinf = Netinf() | ms_isinf = Netinf() | ||||
| output1 = ms_isinf(Tensor(x1)) | output1 = ms_isinf(Tensor(x1)) | ||||
| output2 = ms_isinf(Tensor(x2)) | |||||
| output3 = ms_isinf(Tensor(x3)) | |||||
| expect1 = [[False, False, False, False]] | expect1 = [[False, False, False, False]] | ||||
| expect2 = [[True, False, False, False]] | |||||
| expect3 = [[False, False], [False, False], [False, False]] | |||||
| assert (output1.asnumpy() == expect1).all() | assert (output1.asnumpy() == expect1).all() | ||||
| output2 = ms_isinf(Tensor(x2)) | |||||
| expect2 = [[True, False, False, False]] | |||||
| assert (output2.asnumpy() == expect2).all() | assert (output2.asnumpy() == expect2).all() | ||||
| output3 = ms_isinf(Tensor(x3)) | |||||
| expect3 = [[False, False], [False, False], [False, False]] | |||||
| assert (output3.asnumpy() == expect3).all() | assert (output3.asnumpy() == expect3).all() | ||||
| @@ -118,11 +124,13 @@ def test_inf(): | |||||
| def test_finite(): | def test_finite(): | ||||
| ms_isfinite = Netfinite() | ms_isfinite = Netfinite() | ||||
| output1 = ms_isfinite(Tensor(x1)) | output1 = ms_isfinite(Tensor(x1)) | ||||
| output2 = ms_isfinite(Tensor(x2)) | |||||
| output3 = ms_isfinite(Tensor(x3)) | |||||
| expect1 = [[True, True, False, True]] | expect1 = [[True, True, False, True]] | ||||
| expect2 = [[False, True, True, True]] | |||||
| expect3 = [[True, True], [True, True], [True, True]] | |||||
| assert (output1.asnumpy() == expect1).all() | assert (output1.asnumpy() == expect1).all() | ||||
| output2 = ms_isfinite(Tensor(x2)) | |||||
| expect2 = [[False, True, True, True]] | |||||
| assert (output2.asnumpy() == expect2).all() | assert (output2.asnumpy() == expect2).all() | ||||
| output3 = ms_isfinite(Tensor(x3)) | |||||
| expect3 = [[True, True], [True, True], [True, True]] | |||||
| assert (output3.asnumpy() == expect3).all() | assert (output3.asnumpy() == expect3).all() | ||||