Merge pull request !27427 from hwjiaorui/mindrt-bug-fix (tags/v1.6.0)
| @@ -298,8 +298,8 @@ void TbeKernelCompileManager::ParseTargetJobStatus(const nlohmann::json &json, T | |||
| MS_LOG(EXCEPTION) << "Parse query result error."; | |||
| } | |||
| auto json_name = GetJsonValue<std::string>(query_result, kFusionOpName); | |||
| auto target_job_id = query_result.at(kJobId); | |||
| auto status = query_result.at(kStatus); | |||
| auto target_job_id = GetJsonValue<int>(query_result, kJobId); | |||
| auto status = GetJsonValue<std::string>(query_result, kStatus); | |||
| auto all_logs = GetJsonValue<std::vector<nlohmann::json>>(query_result, kProcessInfo); | |||
| auto message = FilterExceptionMessage(all_logs); | |||
| // save job status and exception message | |||
| @@ -354,8 +354,8 @@ void TbeKernelCompileManager::JsonAssemble(const std::string &job_type, const nl | |||
| (*dst_json)[kJobContent] = job_info; | |||
| } else if (job_type == kQuery) { | |||
| nlohmann::json content; | |||
| content[kSourceId] = src_json[kSourceId]; | |||
| content[kJobId] = src_json[kJobId]; | |||
| content[kSourceId] = GetJsonValue<int>(src_json, kSourceId); | |||
| content[kJobId] = GetJsonValue<int>(src_json, kJobId); | |||
| (*dst_json)[kJobContent] = content; | |||
| } else { | |||
| (*dst_json)[kJobContent] = src_json; | |||
| @@ -438,7 +438,8 @@ void TbeKernelCompileManager::SaveIOSizeInfo(const nlohmann::json &json, const s | |||
| std::vector<size_t> input_size_list; | |||
| std::vector<size_t> output_size_list; | |||
| if (!output_nodes.empty()) { | |||
| (void)TbeKernelBuild::GetIOSize(json[kOpList], output_nodes, &input_size_list, &output_size_list); | |||
| (void)TbeKernelBuild::GetIOSize(GetJsonValue<nlohmann::json>(json, kOpList), output_nodes, &input_size_list, | |||
| &output_size_list); | |||
| } else { | |||
| (void)TbeKernelBuild::GetIOSize(json, &input_size_list, &output_size_list); | |||
| } | |||
| @@ -1642,6 +1642,7 @@ void FinalizeHccl() { | |||
| (void)FinalizeBackend(); | |||
| #else | |||
| session::ExecutorManager::Instance().Clear(); | |||
| device::DeviceContextManager::GetInstance().ClearDeviceContexts(); | |||
| device::KernelRuntimeManager::Instance().ClearRuntimeResource(); | |||
| #endif | |||
| } | |||
| @@ -255,18 +255,17 @@ bool CheckHitTargetDtype(const std::map<TypeId, TypeId> &type_map, const TypeId | |||
| } | |||
| bool TagRaiseReduce(const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build_info, const CNodePtr &cnode, | |||
| const std::map<TypeId, TypeId> &type_map) { | |||
| const std::map<TypeId, TypeId> &type_map, bool *int64_flag) { | |||
| // filter out kernel info that does not support raising or reducing the data type | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(kernel_build_info); | |||
| bool flag = false; | |||
| for (size_t input_index = 0; input_index < kernel_build_info->GetInputNum(); ++input_index) { | |||
| auto in_dtype = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index); | |||
| auto device_dtype = kernel_build_info->GetInputDeviceType(input_index); | |||
| if (device_dtype == kNumberTypeFloat || device_dtype == kNumberTypeFloat32) { | |||
| device_dtype = kNumberTypeFloat32; | |||
| } | |||
| if (!CheckHitTargetDtype(type_map, in_dtype, device_dtype, &flag)) { | |||
| if (!CheckHitTargetDtype(type_map, in_dtype, device_dtype, int64_flag)) { | |||
| return false; | |||
| } | |||
| } | |||
| @@ -278,14 +277,10 @@ bool TagRaiseReduce(const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build | |||
| device_dtype = kNumberTypeFloat32; | |||
| } | |||
| if (!CheckHitTargetDtype(type_map, in_dtype, device_dtype, &flag)) { | |||
| if (!CheckHitTargetDtype(type_map, in_dtype, device_dtype, int64_flag)) { | |||
| return false; | |||
| } | |||
| } | |||
| if (flag) { | |||
| auto node_name = AnfAlgo::GetCNodeName(cnode); | |||
| MS_LOG(WARNING) << "Operator:[" << node_name << "] don't support int64, reduce precision from int64 to int32."; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -298,10 +293,11 @@ std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecis | |||
| const std::map<TypeId, TypeId> reduce_map = {{kNumberTypeInt64, kNumberTypeInt32}, | |||
| {kNumberTypeFloat, kNumberTypeFloat16}, | |||
| {kNumberTypeFloat32, kNumberTypeFloat16}}; | |||
| bool int64_reduce = false; | |||
| // raise precision | |||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]); | |||
| if (TagRaiseReduce(kernel_info_list[info_index], cnode, raise_map)) { | |||
| if (TagRaiseReduce(kernel_info_list[info_index], cnode, raise_map, &int64_reduce)) { | |||
| filtered_kernel_info_list.push_back(kernel_info_list[info_index]); | |||
| } | |||
| } | |||
| @@ -317,7 +313,7 @@ std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecis | |||
| if (context_ptr->get_param<bool>(MS_CTX_ENABLE_REDUCE_PRECISION)) { | |||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]); | |||
| if (TagRaiseReduce(kernel_info_list[info_index], cnode, reduce_map)) { | |||
| if (TagRaiseReduce(kernel_info_list[info_index], cnode, reduce_map, &int64_reduce)) { | |||
| filtered_kernel_info_list.push_back(kernel_info_list[info_index]); | |||
| } | |||
| } | |||
| @@ -325,6 +321,10 @@ std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecis | |||
| if (!filtered_kernel_info_list.empty()) { | |||
| *precision_reduce = true; | |||
| } | |||
| if (int64_reduce) { | |||
| auto node_name = AnfAlgo::GetCNodeName(cnode); | |||
| MS_LOG(WARNING) << "Operator:[" << node_name << "] don't support int64, reduce precision from int64 to int32."; | |||
| } | |||
| return filtered_kernel_info_list; | |||
| } | |||
| @@ -292,5 +292,15 @@ std::string FetchActorName(KernelTransformType kernel_type, const std::string &a | |||
| } | |||
| return actor_name; | |||
| } | |||
| bool CheckMemcpyInDevice(const DeviceTensor *dst_device_addr, const DeviceTensor *src_device_addr) { | |||
| MS_EXCEPTION_IF_NULL(dst_device_addr); | |||
| if (src_device_addr == nullptr) { | |||
| return false; | |||
| } | |||
| return (src_device_addr->DeviceType() == dst_device_addr->DeviceType() && | |||
| src_device_addr->format() == dst_device_addr->format() && | |||
| src_device_addr->type_id() == dst_device_addr->type_id()); | |||
| } | |||
| } // namespace runtime | |||
| } // namespace mindspore | |||
| @@ -210,6 +210,8 @@ KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const Kerne | |||
| GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline); | |||
| std::string FetchActorName(KernelTransformType kernel_type, const std::string &actor_set_name, | |||
| const AnfNodePtr &node = nullptr, const KernelGraphPtr &graph = nullptr); | |||
| bool CheckMemcpyInDevice(const DeviceTensor *dst_device_tensor, const DeviceTensor *src_device_tensor); | |||
| } // namespace runtime | |||
| } // namespace mindspore | |||
| @@ -239,10 +239,14 @@ void HostQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *cons | |||
| auto tensor_device_address = std::dynamic_pointer_cast<DeviceTensor>(host_tensor->device_address()); | |||
| // Sync data from host_tensor_device_address to device_tensor. | |||
| if (tensor_device_address != nullptr) { | |||
| if ((tensor_device_address.get() != device_tensor) && (!Copy(device_tensor, tensor_device_address.get()))) { | |||
| SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Copy data failed."); | |||
| if (CheckMemcpyInDevice(device_tensor, tensor_device_address.get())) { | |||
| if ((tensor_device_address.get() != device_tensor) && (!Copy(device_tensor, tensor_device_address.get()))) { | |||
| SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Copy data failed."); | |||
| } | |||
| continue; | |||
| } else { | |||
| host_tensor->data_sync(false); | |||
| } | |||
| continue; | |||
| } | |||
| // Sync data from host_tensor to device_tensor. | |||
| @@ -109,7 +109,7 @@ TensorPtr OutputActor::CreateOutputTensor(const AnfNodePtr &output_node, size_t | |||
| const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(output_node, output_index, false); | |||
| MS_EXCEPTION_IF_NULL(device_tensor); | |||
| // In the input as output scenario, use the device tensor of node. | |||
| if (output_node->isa<ValueNode>() || output_node->isa<Parameter>()) { | |||
| if (IsPersistentDeviceTensor(output_node)) { | |||
| tensor->set_device_address(device_tensor); | |||
| return tensor; | |||
| } | |||
| @@ -151,7 +151,7 @@ void OutputActor::UpdateOutputDeviceAddress() { | |||
| auto output_index = output_nodes_[i].second; | |||
| auto &tensor = outputs_[i]; | |||
| // In the input as output scenario, the output device tensor may come from the input tensor and can't be replaced. | |||
| if ((output_node == nullptr) || output_node->isa<ValueNode>() || output_node->isa<Parameter>()) { | |||
| if ((output_node == nullptr) || IsPersistentDeviceTensor(output_node)) { | |||
| continue; | |||
| } | |||
| @@ -28,6 +28,7 @@ | |||
| #include "runtime/hardware/ascend/ascend_graph_optimization.h" | |||
| #include "backend/kernel_compiler/ascend_kernel_mod.h" | |||
| #include "runtime/device/ascend/ascend_bucket.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #ifndef ENABLE_SECURITY | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| @@ -65,6 +66,7 @@ namespace ascend { | |||
| using KernelGraph = mindspore::session::KernelGraph; | |||
| const char kMsVm[] = "vm"; | |||
| constexpr size_t kAtomicCleanInputSize = 2; | |||
| constexpr auto kUnknowErrorString = "Unknown error occurred"; | |||
| namespace { | |||
| CNodePtr GetNextLabelSet(const std::vector<CNodePtr> &kernel_nodes, uint32_t index) { | |||
| size_t node_sizes = kernel_nodes.size(); | |||
| @@ -582,10 +584,30 @@ bool AscendDeviceContext::LaunchGraph(const KernelGraphPtr &graph) const { | |||
| runtime_instance_->SetContext(); | |||
| device::KernelAdjust::GetInstance().LoadDeviceLoopCtrlParameters(graph); | |||
| auto ret = ExecuteGraph(graph); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "run task error!"; | |||
| ReportErrorMessage(); | |||
| return ret; | |||
| } | |||
| ReportWarningMessage(); | |||
| MS_LOG(INFO) << "Status record: end launch graph. graph id: " << graph->graph_id(); | |||
| return ret; | |||
| } | |||
| void AscendDeviceContext::ReportErrorMessage() const { | |||
| const string &error_message = ErrorManager::GetInstance().GetErrorMessage(); | |||
| if (!error_message.empty() && error_message.find(kUnknowErrorString) == string::npos) { | |||
| MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message; | |||
| } | |||
| } | |||
| void AscendDeviceContext::ReportWarningMessage() const { | |||
| const string &warning_message = ErrorManager::GetInstance().GetWarningMessage(); | |||
| if (!warning_message.empty()) { | |||
| MS_LOG(WARNING) << "Ascend warning message:\n" << warning_message; | |||
| } | |||
| } | |||
| bool AscendDeviceContext::SyncStream(size_t stream_id) const { | |||
| MS_EXCEPTION_IF_NULL(runtime_instance_); | |||
| return runtime_instance_->SyncStream(); | |||
| @@ -597,7 +619,9 @@ bool AscendDeviceContext::IsExecutingSink(const KernelGraphPtr &graph) const { | |||
| return ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK) && IsGraphMode(); | |||
| } | |||
| bool AscendDeviceContext::IsLoopCountSink(const KernelGraphPtr &graph) const { return IsGraphMode(); } | |||
| bool AscendDeviceContext::IsLoopCountSink(const KernelGraphPtr &graph) const { | |||
| return device::KernelAdjust::NeedLoopSink() && IsGraphMode(); | |||
| } | |||
| // kernel by kernel mode interface | |||
| void AscendDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const { | |||
| @@ -138,6 +138,9 @@ class AscendDeviceContext : public DeviceContext { | |||
| static bool IsGraphMode(); | |||
| bool SyncRuning() const; | |||
| void ReportErrorMessage() const; | |||
| void ReportWarningMessage() const; | |||
| // Kernel Runtime --- only for task sink | |||
| AscendKernelRuntime *runtime_instance_{nullptr}; | |||
| std::shared_ptr<MemoryManager> mem_manager_{nullptr}; | |||