Merge pull request !2528 from caifubi/clean-codex-and-code-reviewtags/v0.6.0-beta
| @@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() { | |||
| int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) { | |||
| if (plugin != nullptr) { | |||
| delete plugin; | |||
| plugin = nullptr; | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -15,11 +15,8 @@ | |||
| */ | |||
| #include "device/ascend/profiling/profiling_manager.h" | |||
| #include <stdlib.h> | |||
| #include <vector> | |||
| #include <nlohmann/json.hpp> | |||
| #include "securec/include/securec.h" | |||
| #include "./prof_mgr_core.h" | |||
| #include "device/ascend/profiling/plugin_impl.h" | |||
| @@ -30,9 +27,6 @@ | |||
| #include "utils/convert_utils.h" | |||
| #include "runtime/base.h" | |||
| using std::vector; | |||
| using Json = nlohmann::json; | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| @@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| const string prof_options_str = context->profiling_options(); | |||
| vector<string> opts = Split(prof_options_str, ':'); | |||
| std::vector<string> opts = Split(prof_options_str, ':'); | |||
| if (opts.empty()) { | |||
| MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!"; | |||
| return true; | |||
| } | |||
| // current one docker only use one device` | |||
| Json p_device; | |||
| nlohmann::json p_device; | |||
| // JOBID | |||
| auto job_id = GetJobId(); | |||
| p_device["jobID"] = std::to_string(job_id); | |||
| // device_id | |||
| p_device["deviceID"] = std::to_string(device_id); | |||
| // features:'training_trace', 'task_trace' etc | |||
| Json features; | |||
| for (vector<string>::size_type i = 0; i < opts.size(); i++) { | |||
| Json f; | |||
| nlohmann::json features; | |||
| for (std::vector<string>::size_type i = 0; i < opts.size(); i++) { | |||
| nlohmann::json f; | |||
| f["name"] = opts[i]; | |||
| features[i] = f; | |||
| } | |||
| p_device["features"] = features; | |||
| // only one device, but sProfMgrStartUp API require for device list | |||
| Json devices; | |||
| nlohmann::json devices; | |||
| devices[0] = p_device; | |||
| Json startCfg; | |||
| nlohmann::json startCfg; | |||
| startCfg["startCfg"] = devices; | |||
| if (!ProfStartUp(NOT_NULL(&startCfg))) { | |||
| MS_LOG(ERROR) << "ProfMgrStartUp failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool ProfilingManager::ProfStartUp(NotNull<nlohmann::json *> startCfg) { | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << startCfg; | |||
| ss << *startCfg; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "profiling config " << cfg; | |||
| auto ret = rtProfilerStart(); | |||
| @@ -20,18 +20,15 @@ | |||
| #include <cstring> | |||
| #include <string> | |||
| #include <memory> | |||
| #include <nlohmann/json.hpp> | |||
| #include "utils/contract.h" | |||
| #include "utils/context/ms_context.h" | |||
| using std::map; | |||
| using std::string; | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| // PROFILING_CUSTOM_LOGID_START 3 | |||
| const uint64_t kProfilingFpStartLogId = 1; | |||
| const uint64_t kProfilingBpEndLogId = 2; | |||
| const uint64_t kProfilingIterEndLogId = 255; | |||
| class ProfilingEngineImpl; | |||
| class ProfilingManager { | |||
| public: | |||
| @@ -52,6 +49,7 @@ class ProfilingManager { | |||
| ~ProfilingManager() { prof_handle_ = nullptr; } | |||
| private: | |||
| bool ProfStartUp(NotNull<nlohmann::json *> json); | |||
| std::shared_ptr<ProfilingEngineImpl> engine_0_; | |||
| uint32_t device_id_; | |||
| void *prof_handle_; | |||
| @@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; | |||
| constexpr char kFpStartNode[] = "PROFILING_FP_START"; | |||
| constexpr char kBpEndNode[] = "PROFILING_BP_END"; | |||
| constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | |||
| // PROFILING_CUSTOM_LOGID_START 3 | |||
| constexpr uint64_t kProfilingFpStartLogId = 1; | |||
| constexpr uint64_t kProfilingBpEndLogId = 2; | |||
| constexpr uint64_t kProfilingIterEndLogId = 255; | |||
| std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_; | |||
| std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_; | |||
| std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_; | |||
| @@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info | |||
| if (task_info->hccl_type() == kBroadcastOpName) { | |||
| // call hcom broadcast interface to run op | |||
| const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0); | |||
| ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()), | |||
| static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()), | |||
| static_cast<u32>(task_info->root_id()), hccl_group.c_str(), stream); | |||
| ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()), | |||
| static_cast<hcclDataType_t>(task_info->data_type()), static_cast<u32>(task_info->root_id()), | |||
| hccl_group.c_str(), stream); | |||
| if (ret != HCCL_SUCCESS) { | |||
| MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret); | |||
| return false; | |||
| @@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info | |||
| } else if (task_info->hccl_type() == kAllGatherOpName) { | |||
| // call hcom allgather interface to run op | |||
| const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0); | |||
| ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()), | |||
| reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()), | |||
| static_cast<hcclDataType_t>(task_info->data_type()), hccl_group.c_str(), stream); | |||
| ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), | |||
| static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()), | |||
| hccl_group.c_str(), stream); | |||
| if (ret != HCCL_SUCCESS) { | |||
| MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret; | |||
| return false; | |||
| @@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info | |||
| } else if (task_info->hccl_type() == kAllReduceOpName) { | |||
| // call hcom allreduce interface to run op | |||
| const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0); | |||
| ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()), | |||
| reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()), | |||
| static_cast<hcclDataType_t>(task_info->data_type()), | |||
| ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), | |||
| static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()), | |||
| static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream); | |||
| if (ret != HCCL_SUCCESS) { | |||
| MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret; | |||
| @@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info | |||
| // call hcom reducescatter interface to run op | |||
| const string tag_reduce_scatter = | |||
| kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0); | |||
| ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()), | |||
| reinterpret_cast<void *>(task_info->output_data_addr()), | |||
| ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(), | |||
| static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()), | |||
| static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream); | |||
| if (ret != HCCL_SUCCESS) { | |||
| @@ -40,39 +40,46 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve | |||
| return true; | |||
| } | |||
| void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { | |||
| MS_EXCEPTION_IF_NULL(anf_node_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs); | |||
| // akg process | |||
| // set atomic clean addr | |||
| if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) { | |||
| auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs); | |||
| auto graph = anf_node_ptr->func_graph(); | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto manager = graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto node_users = manager->node_users(); | |||
| if (node_users[anf_node_ptr].empty()) { | |||
| MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; | |||
| } | |||
| auto depend_node = node_users[anf_node_ptr].pop().first; | |||
| if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { | |||
| MS_LOG(EXCEPTION) << "Checking Depend node failed"; | |||
| } | |||
| if (node_users[depend_node].empty()) { | |||
| MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; | |||
| } | |||
| auto post_node = node_users[depend_node].pop().first; | |||
| for (auto index : clean_output_indexs) { | |||
| auto device_address = AnfAlgo::GetOutputAddr(post_node, index); | |||
| kernel::AddressPtr input = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| input->addr = device_address->ptr_; | |||
| input->size = device_address->size_; | |||
| kernel_inputs->push_back(input); | |||
| } | |||
| MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); | |||
| } | |||
| } | |||
| void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { | |||
| MS_EXCEPTION_IF_NULL(anf_node_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs); | |||
| if (anf_node_ptr->inputs().size() != 2) { | |||
| // akg process | |||
| // set atomic clean addr | |||
| if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) { | |||
| auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs); | |||
| auto graph = anf_node_ptr->func_graph(); | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto manager = graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto node_users = manager->node_users(); | |||
| if (node_users[anf_node_ptr].empty()) { | |||
| MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; | |||
| } | |||
| auto depend_node = node_users[anf_node_ptr].pop().first; | |||
| if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { | |||
| MS_LOG(EXCEPTION) << "Checking Depend node failed"; | |||
| } | |||
| if (node_users[depend_node].empty()) { | |||
| MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; | |||
| } | |||
| auto post_node = node_users[depend_node].pop().first; | |||
| for (auto index : clean_output_indexs) { | |||
| auto device_address = AnfAlgo::GetOutputAddr(post_node, index); | |||
| kernel::AddressPtr input = std::make_shared<kernel::Address>(); | |||
| input->addr = device_address->ptr_; | |||
| MS_EXCEPTION_IF_NULL(input->addr); | |||
| input->size = device_address->size_; | |||
| kernel_inputs->push_back(input); | |||
| } | |||
| MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); | |||
| } | |||
| LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs); | |||
| return; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]); | |||
| @@ -48,6 +48,7 @@ class TaskGenerator { | |||
| private: | |||
| static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs); | |||
| static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs); | |||
| static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector<TaskInfoPtr> *task_info_list); | |||
| static bool LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list, | |||
| uint32_t graph_id); | |||
| @@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph | |||
| std::vector<int> data_shape = tensor->shape(); | |||
| size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>()); | |||
| DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32); | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) { | |||
| address->ptr_ = tensor->data_c(false); | |||
| } else { | |||