| @@ -291,7 +291,9 @@ void PynativeInfer(const PrimitivePyPtr &prim, OpExecInfo *const op_exec_info, | |||
| prim->BeginRecordAddAttr(); | |||
| AbstractBasePtr infer_res = EvalOnePrim(prim, args_spec_list)->abstract(); | |||
| prim->EndRecordAddAttr(); | |||
| MS_EXCEPTION_IF_NULL(op_exec_info); | |||
| op_exec_info->abstract = infer_res; | |||
| MS_EXCEPTION_IF_NULL(op_exec_info->abstract); | |||
| MS_LOG(DEBUG) << "Prim " << prim->name() << " infer result " << op_exec_info->abstract->ToString(); | |||
| } | |||
| @@ -44,6 +44,7 @@ void AscendBucket::AllocateAllReduceAddr() { | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| tensor_type_list_.emplace_back(tensor->data_type()); | |||
| DeviceAddressPtr device_address = std::dynamic_pointer_cast<DeviceAddress>(tensor->device_address()); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| auto origin_size = device_address->GetSize(); | |||
| auto align_size = MemoryManager::GetCommonAlignSize(origin_size); | |||
| origin_size_list.emplace_back(origin_size); | |||
| @@ -29,11 +29,11 @@ size_t AscendLaunchAtomicClean::AlignSizeForLaunchKernel(size_t size) { | |||
| uint8_t *AscendLaunchAtomicClean::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); } | |||
| void AscendLaunchAtomicClean::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchAtomicClean::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelSelect(kernel_graph); | |||
| } | |||
| void AscendLaunchAtomicClean::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchAtomicClean::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelBuild(kernel_graph); | |||
| } | |||
| @@ -100,10 +100,11 @@ void AscendLaunchAtomicClean::ConstructKernelGraphAndSetAttr() { | |||
| auto clean_node = atomic_clean_graph_->execution_order()[0]; | |||
| // set abstract | |||
| AbstractBasePtr abstract = std::make_shared<abstract::AbstractNone>(); | |||
| MS_EXCEPTION_IF_NULL(abstract); | |||
| MS_EXCEPTION_IF_NULL(clean_node); | |||
| clean_node->set_abstract(abstract); | |||
| // set build info | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetKernelType(KernelType::TBE_KERNEL); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clean_node.get()); | |||
| // set attr | |||
| @@ -36,8 +36,8 @@ class AscendLaunchAtomicClean : public AscendLaunchKernel { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void LaunchOpKernel() override; | |||
| void FreeLaunchDeviceMem() override; | |||
| @@ -31,7 +31,7 @@ uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) { | |||
| return static_cast<uint8_t *>(device_memory); | |||
| } | |||
| void AscendLaunchKernel::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchKernel::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto node_list = kernel_graph->execution_order(); | |||
| for (size_t i = 0; i < node_list.size(); ++i) { | |||
| @@ -42,7 +42,7 @@ void AscendLaunchKernel::KernelSelect(std::shared_ptr<session::KernelGraph> kern | |||
| } | |||
| } | |||
| void AscendLaunchKernel::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchKernel::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto ret = device::ascend::KernelBuild(kernel_graph->execution_order()); | |||
| if (!ret) { | |||
| @@ -30,8 +30,8 @@ class AscendLaunchKernel : public LaunchKernel { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void SetInputAddr(uint8_t *input_addr) override = 0; | |||
| void LaunchOpKernel() override = 0; | |||
| @@ -29,11 +29,11 @@ size_t AscendLaunchMul::AlignSizeForLaunchKernel(size_t size) { | |||
| uint8_t *AscendLaunchMul::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); } | |||
| void AscendLaunchMul::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchMul::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelSelect(kernel_graph); | |||
| } | |||
| void AscendLaunchMul::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchMul::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelBuild(kernel_graph); | |||
| } | |||
| @@ -33,8 +33,8 @@ class AscendLaunchMul : public AscendLaunchKernel, public LaunchMul { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void LaunchOpKernel() override; | |||
| void FreeLaunchDeviceMem() override; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "runtime/device/ascend/ascend_launch_transdata.h" | |||
| #include <algorithm> | |||
| #include "abstract/utils.h" | |||
| #include "backend/session/single_kernel_graph.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| @@ -28,11 +30,11 @@ size_t AscendLaunchTransData::AlignSizeForLaunchKernel(size_t size) { | |||
| uint8_t *AscendLaunchTransData::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); } | |||
| void AscendLaunchTransData::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchTransData::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelSelect(kernel_graph); | |||
| } | |||
| void AscendLaunchTransData::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void AscendLaunchTransData::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| AscendLaunchKernel::KernelBuild(kernel_graph); | |||
| } | |||
| @@ -42,8 +42,8 @@ class AscendLaunchTransData : public AscendLaunchKernel { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void LaunchOpKernel() override; | |||
| void FreeLaunchDeviceMem() override; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -109,6 +109,7 @@ bool AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { | |||
| MS_LOG(ERROR) << "Node:" << node_name_ | |||
| << " parse ext input shape failed as aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen | |||
| << " and need_len:" << need_len; | |||
| return false; | |||
| } | |||
| auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -44,11 +44,7 @@ ProfilingManager &ProfilingManager::GetInstance() { | |||
| ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}), hccl_enabled_bef_profiling_enabled_(false) {} | |||
| uint64_t ProfilingManager::GetJobId() const { | |||
| constexpr int kDecimal = 10; | |||
| const char *job_id = std::getenv("JOB_ID"); | |||
| return ((job_id != nullptr) ? std::strtoul(job_id, nullptr, kDecimal) : 0); | |||
| } | |||
| uint64_t ProfilingManager::GetJobId() const { return 0; } | |||
| bool ProfilingManager::ReportProfilingData(const map<uint32_t, string> &op_taskId_map) const { | |||
| if (!IsProfiling()) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -32,12 +32,8 @@ | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| constexpr uint32_t kMaxProfilingNodeNum = 100; | |||
| constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; | |||
| constexpr char kFpStartNode[] = "fp_point"; | |||
| constexpr char kBpEndNode[] = "bp_point"; | |||
| constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | |||
| // PROFILING_CUSTOM_LOGID_START 3 | |||
| constexpr uint64_t kProfilingFpStartLogId = 2; | |||
| constexpr uint64_t kProfilingBpEndLogId = 3; | |||
| constexpr uint64_t kProfilingIterEndLogId = 4; | |||
| @@ -58,7 +54,6 @@ nlohmann::json GetContextProfilingOption() { | |||
| ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelGraph &kernel_graph) { | |||
| MS_LOG(INFO) << "Profiling graph:" << kernel_graph.graph_id() << " Start to get trace"; | |||
| custom_node_index_ = 5000; | |||
| auto &cnode_exec_order = kernel_graph.execution_order(); | |||
| auto profiling_option = GetContextProfilingOption(); | |||
| @@ -69,7 +64,6 @@ ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelG | |||
| GetTraceBegin(kernel_graph, profiling_option, &profiling_trace); | |||
| GetTraceIterEnd(kernel_graph, &profiling_trace); | |||
| GetTraceBpEnd(kernel_graph, profiling_option, &profiling_trace); | |||
| GetTraceCustomNode(&profiling_trace); | |||
| GetTraceHccl(kernel_graph, NOT_NULL(&profiling_trace)); | |||
| auto set_string_converter = [](const std::set<std::string> &str_set) { | |||
| @@ -86,19 +80,6 @@ ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelG | |||
| return profiling_trace; | |||
| } | |||
| void ProfilingUtils::GetTraceCustomNode(ProfilingTraceInfo *trace_info) { | |||
| MS_EXCEPTION_IF_NULL(trace_info); | |||
| for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { | |||
| std::string env_str = std::string(kCustomNode) + std::to_string(i); | |||
| auto node_full_name = common::GetEnv(env_str); | |||
| if (node_full_name.empty()) { | |||
| break; | |||
| } | |||
| MS_LOG(INFO) << "Get custom profiling node:" << node_full_name; | |||
| trace_info->trace_custom_node.emplace(node_full_name); | |||
| } | |||
| } | |||
| void ProfilingUtils::GetTraceHccl(const session::KernelGraph &kernel_graph, | |||
| NotNull<ProfilingTraceInfo *> profiling_trace) { | |||
| for (const auto &node : kernel_graph.execution_order()) { | |||
| @@ -332,29 +313,6 @@ CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNode | |||
| return profiling_node; | |||
| } | |||
| void ProfilingUtils::InsertProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<CNodePtr> *> kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope()); | |||
| if (iter == profiling_trace_info.trace_custom_node.end()) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Profiling graph:" << graph_ptr->graph_id() << " Match CustomOp:" << anf_node->fullname_with_scope(); | |||
| // custom op profiling job start from 10000. | |||
| auto custom_point_id = kDouble * custom_node_index_; | |||
| ProfilingContent front_profiling_content = {false, custom_point_id, 0}; | |||
| CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); | |||
| kernel_list->insert(kernel_list->end() - 1, front_node); | |||
| SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id); | |||
| ProfilingContent back_profiling_content = {false, custom_point_id + 1, 0}; | |||
| CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); | |||
| kernel_list->insert(kernel_list->end(), back_node); | |||
| SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id + 1); | |||
| ++custom_node_index_; | |||
| } | |||
| void ProfilingUtils::InsertProfilingTraceBpEnd(const AnfNodePtr &anf_node, | |||
| const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -73,12 +73,6 @@ class ProfilingUtils { | |||
| // Generate profiling trace | |||
| static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph); | |||
| // Insert two profiling trace points, one in front and one behind | |||
| static void InsertProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, | |||
| const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list); | |||
| static std::map<uint32_t, std::vector<std::string>> graph_kernel_name() { return graph_kernel_name_; } | |||
| inline static constexpr char kProfiling[] = "Profiling"; | |||
| @@ -97,7 +91,6 @@ class ProfilingUtils { | |||
| ProfilingTraceInfo *trace_info); | |||
| static void GetTraceIterEnd(const session::KernelGraph &kernel_graph, ProfilingTraceInfo *trace_info); | |||
| static std::string GetGraphLastKernelName(const session::KernelGraph &kernel_graph); | |||
| static void GetTraceCustomNode(ProfilingTraceInfo *trace_info); | |||
| static void GetTraceHccl(const session::KernelGraph &kernel_graph, NotNull<ProfilingTraceInfo *> profiling_trace); | |||
| static void GetCNodeOutputRealNode(const std::string &node_name, const session::KernelGraph &kernel_graph, | |||
| NotNull<std::set<std::string> *> getnext_outputs); | |||
| @@ -109,7 +102,6 @@ class ProfilingUtils { | |||
| inline static std::map<uint32_t, std::vector<CNodePtr>> graph_profiling_cnode_; | |||
| inline static std::map<uint32_t, std::vector<std::string>> graph_kernel_name_; | |||
| inline static std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> graph_point_; | |||
| inline static uint32_t custom_node_index_; | |||
| }; | |||
| } // namespace ascend | |||
| } // namespace device | |||
| @@ -45,14 +45,14 @@ uint8_t *GPULaunchkernel::AllocDeviceMem(size_t size) { | |||
| return static_cast<uint8_t *>(device_memory); | |||
| } | |||
| void GPULaunchkernel::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void GPULaunchkernel::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| auto node_list = kernel_graph->execution_order(); | |||
| for (size_t i = 0; i < node_list.size(); ++i) { | |||
| device::gpu::SetKernelInfo(node_list[i]); | |||
| } | |||
| } | |||
| void GPULaunchkernel::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void GPULaunchkernel::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| auto kernels = kernel_graph->execution_order(); | |||
| device::gpu::CreateGPUKernel(kernels); | |||
| } | |||
| @@ -30,8 +30,8 @@ class GPULaunchkernel : public LaunchKernel { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void SetInputAddr(uint8_t *input_addr) override = 0; | |||
| void LaunchOpKernel() override = 0; | |||
| @@ -28,11 +28,11 @@ size_t GPULaunchMul::AlignSizeForLaunchKernel(size_t size) { return GPULaunchker | |||
| uint8_t *GPULaunchMul::AllocDeviceMem(size_t size) { return GPULaunchkernel::AllocDeviceMem(size); } | |||
| void GPULaunchMul::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void GPULaunchMul::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| GPULaunchkernel::KernelSelect(kernel_graph); | |||
| } | |||
| void GPULaunchMul::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) { | |||
| void GPULaunchMul::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| GPULaunchkernel::KernelBuild(kernel_graph); | |||
| } | |||
| @@ -32,8 +32,8 @@ class GPULaunchMul : public GPULaunchkernel, public LaunchMul { | |||
| void FreeDeviceMem(void *addr) override; | |||
| size_t AlignSizeForLaunchKernel(size_t size) override; | |||
| uint8_t *AllocDeviceMem(size_t size) override; | |||
| void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override; | |||
| void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override; | |||
| void LaunchOpKernel() override; | |||
| void FreeLaunchDeviceMem() override; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -35,7 +35,6 @@ | |||
| #include "utils/shape_utils.h" | |||
| namespace { | |||
| constexpr auto kProfilingGraphId = "PROFILING_GRAPH_ID"; | |||
| constexpr auto kGradients = "Gradients"; | |||
| constexpr auto kSpecifyParameter = "accu_status"; | |||
| size_t kNPUShape = 8; | |||
| @@ -853,15 +852,6 @@ void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) { | |||
| MS_LOG(INFO) << "No need to profiling"; | |||
| return; | |||
| } | |||
| auto graph_id_env = std::getenv(kProfilingGraphId); | |||
| if (graph_id_env != nullptr) { | |||
| auto graph_id = std::stoul(graph_id_env); | |||
| if (graph_id != kernel_graph_ptr->graph_id()) { | |||
| MS_LOG(WARNING) << "Get PROFILING_GRAPH_ID " << graph_id | |||
| << " Not Match Current Graph Id:" << kernel_graph_ptr->graph_id(); | |||
| return; | |||
| } | |||
| } | |||
| ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GenerateProfilingTrace(*kernel_graph_ptr); | |||
| if (!profiling_trace_info.IsValid()) { | |||
| MS_LOG(INFO) << "[profiling] no profiling node found!"; | |||
| @@ -887,8 +877,6 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra | |||
| ProfilingUtils::InsertProfilingTraceFp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, | |||
| NOT_NULL(&new_cnode_list)); | |||
| new_cnode_list.emplace_back(cnode_ptr); | |||
| ProfilingUtils::InsertProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, | |||
| NOT_NULL(&new_cnode_list)); | |||
| ProfilingUtils::InsertProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, | |||
| NOT_NULL(&new_cnode_list)); | |||
| ProfilingUtils::InsertProfilingTraceIterEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, | |||
| @@ -34,8 +34,8 @@ class LaunchKernel { | |||
| virtual void FreeDeviceMem(void *addr) = 0; | |||
| virtual size_t AlignSizeForLaunchKernel(size_t size) = 0; | |||
| virtual uint8_t *AllocDeviceMem(size_t size) = 0; | |||
| virtual void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) = 0; | |||
| virtual void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) = 0; | |||
| virtual void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0; | |||
| virtual void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0; | |||
| virtual void SetInputAddr(uint8_t *input_addr) = 0; | |||
| virtual void LaunchOpKernel() = 0; | |||
| @@ -36,8 +36,8 @@ class LaunchMul { | |||
| virtual void FreeDeviceMem(void *addr) = 0; | |||
| virtual size_t AlignSizeForLaunchKernel(size_t size) = 0; | |||
| virtual uint8_t *AllocDeviceMem(size_t size) = 0; | |||
| virtual void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) = 0; | |||
| virtual void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) = 0; | |||
| virtual void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0; | |||
| virtual void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0; | |||
| virtual void CopyHostMemToDevice(size_t origin_size, size_t dst_size) = 0; | |||
| std::shared_ptr<session::KernelGraph> ObtainMulKernelGraph(); | |||