Browse Source

Cherry-pick code from enterprise

tags/v1.6.0
caifubi 4 years ago
parent
commit
8dc2d1b84c
21 changed files with 42 additions and 101 deletions
  1. +2
    -0
      mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
  2. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
  3. +4
    -3
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_atomic_clean.cc
  4. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_atomic_clean.h
  5. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_kernel.cc
  6. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_kernel.h
  7. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.cc
  8. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.h
  9. +4
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.cc
  10. +2
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.h
  11. +2
    -1
      mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc
  12. +2
    -6
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
  13. +1
    -43
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc
  14. +1
    -9
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h
  15. +2
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc
  16. +2
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.h
  17. +2
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_launch_mul.cc
  18. +2
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_launch_mul.h
  19. +1
    -13
      mindspore/ccsrc/runtime/device/kernel_adjust.cc
  20. +2
    -2
      mindspore/ccsrc/runtime/device/launch_kernel.h
  21. +2
    -2
      mindspore/ccsrc/runtime/device/launch_mul.h

+ 2
- 0
mindspore/ccsrc/pipeline/pynative/pynative_execute.cc View File

@@ -291,7 +291,9 @@ void PynativeInfer(const PrimitivePyPtr &prim, OpExecInfo *const op_exec_info,
prim->BeginRecordAddAttr();
AbstractBasePtr infer_res = EvalOnePrim(prim, args_spec_list)->abstract();
prim->EndRecordAddAttr();
MS_EXCEPTION_IF_NULL(op_exec_info);
op_exec_info->abstract = infer_res;
MS_EXCEPTION_IF_NULL(op_exec_info->abstract);
MS_LOG(DEBUG) << "Prim " << prim->name() << " infer result " << op_exec_info->abstract->ToString();
}



+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc View File

@@ -44,6 +44,7 @@ void AscendBucket::AllocateAllReduceAddr() {
MS_EXCEPTION_IF_NULL(tensor);
tensor_type_list_.emplace_back(tensor->data_type());
DeviceAddressPtr device_address = std::dynamic_pointer_cast<DeviceAddress>(tensor->device_address());
MS_EXCEPTION_IF_NULL(device_address);
auto origin_size = device_address->GetSize();
auto align_size = MemoryManager::GetCommonAlignSize(origin_size);
origin_size_list.emplace_back(origin_size);


+ 4
- 3
mindspore/ccsrc/runtime/device/ascend/ascend_launch_atomic_clean.cc View File

@@ -29,11 +29,11 @@ size_t AscendLaunchAtomicClean::AlignSizeForLaunchKernel(size_t size) {

uint8_t *AscendLaunchAtomicClean::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); }

void AscendLaunchAtomicClean::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchAtomicClean::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelSelect(kernel_graph);
}

void AscendLaunchAtomicClean::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchAtomicClean::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelBuild(kernel_graph);
}

@@ -100,10 +100,11 @@ void AscendLaunchAtomicClean::ConstructKernelGraphAndSetAttr() {
auto clean_node = atomic_clean_graph_->execution_order()[0];
// set abstract
AbstractBasePtr abstract = std::make_shared<abstract::AbstractNone>();
MS_EXCEPTION_IF_NULL(abstract);
MS_EXCEPTION_IF_NULL(clean_node);
clean_node->set_abstract(abstract);
// set build info
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(builder);
builder->SetKernelType(KernelType::TBE_KERNEL);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clean_node.get());
// set attr


+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_atomic_clean.h View File

@@ -36,8 +36,8 @@ class AscendLaunchAtomicClean : public AscendLaunchKernel {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void LaunchOpKernel() override;
void FreeLaunchDeviceMem() override;


+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_kernel.cc View File

@@ -31,7 +31,7 @@ uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
return static_cast<uint8_t *>(device_memory);
}

void AscendLaunchKernel::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchKernel::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto node_list = kernel_graph->execution_order();
for (size_t i = 0; i < node_list.size(); ++i) {
@@ -42,7 +42,7 @@ void AscendLaunchKernel::KernelSelect(std::shared_ptr<session::KernelGraph> kern
}
}

void AscendLaunchKernel::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchKernel::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto ret = device::ascend::KernelBuild(kernel_graph->execution_order());
if (!ret) {


+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_kernel.h View File

@@ -30,8 +30,8 @@ class AscendLaunchKernel : public LaunchKernel {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void SetInputAddr(uint8_t *input_addr) override = 0;
void LaunchOpKernel() override = 0;


+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.cc View File

@@ -29,11 +29,11 @@ size_t AscendLaunchMul::AlignSizeForLaunchKernel(size_t size) {

uint8_t *AscendLaunchMul::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); }

void AscendLaunchMul::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchMul::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelSelect(kernel_graph);
}

void AscendLaunchMul::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchMul::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelBuild(kernel_graph);
}



+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.h View File

@@ -33,8 +33,8 @@ class AscendLaunchMul : public AscendLaunchKernel, public LaunchMul {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void LaunchOpKernel() override;
void FreeLaunchDeviceMem() override;


+ 4
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.cc View File

@@ -15,6 +15,8 @@
*/

#include "runtime/device/ascend/ascend_launch_transdata.h"

#include <algorithm>
#include "abstract/utils.h"
#include "backend/session/single_kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h"
@@ -28,11 +30,11 @@ size_t AscendLaunchTransData::AlignSizeForLaunchKernel(size_t size) {

uint8_t *AscendLaunchTransData::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); }

void AscendLaunchTransData::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchTransData::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelSelect(kernel_graph);
}

void AscendLaunchTransData::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void AscendLaunchTransData::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
AscendLaunchKernel::KernelBuild(kernel_graph);
}



+ 2
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.h View File

@@ -42,8 +42,8 @@ class AscendLaunchTransData : public AscendLaunchKernel {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void LaunchOpKernel() override;
void FreeLaunchDeviceMem() override;


+ 2
- 1
mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -109,6 +109,7 @@ bool AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
MS_LOG(ERROR) << "Node:" << node_name_
<< " parse ext input shape failed as aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen
<< " and need_len:" << need_len;
return false;
}
auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);



+ 2
- 6
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -44,11 +44,7 @@ ProfilingManager &ProfilingManager::GetInstance() {

ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}), hccl_enabled_bef_profiling_enabled_(false) {}

// Returns the job id taken from the JOB_ID environment variable,
// parsed as a base-10 unsigned integer; returns 0 when the variable is not set.
uint64_t ProfilingManager::GetJobId() const {
  const char *env_value = std::getenv("JOB_ID");
  if (env_value == nullptr) {
    return 0;
  }
  constexpr int kBase = 10;
  return std::strtoul(env_value, nullptr, kBase);
}
// Returns the job id; this version reports a constant 0 and does not consult the environment.
uint64_t ProfilingManager::GetJobId() const {
  return 0;
}

bool ProfilingManager::ReportProfilingData(const map<uint32_t, string> &op_taskId_map) const {
if (!IsProfiling()) {


+ 1
- 43
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,12 +32,8 @@
namespace mindspore {
namespace device {
namespace ascend {
constexpr uint32_t kMaxProfilingNodeNum = 100;
constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
constexpr char kFpStartNode[] = "fp_point";
constexpr char kBpEndNode[] = "bp_point";
constexpr char kIterEndNode[] = "PROFILING_ITER_END";
// PROFILING_CUSTOM_LOGID_START 3
constexpr uint64_t kProfilingFpStartLogId = 2;
constexpr uint64_t kProfilingBpEndLogId = 3;
constexpr uint64_t kProfilingIterEndLogId = 4;
@@ -58,7 +54,6 @@ nlohmann::json GetContextProfilingOption() {

ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelGraph &kernel_graph) {
MS_LOG(INFO) << "Profiling graph:" << kernel_graph.graph_id() << " Start to get trace";
custom_node_index_ = 5000;
auto &cnode_exec_order = kernel_graph.execution_order();
auto profiling_option = GetContextProfilingOption();

@@ -69,7 +64,6 @@ ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelG
GetTraceBegin(kernel_graph, profiling_option, &profiling_trace);
GetTraceIterEnd(kernel_graph, &profiling_trace);
GetTraceBpEnd(kernel_graph, profiling_option, &profiling_trace);
GetTraceCustomNode(&profiling_trace);
GetTraceHccl(kernel_graph, NOT_NULL(&profiling_trace));

auto set_string_converter = [](const std::set<std::string> &str_set) {
@@ -86,19 +80,6 @@ ProfilingTraceInfo ProfilingUtils::GenerateProfilingTrace(const session::KernelG
return profiling_trace;
}

// Collects user-selected custom profiling nodes from the environment.
// Looks up PROFILING_CUSTOM_1, PROFILING_CUSTOM_2, ... (at most kMaxProfilingNodeNum entries)
// and stores each value in trace_info->trace_custom_node; the scan stops at the first empty value.
void ProfilingUtils::GetTraceCustomNode(ProfilingTraceInfo *trace_info) {
  MS_EXCEPTION_IF_NULL(trace_info);
  uint32_t index = 1;
  while (index <= kMaxProfilingNodeNum) {
    // Environment variable name is the fixed prefix followed by the 1-based index.
    std::string env_key(kCustomNode);
    env_key += std::to_string(index);
    const auto custom_node_name = common::GetEnv(env_key);
    if (custom_node_name.empty()) {
      break;
    }
    MS_LOG(INFO) << "Get custom profiling node:" << custom_node_name;
    trace_info->trace_custom_node.emplace(custom_node_name);
    ++index;
  }
}

void ProfilingUtils::GetTraceHccl(const session::KernelGraph &kernel_graph,
NotNull<ProfilingTraceInfo *> profiling_trace) {
for (const auto &node : kernel_graph.execution_order()) {
@@ -332,29 +313,6 @@ CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNode
return profiling_node;
}

// Surrounds a custom-profiled node with a pair of profiling trace points.
// Does nothing unless anf_node's full scoped name is present in profiling_trace_info.trace_custom_node.
// On a match, one profiling node is inserted just before the last element of kernel_list and a second
// one is appended at the end; both point ids are recorded via SaveProfilingPoint.
// NOTE(review): `end() - 1` presumes kernel_list is non-empty and that its last element is anf_node
// (i.e. the caller appended anf_node right before calling) — confirm against the caller.
void ProfilingUtils::InsertProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<CNodePtr> *> kernel_list) {
MS_EXCEPTION_IF_NULL(anf_node);
// Only nodes explicitly listed as custom trace nodes get profiling points.
auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope());
if (iter == profiling_trace_info.trace_custom_node.end()) {
return;
}
MS_LOG(INFO) << "Profiling graph:" << graph_ptr->graph_id() << " Match CustomOp:" << anf_node->fullname_with_scope();
// Custom op profiling point ids start from 10000.
// NOTE(review): this relies on custom_node_index_ starting at 5000 and kDouble being 2 — kDouble is not visible here, confirm.
auto custom_point_id = kDouble * custom_node_index_;
// Front point: uses custom_point_id and is placed before the current last element of the list.
ProfilingContent front_profiling_content = {false, custom_point_id, 0};
CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr);
kernel_list->insert(kernel_list->end() - 1, front_node);
SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id);

// Back point: uses custom_point_id + 1 and is appended at the end of the list.
ProfilingContent back_profiling_content = {false, custom_point_id + 1, 0};
CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr);
kernel_list->insert(kernel_list->end(), back_node);
SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id + 1);
// Advance the counter so the next matched custom node gets a fresh pair of ids.
++custom_node_index_;
}

void ProfilingUtils::InsertProfilingTraceBpEnd(const AnfNodePtr &anf_node,
const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,


+ 1
- 9
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -73,12 +73,6 @@ class ProfilingUtils {
// Generate profiling trace
static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph);

// Insert two profiling trace points, one in front and one behind
static void InsertProfilingCustomOp(const mindspore::AnfNodePtr &anf_node,
const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);

static std::map<uint32_t, std::vector<std::string>> graph_kernel_name() { return graph_kernel_name_; }

inline static constexpr char kProfiling[] = "Profiling";
@@ -97,7 +91,6 @@ class ProfilingUtils {
ProfilingTraceInfo *trace_info);
static void GetTraceIterEnd(const session::KernelGraph &kernel_graph, ProfilingTraceInfo *trace_info);
static std::string GetGraphLastKernelName(const session::KernelGraph &kernel_graph);
static void GetTraceCustomNode(ProfilingTraceInfo *trace_info);
static void GetTraceHccl(const session::KernelGraph &kernel_graph, NotNull<ProfilingTraceInfo *> profiling_trace);
static void GetCNodeOutputRealNode(const std::string &node_name, const session::KernelGraph &kernel_graph,
NotNull<std::set<std::string> *> getnext_outputs);
@@ -109,7 +102,6 @@ class ProfilingUtils {
inline static std::map<uint32_t, std::vector<CNodePtr>> graph_profiling_cnode_;
inline static std::map<uint32_t, std::vector<std::string>> graph_kernel_name_;
inline static std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> graph_point_;
inline static uint32_t custom_node_index_;
};
} // namespace ascend
} // namespace device


+ 2
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc View File

@@ -45,14 +45,14 @@ uint8_t *GPULaunchkernel::AllocDeviceMem(size_t size) {
return static_cast<uint8_t *>(device_memory);
}

void GPULaunchkernel::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void GPULaunchkernel::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
auto node_list = kernel_graph->execution_order();
for (size_t i = 0; i < node_list.size(); ++i) {
device::gpu::SetKernelInfo(node_list[i]);
}
}

void GPULaunchkernel::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void GPULaunchkernel::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
auto kernels = kernel_graph->execution_order();
device::gpu::CreateGPUKernel(kernels);
}


+ 2
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.h View File

@@ -30,8 +30,8 @@ class GPULaunchkernel : public LaunchKernel {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void SetInputAddr(uint8_t *input_addr) override = 0;
void LaunchOpKernel() override = 0;


+ 2
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_launch_mul.cc View File

@@ -28,11 +28,11 @@ size_t GPULaunchMul::AlignSizeForLaunchKernel(size_t size) { return GPULaunchker

uint8_t *GPULaunchMul::AllocDeviceMem(size_t size) { return GPULaunchkernel::AllocDeviceMem(size); }

void GPULaunchMul::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
void GPULaunchMul::KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
GPULaunchkernel::KernelSelect(kernel_graph);
}

void GPULaunchMul::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
void GPULaunchMul::KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
GPULaunchkernel::KernelBuild(kernel_graph);
}



+ 2
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_launch_mul.h View File

@@ -32,8 +32,8 @@ class GPULaunchMul : public GPULaunchkernel, public LaunchMul {
void FreeDeviceMem(void *addr) override;
size_t AlignSizeForLaunchKernel(size_t size) override;
uint8_t *AllocDeviceMem(size_t size) override;
void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;
void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;
void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) override;

void LaunchOpKernel() override;
void FreeLaunchDeviceMem() override;


+ 1
- 13
mindspore/ccsrc/runtime/device/kernel_adjust.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,7 +35,6 @@
#include "utils/shape_utils.h"

namespace {
constexpr auto kProfilingGraphId = "PROFILING_GRAPH_ID";
constexpr auto kGradients = "Gradients";
constexpr auto kSpecifyParameter = "accu_status";
size_t kNPUShape = 8;
@@ -853,15 +852,6 @@ void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {
MS_LOG(INFO) << "No need to profiling";
return;
}
auto graph_id_env = std::getenv(kProfilingGraphId);
if (graph_id_env != nullptr) {
auto graph_id = std::stoul(graph_id_env);
if (graph_id != kernel_graph_ptr->graph_id()) {
MS_LOG(WARNING) << "Get PROFILING_GRAPH_ID " << graph_id
<< " Not Match Current Graph Id:" << kernel_graph_ptr->graph_id();
return;
}
}
ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GenerateProfilingTrace(*kernel_graph_ptr);
if (!profiling_trace_info.IsValid()) {
MS_LOG(INFO) << "[profiling] no profiling node found!";
@@ -887,8 +877,6 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra
ProfilingUtils::InsertProfilingTraceFp(cnode_ptr, profiling_trace_info, kernel_graph_ptr,
NOT_NULL(&new_cnode_list));
new_cnode_list.emplace_back(cnode_ptr);
ProfilingUtils::InsertProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr,
NOT_NULL(&new_cnode_list));
ProfilingUtils::InsertProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr,
NOT_NULL(&new_cnode_list));
ProfilingUtils::InsertProfilingTraceIterEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr,


+ 2
- 2
mindspore/ccsrc/runtime/device/launch_kernel.h View File

@@ -34,8 +34,8 @@ class LaunchKernel {
virtual void FreeDeviceMem(void *addr) = 0;
virtual size_t AlignSizeForLaunchKernel(size_t size) = 0;
virtual uint8_t *AllocDeviceMem(size_t size) = 0;
virtual void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) = 0;
virtual void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) = 0;
virtual void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0;
virtual void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0;

virtual void SetInputAddr(uint8_t *input_addr) = 0;
virtual void LaunchOpKernel() = 0;


+ 2
- 2
mindspore/ccsrc/runtime/device/launch_mul.h View File

@@ -36,8 +36,8 @@ class LaunchMul {
virtual void FreeDeviceMem(void *addr) = 0;
virtual size_t AlignSizeForLaunchKernel(size_t size) = 0;
virtual uint8_t *AllocDeviceMem(size_t size) = 0;
virtual void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) = 0;
virtual void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) = 0;
virtual void KernelSelect(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0;
virtual void KernelBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) = 0;
virtual void CopyHostMemToDevice(size_t origin_size, size_t dst_size) = 0;

std::shared_ptr<session::KernelGraph> ObtainMulKernelGraph();


Loading…
Cancel
Save