Browse Source

!23940 code check

Merge pull request !23940 from zhaosida/code_clean_master
tags/v1.6.0
i-robot Gitee 4 years ago
parent
commit
9e17c29b91
68 changed files with 335 additions and 45 deletions
  1. +4
    -0
      mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc
  2. +4
    -0
      mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc
  3. +9
    -2
      mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc
  4. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc
  5. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc
  6. +6
    -1
      mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc
  7. +9
    -1
      mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc
  8. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc
  9. +14
    -0
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
  10. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
  11. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc
  12. +7
    -2
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc
  13. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_property_checker.cc
  14. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc
  15. +8
    -0
      mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc
  16. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
  17. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_rnn.cc
  18. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_depend_for_all_gather.cc
  19. +0
    -2
      mindspore/ccsrc/backend/optimizer/ascend/enhancer/split_inputs_for_reduce_scatter.cc
  20. +2
    -0
      mindspore/ccsrc/backend/optimizer/ascend/enhancer/split_n_optimizer.cc
  21. +2
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc
  22. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc
  23. +4
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc
  24. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc
  25. +2
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc
  26. +4
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc
  27. +2
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc
  28. +1
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/trans_op_format_refine.cc
  29. +7
    -3
      mindspore/ccsrc/backend/optimizer/ascend/mindir/dropout_unify_mindir.cc
  30. +2
    -4
      mindspore/ccsrc/backend/optimizer/ascend/mindir/maxpool_with_argmax_unify_mindir.cc
  31. +1
    -1
      mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
  32. +1
    -0
      mindspore/ccsrc/backend/optimizer/pass/add_training_attr.cc
  33. +2
    -0
      mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc
  34. +6
    -0
      mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc
  35. +1
    -0
      mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc
  36. +1
    -0
      mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc
  37. +3
    -0
      mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc
  38. +6
    -0
      mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
  39. +3
    -0
      mindspore/ccsrc/backend/session/executor.cc
  40. +1
    -1
      mindspore/ccsrc/backend/session/kernel_build_client.h
  41. +17
    -4
      mindspore/ccsrc/backend/session/kernel_graph.cc
  42. +2
    -2
      mindspore/ccsrc/backend/session/kernel_graph.h
  43. +1
    -0
      mindspore/ccsrc/backend/session/single_kernel_graph.cc
  44. +1
    -0
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  45. +6
    -0
      mindspore/ccsrc/debug/data_dump/dump_utils.cc
  46. +2
    -0
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  47. +9
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
  48. +8
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
  49. +9
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
  50. +3
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc
  51. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
  52. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc
  53. +24
    -3
      mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
  54. +19
    -4
      mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
  55. +10
    -0
      mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
  56. +17
    -3
      mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc
  57. +8
    -0
      mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
  58. +9
    -0
      mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
  59. +6
    -0
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc
  60. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc
  61. +1
    -1
      mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc
  62. +1
    -1
      mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc
  63. +19
    -3
      mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc
  64. +1
    -0
      mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
  65. +2
    -1
      mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc
  66. +2
    -1
      mindspore/ccsrc/runtime/device/kernel_runtime.cc
  67. +1
    -0
      mindspore/ccsrc/runtime/device/memory_manager.cc
  68. +26
    -0
      mindspore/core/utils/convert_utils_base.h

+ 4
- 0
mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc View File

@@ -34,6 +34,8 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect
return false;
}

MS_EXCEPTION_IF_NULL(inputs[0]);
MS_EXCEPTION_IF_NULL(inputs[1]);
if (inputs[0]->addr == inputs[1]->addr) {
MS_LOG(INFO) << "first addr is same with second addr , no need assign";
return true;
@@ -54,6 +56,8 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in
}
stream_id_ = stream_id;

MS_EXCEPTION_IF_NULL(inputs[0]);
MS_EXCEPTION_IF_NULL(inputs[1]);
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr,
inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false);


+ 4
- 0
mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc View File

@@ -63,6 +63,10 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr
uint32_t stream_id) {
MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id;
std::vector<TaskInfoPtr> task_info_list;
if (inputs.empty()) {
MS_LOG(EXCEPTION) << "LabelSwitchKernel is empty";
}
MS_EXCEPTION_IF_NULL(inputs[0]);
cond_ = inputs[0]->addr;
auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(unique_name_, stream_id, label_size_, label_list_, cond_);
MS_EXCEPTION_IF_NULL(task_info_ptr);


+ 9
- 2
mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc View File

@@ -47,6 +47,8 @@ bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std:
return false;
}

MS_EXCEPTION_IF_NULL(inputs[0]);
MS_EXCEPTION_IF_NULL(outputs[0]);
if (inputs[0]->addr == outputs[0]->addr) {
MS_LOG(INFO) << "input addr is same with output addr , no need exe memcpy async";
return true;
@@ -93,9 +95,9 @@ void MemCpyAsyncKernel::GetInputOutputTotalCount(const AnfNodePtr &anf_node) {
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, 0);
size_t total_size = 1;
for (size_t i = 0; i < shape_i.size(); i++) {
total_size = total_size * shape_i[i];
total_size = SizetMulWithOverflowCheck(total_size, shape_i[i]);
}
total_size *= type_size;
total_size = SizetMulWithOverflowCheck(total_size, type_size);
MS_LOG(INFO) << "MemCpyAsync size[" << total_size << "]";
input_size_list_.emplace_back(total_size);
output_size_list_.emplace_back(total_size);
@@ -112,6 +114,8 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one";
}

MS_EXCEPTION_IF_NULL(outputs[0]);
MS_EXCEPTION_IF_NULL(inputs[0]);
if (outputs[0]->size < inputs[0]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax < src size";
}
@@ -127,6 +131,7 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}

device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
AddressPtrList kernel_inputs;
AddressPtrList kernel_workspaces;
@@ -141,6 +146,8 @@ device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cno
MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one, got " << kernel_outputs.size();
}

MS_EXCEPTION_IF_NULL(kernel_outputs[0]);
MS_EXCEPTION_IF_NULL(kernel_inputs[0]);
if (kernel_outputs[0]->size < kernel_inputs[0]->size) {
MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size "
<< kernel_inputs[0]->size;


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc View File

@@ -33,6 +33,7 @@ bool ProfilingKernelMod::Init(const AnfNodePtr &anf_node) {
MS_LOG(INFO) << "[profiling] init profiling kernel mod";
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);

MS_EXCEPTION_IF_NULL(primitive);
ValuePtr notify_ptr = primitive->GetAttr(ProfilingUtils::kNotify);
MS_EXCEPTION_IF_NULL(notify_ptr);



+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc View File

@@ -68,6 +68,7 @@ void GetRtKelInfo(const CNodePtr &kernel_node,
auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
// set input infos
auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_build_info_builder);
kernel_build_info_builder->SetInputsFormat(std::vector<std::string>(input_num, kOpFormat_DEFAULT));
std::vector<TypeId> input_types = {};
for (size_t i = 0; i < input_num; i++) {


+ 6
- 1
mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc View File

@@ -52,8 +52,13 @@ bool StreamActiveKernel::Launch(const std::vector<AddressPtr> &, const std::vect

rtStream_t act_stream;
rtError_t status;
MS_EXCEPTION_IF_NULL(kernel::TaskStream::GetInstance());
auto stream_list = kernel::TaskStream::GetInstance()->gen_stream_list();
for (auto index : active_streams_index_) {
act_stream = kernel::TaskStream::GetInstance()->gen_stream_list()[index];
if (index >= stream_list.size()) {
MS_LOG(EXCEPTION) << "Invalid index: " << index << " stream_list size: " << stream_list.size();
}
act_stream = stream_list[index];
status = rtStreamActive(act_stream, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Stream active failed!";


+ 9
- 1
mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc View File

@@ -65,9 +65,17 @@ bool StreamSwitchKernel::Launch(const std::vector<AddressPtr> &inputs, const std
MS_LOG(EXCEPTION) << "Stream switch inputs size is " << inputs.size() << ", only support 2";
}

MS_EXCEPTION_IF_NULL(inputs[0]);
MS_EXCEPTION_IF_NULL(inputs[1]);
void *loop_cnt = inputs[0]->addr;
void *ites_per_loop = inputs[1]->addr;
rtStream_t true_stream_ = kernel::TaskStream::GetInstance()->gen_stream_list()[true_stream_index_];
MS_EXCEPTION_IF_NULL(kernel::TaskStream::GetInstance());
auto stream_list = kernel::TaskStream::GetInstance()->gen_stream_list();
if (true_stream_index_ >= stream_list.size()) {
MS_LOG(EXCEPTION) << "Invalid true_stream_index_: " << true_stream_index_
<< " total stream size: " << stream_list.size();
}
rtStream_t true_stream_ = stream_list[true_stream_index_];
rtError_t status = rtStreamSwitchEx(loop_cnt, cond_, ites_per_loop, true_stream_, stream_ptr, data_type_);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Stream switch failed!";


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc View File

@@ -330,6 +330,7 @@ bool TbeDynamicShapeUtil::IsDynamicShapeNode(const AnfNodePtr &anf_node) {
}

void TbeDynamicShapeUtil::SetDynamicShapeAttr(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
auto is_dyanmic_shape = IsDynamicShapeNode(cnode);
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(is_dyanmic_shape), cnode);
}


+ 14
- 0
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc View File

@@ -272,6 +272,8 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor

void GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> &input_ptr, size_t input_i,
std::vector<nlohmann::json> *const input_list) {
MS_EXCEPTION_IF_NULL(input_ptr);
MS_EXCEPTION_IF_NULL(input_list);
nlohmann::json input_desc_json;
auto in_name = input_ptr->name();
input_desc_json[kJName] = in_name + std::to_string(input_i);
@@ -283,6 +285,9 @@ void TbeKernelJsonCreator::GenValidInputDescJson(const std::shared_ptr<AnfNode>
bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
const string &op_input_name, size_t input_i,
std::vector<nlohmann::json> *const input_list) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(input_ptr);
MS_EXCEPTION_IF_NULL(input_list);
auto def_format = kOpFormat_NCHW;
auto dtype = GetDeviceInputType(anf_node, real_input_index);
auto format = GetDeviceInputFormat(anf_node, real_input_index);
@@ -814,6 +819,7 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no
}

void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
MS_EXCEPTION_IF_NULL(input_size_list);
for (size_t i = 0; i < input_json.size(); i++) {
for (size_t m = 0; m < input_json[i].size(); m++) {
size_t size_i = 1;
@@ -841,6 +847,7 @@ void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *inp
}

void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
MS_EXCEPTION_IF_NULL(output_size_list);
for (size_t i = 0; i < output_json.size(); i++) {
for (size_t m = 0; m < output_json[i].size(); m++) {
size_t size_i = 1;
@@ -871,6 +878,8 @@ void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *o

bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(output_size_list);
MS_EXCEPTION_IF_NULL(input_size_list);
if (input_size_list == nullptr || output_size_list == nullptr) {
MS_LOG(ERROR) << "Input size or output size is nullptr";
return false;
@@ -1046,6 +1055,7 @@ void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode
std::string *fusion_kernel_name) {
MS_EXCEPTION_IF_NULL(compute_op_str);
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
MS_EXCEPTION_IF_NULL(cnode);
// gen others
auto origin_type = AnfAlgo::GetCNodeName(cnode);
auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(origin_type, cnode);
@@ -1163,6 +1173,7 @@ void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::An
nlohmann::json *output_data_desc) {
MS_EXCEPTION_IF_NULL(output_desc);
MS_EXCEPTION_IF_NULL(output_data_desc);
MS_EXCEPTION_IF_NULL(anf_node);
GenDescJson(anf_node, node_out_idx, desc_output_idx, output_desc);
*output_data_desc = *output_desc;
(*output_data_desc)[kJDtype] = (*output_desc)[kJDataType];
@@ -1186,6 +1197,7 @@ void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNod
bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
const std::vector<mindspore::AnfNodePtr> &reorder_layer,
std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
MS_EXCEPTION_IF_NULL(spec_data_input);
if ((op_name == kReluGradV2OpName || op_name == kAddNOpName || op_name == kTensorAddOpName) &&
reorder_layer.empty()) {
MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. ";
@@ -1381,6 +1393,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(input_desc_list);
MS_EXCEPTION_IF_NULL(layer_iter);
MS_EXCEPTION_IF_NULL(index);
std::vector<nlohmann::json> input_desc_list_tmp = {};
// 1. input json
bool is_dynamic_input = IsDynamicInput(cnode);


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc View File

@@ -253,6 +253,7 @@ bool ParallelBuildManager::GenSameOpKernelMod() const {
}

bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const {
MS_EXCEPTION_IF_NULL(kernel_mode_ret);
bool ret = true;
for (const auto &task_info : same_op_list_) {
auto kernel_pack = TbeUtils::SearchCache(task_info.json_name);


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc View File

@@ -87,10 +87,12 @@ bool TbeKernelReduceSelecter::IsReduceSupportNDC1HWC0(SupportFormat *support_for
}

bool TbeKernelReduceSelecter::IsReduceSupportFracZ(SupportFormat *support_format) const {
MS_EXCEPTION_IF_NULL(support_format);
return IsFracZAndC1HWNCoC0Common(kOpFormat_FRAC_Z, support_format);
}

bool TbeKernelReduceSelecter::IsReduceSupportC1HWNCoC0(SupportFormat *support_format) const {
MS_EXCEPTION_IF_NULL(support_format);
return IsFracZAndC1HWNCoC0Common(kOpFormat_C1HWNCoC0, support_format);
}

@@ -132,7 +134,7 @@ bool TbeKernelReduceSelecter::IsFracZAndC1HWNCoC0Common(const std::string &forma

void TbeKernelReduceSelecter::GetReduceAttrKeepDim() {
if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode_ptr_)) {
MS_LOG(INFO) << "This node does't have keep_attr.";
MS_LOG(INFO) << "This node doesn't have keep_attr.";
keep_dims_ = false;
return;
}


+ 7
- 2
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc View File

@@ -352,7 +352,7 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind
value_depends->emplace_back(value_depend);
}
dynamic_input_index++;
real_io_tensor_index += LongToSize(dynamic_input_size);
real_io_tensor_index = SizetAddWithOverflowCheck(real_io_tensor_index, LongToSize(dynamic_input_size));
} else {
if (ios_info.size() != 1) {
MS_LOG(EXCEPTION) << "if output is dynamic, so output must has one output.";
@@ -363,7 +363,7 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind
reshape_types->emplace_back(reshape_type);
value_depends->emplace_back(value_depend);
}
real_io_tensor_index += real_io_tensor_num;
real_io_tensor_index = SizetAddWithOverflowCheck(real_io_tensor_index, real_io_tensor_num);
}
} else if (io_param_type == kParamTypeRequre || io_param_type == kParamTypeOptional) {
// require or optional io
@@ -472,6 +472,11 @@ std::string TbeKernelSelect::OpSelectFormat() {
void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, const SupportFormat &support_format,
mindspore::kernel::OpInfo *op_info_new) {
MS_EXCEPTION_IF_NULL(op_info_new);
if (support_format.input_format.empty() || support_format.output_format.empty()) {
MS_LOG(EXCEPTION) << "Support input format and output format size can not be empty, but the input format size is: "
<< support_format.input_format.size()
<< ", output format size is: " << support_format.output_format.size();
}
if (op_info.inputs_ptr().size() != support_format.input_format[0].size() ||
op_info.outputs_ptr().size() != support_format.output_format[0].size()) {
MS_LOG(EXCEPTION) << "BroadCast input/output size not match, op info input size:" << op_info.inputs_ptr().size()


+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_property_checker.cc View File

@@ -45,8 +45,10 @@ static bool CheckStridedSlice(const CNodePtr &cnode) {
auto shrink_axis_mask = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrShrinkAxisMask));
AnfNodePtr input = cnode->input(1);
int input_dims = 0;
MS_EXCEPTION_IF_NULL(input);
if (input->isa<ValueNode>()) {
ValuePtr input_value = input->cast<ValueNodePtr>()->value();
MS_EXCEPTION_IF_NULL(input_value);
if (!input_value->isa<Tensor>()) {
MS_LOG(EXCEPTION) << "For 'StrideSlice', the first input value should be a tensor, but got "
<< input_value->ToString();
@@ -54,6 +56,7 @@ static bool CheckStridedSlice(const CNodePtr &cnode) {
input_dims = SizeToInt(input_value->cast<TensorPtr>()->shape().size());
} else if (input->isa<CNode>() || input->isa<Parameter>()) {
AbstractBasePtr input_abstract = input->abstract();
MS_EXCEPTION_IF_NULL(input_abstract);
if (!input_abstract->isa<AbstractTensor>()) {
MS_LOG(EXCEPTION) << "For 'StrideSlice', the first input value should be a tensor, but got "
<< input_abstract->ToString();


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc View File

@@ -277,6 +277,7 @@ int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buf
uintptr_t KernelManager::GenFuncStub(const mindspore::kernel::KernelPack &kernel_pack, bool force_reload,
uint32_t *block_dim, const bool dynamic_flag, void **handle,
std::string *origin_key) {
MS_EXCEPTION_IF_NULL(block_dim);
auto kernel = kernel_pack.GetKernel();
if (kernel == nullptr) {
MS_LOG(EXCEPTION) << "Invalid kernel pack, json or kernel is nullptr.";


+ 8
- 0
mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc View File

@@ -45,6 +45,7 @@ AnfNodePtr CreateReshapeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &i
trans_inputs.emplace_back(NewValueNode(prim));
trans_inputs.emplace_back(input_node);
auto reshape = func_graph->NewCNode(trans_inputs);
MS_EXCEPTION_IF_NULL(reshape);
AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input_node, 0)}, {dst_shape}, reshape.get());
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), reshape);
AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(dst_shape), reshape);
@@ -72,6 +73,7 @@ void SetTransNodeAttr(const CNodePtr &trans_node) {

void ReFreshInferShape(const AnfNodePtr &trans_node, const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(trans_node);
MS_EXCEPTION_IF_NULL(node);
auto real_input_node = AnfAlgo::VisitKernelWithReturnType(node, 0).first;
if (!real_input_node->isa<CNode>()) {
return;
@@ -106,6 +108,7 @@ void SetGroupAttr(const ParameterPtr &param, const AnfNodePtr &out_trans, const
AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const CNodePtr &node, size_t index,
const KernelSelectPtr &kernel_select) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(func_graph);
auto input_node = AnfAlgo::GetInputNode(node, index);
if (HasAbstractMonad(input_node)) {
// No transfer for monad inputs.
@@ -136,6 +139,7 @@ AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const CNodePtr &
AnfNodePtr InsertTransOpForSingleOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
const KernelSelectPtr &kernel_select) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(func_graph);
std::string output_format = AnfAlgo::GetOutputFormat(node, 0);
std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(node, 0);
if (output_format == kOpFormat_NC1KHKWHWC0) {
@@ -242,6 +246,7 @@ void RefreshKernelBuildInfo(const std::string &input_format, const std::string &
auto ori_build_info = AnfAlgo::GetSelectKernelBuildInfo(trans_data);
MS_EXCEPTION_IF_NULL(ori_build_info);
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(ori_build_info);
MS_EXCEPTION_IF_NULL(builder);
builder->SetInputsFormat({input_format});
builder->SetInputsReshapeType({reshape_type});
builder->SetOutputsReshapeType({reshape_type});
@@ -406,10 +411,12 @@ AnfNodePtr InsertTransOpForInput(const FuncGraphPtr &func_graph, const AnfNodePt

CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(func_graph);
std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
size_t in_num = AnfAlgo::GetInputNum(cnode); // include monads.
for (size_t input_index = 0; input_index < in_num; ++input_index) {
auto cur_input = AnfAlgo::GetInputNode(cnode, input_index);
MS_EXCEPTION_IF_NULL(cur_input);
if (HasAbstractMonad(cur_input)) {
// No cast for monad inputs.
new_inputs.push_back(cur_input);
@@ -421,6 +428,7 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod

auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(cur_input, 0);
auto real_input_node = kernel_with_index.first;
MS_EXCEPTION_IF_NULL(real_input_node);
if (kernel::IsWeightBoundary(real_input_node)) {
// weight
origin_type = AnfAlgo::GetPrevNodeOutputPrecision(cnode, input_index);


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc View File

@@ -57,6 +57,7 @@ const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &f
if (item != none_index.end()) {
auto value = std::make_shared<None>();
auto value_node = NewValueNode(value);
MS_EXCEPTION_IF_NULL(value_node);
value_node->set_abstract(std::make_shared<abstract::AbstractNone>());
auto new_node = kernel_graph->NewValueNode(value_node);
kernel_graph->AddValueNodeToGraph(new_node);


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_rnn.cc View File

@@ -56,6 +56,7 @@ const AnfNodePtr InsertPlaceholderForDynamicRNN::Process(const FuncGraphPtr &fun
if (in_idx == kInsertIdx) {
auto value = std::make_shared<None>();
auto value_node = NewValueNode(value);
MS_EXCEPTION_IF_NULL(value_node);
value_node->set_abstract(std::make_shared<abstract::AbstractNone>());
auto new_node = kernel_graph->NewValueNode(value_node);
new_inputs.push_back(new_node);


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_depend_for_all_gather.cc View File

@@ -44,6 +44,7 @@ bool InsertDependForAllGather::Run(const FuncGraphPtr &graph) {
for (int64_t i = 0; i < SizeToInt(all_gather_node.size()) - 1; ++i) {
auto current_node = iter->second;
auto next_node = (++iter)->second;
MS_EXCEPTION_IF_NULL(next_node);
auto next_cnode = next_node->cast<CNodePtr>();
std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())),
AnfAlgo::GetInputNode(next_cnode, 0), current_node};


+ 0
- 2
mindspore/ccsrc/backend/optimizer/ascend/enhancer/split_inputs_for_reduce_scatter.cc View File

@@ -42,7 +42,6 @@ std::vector<AnfNodePtr> SplitInputsForReduceScatter::InsertSplitForInput(const F
size_splits.push_back(output_node_shape[0]);
}
AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split.get());

AnfAlgo::SetNodeAttr("split_dim", MakeValue(0L), split);
AnfAlgo::SetNodeAttr("num_split", MakeValue(rank_size), split);
AnfAlgo::SetNodeAttr("size_splits", MakeValue(size_splits), split);
@@ -73,7 +72,6 @@ AnfNodePtr SplitInputsForReduceScatter::RearrangeInputsForReduceScatter(const Fu
auto reduce_scatter = func_graph->NewCNode(reduce_scatter_inputs);
MS_EXCEPTION_IF_NULL(reduce_scatter);
reduce_scatter->set_abstract(node->abstract());

AnfAlgo::CopyNodeAttrs(node, reduce_scatter);
AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(1L), reduce_scatter);
kernel_select_->SelectKernel(reduce_scatter);


+ 2
- 0
mindspore/ccsrc/backend/optimizer/ascend/enhancer/split_n_optimizer.cc View File

@@ -95,6 +95,7 @@ bool InputCheck(const AnfNodePtr &node) {
auto in_nums = AnfAlgo::GetInputTensorNum(node);
for (size_t i = 0; i < in_nums; i++) {
auto in_node = VisitSplitKernel(AnfAlgo::GetInputNode(cnode, i), 0).first;
MS_EXCEPTION_IF_NULL(in_node);
if (in_node->isa<Parameter>() || in_node->isa<ValueNode>()) {
MS_LOG(INFO) << "Input is a Parameter or ValueNode, can not optimizer.";
return false;
@@ -104,6 +105,7 @@ bool InputCheck(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(in_cnode);
auto in_node_name = AnfAlgo::GetCNodeName(in_cnode);
auto trans_input = AnfAlgo::VisitKernel(in_node, 0).first;
MS_EXCEPTION_IF_NULL(trans_input);
if (in_node_name == kTransDataOpName && (trans_input->isa<Parameter>() || trans_input->isa<ValueNode>())) {
MS_LOG(INFO) << "Data->TransData->split, can not optimizer.";
return false;


+ 2
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc View File

@@ -38,6 +38,7 @@ const int64_t kAxisDim = 4;
const std::map<std::string, ConvertFunction> kReduceConvertMap = {{kOpFormat_FRAC_Z, ConvertReduceAttrFraczAnd6HD},
{kOpFormat_C1HWNCoC0, ConvertReduceAttrFraczAnd6HD}};
void SafeCheckFunction(const CNodePtr &cnode, const std::vector<int64_t> &reduce_axis) {
MS_EXCEPTION_IF_NULL(cnode);
if (reduce_axis.empty()) {
MS_LOG(EXCEPTION) << "The node " << cnode->DebugString() << "'s reduce axis got a empty vector";
}
@@ -65,6 +66,7 @@ void DynamicAttrUpdate(const AnfNodePtr &node) {
}

void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
auto axis = kernel::GetReduceAttrAxis(cnode);
std::vector<int64_t> convert_axis;
SafeCheckFunction(cnode, axis);


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc View File

@@ -91,6 +91,7 @@ const AnfNodePtr CheckConsistency::Process(const FuncGraphPtr &, const AnfNodePt
}

CNodePtr cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
size_t in_num = AnfAlgo::GetInputTensorNum(cnode);
for (size_t i = 0; i < in_num; ++i) {
if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) {


+ 4
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc View File

@@ -50,14 +50,17 @@ const AnfNodePtr ConvertCastFormat::Process(const FuncGraphPtr &func_graph, cons
continue;
}
auto cast_node = input_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cast_node);
ChangeCastFormat(cast_node, func_graph);
}
return nullptr;
}

void ConvertCastFormat::SetCastFormat(const CNodePtr &cast_node, const string &format) const {
MS_EXCEPTION_IF_NULL(cast_node);
auto info_builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(cast_node));
MS_EXCEPTION_IF_NULL(info_builder);
info_builder->SetInputsFormat({format});
info_builder->SetOutputsFormat({format});
AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get());
@@ -65,6 +68,7 @@ void ConvertCastFormat::SetCastFormat(const CNodePtr &cast_node, const string &f

void ConvertCastFormat::ChangeCastFormat(const CNodePtr &cast_node, const FuncGraphPtr &func_graph) const {
MS_EXCEPTION_IF_NULL(cast_node);
MS_EXCEPTION_IF_NULL(func_graph);
auto input_node_name = AnfAlgo::GetCNodeName(cast_node);
if (input_node_name != prim::kPrimCast->name()) {
return;


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc View File

@@ -42,6 +42,7 @@ const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraph
return nullptr;
} else if (supported_checker_->CheckAICPUSupported(node, kernel_builder_info)) {
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(kernel_builder_info);
MS_EXCEPTION_IF_NULL(builder);
builder->SetKernelType(AICPU_KERNEL);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), node);


+ 2
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc View File

@@ -58,11 +58,13 @@ CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
new_transpose_node =
NewTransOpNode(func_graph, AnfAlgo::GetInputNode(transdata_node->cast<CNodePtr>(), 0), kernel_select, false,
prim::kPrimTranspose->name(), std::vector<int64_t>{2, 3, 1, 0});
MS_EXCEPTION_IF_NULL(new_transpose_node);
AnfAlgo::SetNodeAttr("nop_op", MakeValue(true), new_transpose_node);
RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transpose_node);
// trans hwcn to output_format
new_transdata_node =
NewTransOpNode(func_graph, new_transpose_node, kernel_select, false, prim::kPrimTransData->name());
MS_EXCEPTION_IF_NULL(new_transdata_node);
RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node, padding_axis);
new_transdata_node->set_abstract(transdata_node->abstract());
new_node = new_transdata_node;


+ 4
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc View File

@@ -120,6 +120,8 @@ bool CheckIndexOutput(const CNodePtr &node, const std::shared_ptr<kernel::Kernel
}

void ChangeNodeInferInfo(const CNodePtr &cnode, const CNodePtr &cast, const size_t cast_index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(cast);
using Shape = std::vector<size_t>;
auto cast_dtype = AnfAlgo::GetOutputInferDataType(cast, 0);
auto cast_shape = AnfAlgo::GetOutputInferShape(cast, 0);
@@ -172,6 +174,7 @@ AnfNodePtr MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, co
return nullptr;
}
auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(next_node);
MS_EXCEPTION_IF_NULL(ori_kernel_info);
MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << next_cnode->DebugString()
<< "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info"
<< (*alternative_kernel_info)->ToString();
@@ -244,6 +247,7 @@ AnfNodePtr MergeCastToPriorOp(const FuncGraphPtr &graph, const CNodePtr &cur_nod
return nullptr;
}
auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(prior_op);
MS_EXCEPTION_IF_NULL(ori_kernel_info);
MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << prior_op->DebugString()
<< "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info"
<< (*kernel_info_it)->ToString();


+ 2
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc View File

@@ -118,6 +118,7 @@ void RectifyDoMaskKernelInfo::RectifyDropOutDoMaskKernelInfo(const std::vector<C
if (AnfAlgo::GetInputFormat(do_mask, 0) != format) {
auto builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(do_mask));
MS_EXCEPTION_IF_NULL(builder);
builder->SetInputFormat(format, 0);
builder->SetOutputFormat(format, 0);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get());
@@ -139,6 +140,7 @@ AnfNodePtr RectifyDoMaskKernelInfo::RectifyKernelInfoInPynativeProcess(const Anf
if (do_mask_input_format != kOpFormat_DEFAULT) {
auto builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
MS_EXCEPTION_IF_NULL(builder);
builder->SetInputFormat(kOpFormat_DEFAULT, 0);
builder->SetOutputFormat(kOpFormat_DEFAULT, 0);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());


+ 1
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/trans_op_format_refine.cc View File

@@ -40,6 +40,7 @@ const AnfNodePtr TransOpFormatRefine::Process(const FuncGraphPtr &func_graph, co
auto out_format = AnfAlgo::GetOutputFormat(node, 0);
auto builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
MS_EXCEPTION_IF_NULL(builder);
if (in_format == kOpFormat_DEFAULT && k3DFormatSet.find(out_format) != k3DFormatSet.end()) {
builder->SetInputsFormat({kOpFormat_NCDHW});
builder->SetOutputsFormat({out_format});


+ 7
- 3
mindspore/ccsrc/backend/optimizer/ascend/mindir/dropout_unify_mindir.cc View File

@@ -38,6 +38,7 @@ constexpr auto kSeed1 = "Seed1";
constexpr auto kUint8BitSize = 8;
constexpr int64_t kMaskAlignNum = 128;
constexpr int64_t kMaskMultiNum = 16;
constexpr size_t kDropoutGradInputTensorNum = 2;
constexpr size_t kFloat16Len = 2; // size of float16
constexpr size_t kInt64Len = 8; // size of int64

@@ -69,7 +70,9 @@ ValueNodePtr CreateKeepPorbValueNode(const FuncGraphPtr &func_graph, const AnfNo
if (!AnfAlgo::HasNodeAttr(kKeepProb, cnode)) {
MS_LOG(EXCEPTION) << "Dropout node does not have attr: keep_prob.";
}
if (AnfAlgo::GetCNodePrimitive(cnode)->ToString() == kDropoutOpName) {
auto prim = AnfAlgo::GetCNodePrimitive(cnode);
MS_EXCEPTION_IF_NULL(prim);
if (prim->ToString() == kDropoutOpName) {
if (!AnfAlgo::HasNodeAttr(kSeed0, cnode) || !AnfAlgo::HasNodeAttr(kSeed1, cnode)) {
MS_LOG(EXCEPTION) << "Dropout node does not have attr: seed0 or seed1.";
}
@@ -279,7 +282,7 @@ const AnfNodePtr DropoutUnifyMindIR0::Process(const FuncGraphPtr &func_graph, co
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(node);
auto tuple_cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_cnode);
CheckCNodeInputSize(tuple_cnode, kTupleGetItemInputTensorNum);
if (!NeedUpdate(tuple_cnode)) {
return nullptr;
}
@@ -332,6 +335,7 @@ const AnfNodePtr DropoutUnifyMindIR1::Process(const FuncGraphPtr &func_graph, co
auto inputx_type_id = GetInputXDataType(dropout_node);
auto keep_prob_value = CreateKeepPorbValueNode(func_graph, dropout_node, inputx_type_id);

CheckCNodeInputSize(dropout_node, kDropoutInputTensorNum);
auto dropout_input = dropout_node->input(kIndex1);
auto input_shape = GetDropoutInputShape(dropout_input);
// CreateDropoutGenMask
@@ -363,7 +367,7 @@ const AnfNodePtr DropoutGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(node);
auto dropout_grad_cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(dropout_grad_cnode);
CheckCNodeInputSize(dropout_grad_cnode, kDropoutGradInputTensorNum);

auto grad_input_type_id = GetInputXDataType(dropout_grad_cnode);
auto grad_input_shape = GetInputXShape(dropout_grad_cnode);


+ 2
- 4
mindspore/ccsrc/backend/optimizer/ascend/mindir/maxpool_with_argmax_unify_mindir.cc View File

@@ -26,6 +26,7 @@
namespace mindspore {
namespace opt {
namespace {
constexpr size_t kMaxPoolGradWithArgmaxInputTensorNum = 3;
constexpr size_t kMaxPoolGradWithArgmaxInputNum = 4;
constexpr size_t kMaxPoolWithArgmaxShape = 4;
constexpr size_t kAlignBytes = 16;
@@ -40,10 +41,7 @@ bool IsC(const BaseRef &n) {
}

CNodePtr GetMaxPoolWithArgmax(const CNodePtr &maxpool_grad_with_argmax) {
MS_EXCEPTION_IF_NULL(maxpool_grad_with_argmax);
if (maxpool_grad_with_argmax->inputs().size() != kMaxPoolGradWithArgmaxInputNum) {
MS_LOG(EXCEPTION) << "MaxPoolGradWithArgmax has wrong input size.";
}
CheckCNodeInputSize(maxpool_grad_with_argmax, kMaxPoolGradWithArgmaxInputTensorNum);
auto tuple_getitem0_anf = maxpool_grad_with_argmax->input(kIndex3);
MS_EXCEPTION_IF_NULL(tuple_getitem0_anf);
return tuple_getitem0_anf->cast<CNodePtr>();


+ 1
- 1
mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc View File

@@ -292,7 +292,7 @@ CNodePtr CreateTile(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_no
}
// feature map set
std::vector<size_t> feature_map_input_indexs;
feature_map_input_indexs.push_back(0);
feature_map_input_indexs.emplace_back(0);
AnfAlgo::SetNodeAttr(kIsFeatureMapInputList, MakeValue(feature_map_input_indexs), tile_node);
return tile_node;
}


+ 1
- 0
mindspore/ccsrc/backend/optimizer/pass/add_training_attr.cc View File

@@ -37,6 +37,7 @@ std::unordered_map<std::string, std::unordered_set<std::string>> MarkOp{
bool CheckOP(const FuncGraphManagerPtr &manager, const AnfNodePtr &cnode, const std::unordered_set<std::string> &set) {
for (const auto &node_index : manager->node_users()[cnode]) {
auto output = node_index.first;
MS_EXCEPTION_IF_NULL(output);
if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimTupleGetItem)) {
if (CheckOP(manager, output, set)) {
return true;


+ 2
- 0
mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc View File

@@ -102,7 +102,9 @@ bool BackendCSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node, bo
if (main->isa<ValueNode>() && node->isa<ValueNode>()) {
auto main_value = GetValueNode(main);
MS_EXCEPTION_IF_NULL(main_value);
auto node_value = GetValueNode(node);
MS_EXCEPTION_IF_NULL(node_value);
if (main_value->isa<Primitive>() && node_value->isa<Primitive>()) {
return false;
} else if (main_value->isa<tensor::Tensor>() && node_value->isa<tensor::Tensor>()) {


+ 6
- 0
mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc View File

@@ -52,6 +52,9 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &co
rank_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize);
}
size_t rank_size_t = LongToSize(rank_size);
if (rank_size_t == 0) {
MS_LOG(EXCEPTION) << "Rank size should not be zero.";
}
MS_EXCEPTION_IF_NULL(cnode);
size_t input_num = AnfAlgo::GetInputTensorNum(cnode);
for (size_t input_index = 0; input_index < input_num; ++input_index) {
@@ -336,6 +339,9 @@ AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr
rank_size = AnfAlgo::GetNodeAttr<int64_t>(final_node, kAttrRankSize);
}
size_t rank_size_t = LongToSize(rank_size);
if (rank_size_t == 0) {
MS_LOG(EXCEPTION) << "Rank size should not be zero.";
}
size_t output_num = node_num * rank_size_t;
std::vector<TypeId> dtypes(output_num, AnfAlgo::GetOutputInferDataType(final_node, 0));
std::vector<std::vector<size_t>> shapes;


+ 1
- 0
mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc View File

@@ -31,6 +31,7 @@ const size_t strides_index = 5;

bool GetStridesValues(const CNodePtr &strided_slice_grad, ValuePtrList *strides_values) {
MS_EXCEPTION_IF_NULL(strided_slice_grad);
MS_EXCEPTION_IF_NULL(strides_values);
constexpr size_t kSizeChange = 6;
if (strided_slice_grad->size() < kSizeChange) {
MS_LOG(DEBUG) << "Op strided_slice_grad's inputs size less than 6, graph not changed";


+ 1
- 0
mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc View File

@@ -31,6 +31,7 @@ int64_t SplitTupleInputs(const FuncGraphPtr &graph, const AnfNodePtr &tuple_inpu
std::vector<AnfNodePtr> *plant_inputs) {
if (!AnfAlgo::IsTupleOutput(tuple_input)) {
auto abs = tuple_input->abstract();
MS_EXCEPTION_IF_NULL(abs);
MS_LOG(WARNING) << "The Function only split the output type is tuple type but got" << abs->ToString();
return -1;
}


+ 3
- 0
mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc View File

@@ -109,6 +109,9 @@ const AnfNodePtr ProcessMatchedNodes(const FuncGraphPtr &func_graph, const CNode
(void)manager->Replace(prev_cnode, prev_cnode->input(1));
return cnode->input(1);
} else { // rebuild the pass nodes
if (pass_size < kOffset) {
MS_LOG(ERROR) << "pass_size should >= 2";
}
for (size_t idx = pass_size - kOffset; idx > 0; --idx) {
auto new_node = func_graph->NewCNode((*pass_vector)[idx].first->inputs());
if (idx == pass_size - kOffset) {


+ 6
- 0
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc View File

@@ -68,6 +68,7 @@ bool IsRealKernelCNode(const CNodePtr &cnode) {
prim::kPrimReturn, prim::kPrimPartial, prim::kPrimDepend,
prim::kPrimUpdateState, prim::kPrimLoad};
#endif
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().empty()) {
MS_LOG(EXCEPTION) << "Illegal null input of cnode(%s)" << cnode->DebugString();
}
@@ -1282,6 +1283,8 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &
}
// copy an abstract of a node to another node
void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_node) {
MS_EXCEPTION_IF_NULL(from_node);
MS_EXCEPTION_IF_NULL(to_node);
to_node->set_abstract(from_node->abstract());
}

@@ -1555,6 +1558,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) {
}

bool AnfRuntimeAlgorithm::IsFeatureMapInput(const AnfNodePtr &node, size_t input_index) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "Cannot input a parameter or a valuenode to charge it's input if is a feature map"
<< " trace: " << trace::DumpSourceLines(node);
@@ -1735,6 +1739,7 @@ bool AnfRuntimeAlgorithm::IsSwitchCall(const CNodePtr &call_node) {
<< " trace: " << trace::DumpSourceLines(call_node);
}
auto input1 = call_node->input(1);
MS_EXCEPTION_IF_NULL(input1);
if (input1->isa<ValueNode>()) {
return false;
} else if (input1->isa<CNode>() && AnfAlgo::CheckPrimitiveType(input1, prim::kPrimSwitch)) {
@@ -1881,6 +1886,7 @@ TypeId AnfRuntimeAlgorithm::GetCNodeOutputPrecision(const AnfNodePtr &node) {
}

TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << node->DebugString() << ", input node is not CNode."
<< " trace: " << trace::DumpSourceLines(node);


+ 3
- 0
mindspore/ccsrc/backend/session/executor.cc View File

@@ -419,6 +419,9 @@ void Executor::RunOp(const SessionPtr &session, OpRunInfo *op_run_info, const Gr
std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
const std::vector<int64_t> &tensors_mask) {
MS_EXCEPTION_IF_NULL(session);
MS_EXCEPTION_IF_NULL(input_tensors);
MS_EXCEPTION_IF_NULL(outputs);
MS_EXCEPTION_IF_NULL(op_run_info);
auto ms_context = MsContext::GetInstance();
auto target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (target == kGPUDevice) {


+ 1
- 1
mindspore/ccsrc/backend/session/kernel_build_client.h View File

@@ -141,7 +141,7 @@ class KernelBuildClient {
std::shared_ptr<DuplexPipe> dp_;
};

static std::string GetScriptFilePath(const std::string cmd_env, const std::string &cmd_script,
static std::string GetScriptFilePath(const std::string &cmd_env, const std::string &cmd_script,
const std::string &server_script) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);


+ 17
- 4
mindspore/ccsrc/backend/session/kernel_graph.cc View File

@@ -76,6 +76,7 @@ std::vector<AnfNodePtr> GetCallRealOutputs(const AnfNodePtr &call_node) {
std::vector<AnfNodePtr> real_inputs;
auto child_graphs = AnfAlgo::GetCallSwitchKernelGraph(node->cast<CNodePtr>());
for (const auto &child_graph : child_graphs) {
MS_EXCEPTION_IF_NULL(child_graph);
auto real_input = child_graph->output();
auto child_real_inputs = GetCallRealOutputs(real_input);
std::copy(child_real_inputs.begin(), child_real_inputs.end(), std::back_inserter(real_inputs));
@@ -138,11 +139,13 @@ std::string GetNodeGroup(const AnfNodePtr &node) {
} // namespace

AnfNodePtr KernelGraph::MakeValueNode(const AnfNodePtr &node) const {
MS_EXCEPTION_IF_NULL(node);
auto value_node = node->cast<ValueNodePtr>();
if (value_node == nullptr) {
return nullptr;
}
ValueNodePtr new_value_node = std::make_shared<ValueNode>(value_node->value());
MS_EXCEPTION_IF_NULL(new_value_node);
new_value_node->set_abstract(value_node->abstract());
this->SetKernelInfoForNode(new_value_node);
return new_value_node;
@@ -331,7 +334,7 @@ void KernelGraph::GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num)
return;
}
(void)visited_nodes_.insert(node);
for (auto input_edge : node_input_edges_[node]) {
for (auto &input_edge : node_input_edges_[node]) {
size_t input_num = node_input_num_[input_edge.first];
if (input_num == 0) {
continue;
@@ -366,9 +369,9 @@ void KernelGraph::GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num)
}
}

uint32_t KernelGraph::GetLoopNum(std::map<AnfNodePtr, size_t> none_zero_nodes) {
uint32_t KernelGraph::GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes) {
uint32_t loop_num = 0;
for (auto iter : none_zero_nodes) {
for (auto &iter : none_zero_nodes) {
auto node = iter.first;
MS_EXCEPTION_IF_NULL(node);
if (node_input_num_[node] == 0) {
@@ -477,12 +480,14 @@ void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const {
<< cnode->DebugString();
}
auto input_node = AnfAlgo::GetInputNode(cnode, 0);
MS_EXCEPTION_IF_NULL(input_node);
auto assign_value_node = AnfAlgo::GetInputNode(cnode, 1);
if (AnfAlgo::IsFeatureMapOutput(input_node)) {
return;
}
if (!AnfAlgo::IsFeatureMapOutput(input_node) && AnfAlgo::IsFeatureMapOutput(assign_value_node)) {
auto kernel_info = dynamic_cast<device::KernelInfo *>(input_node->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
kernel_info->set_feature_map_flag(true);
}
}
@@ -490,6 +495,7 @@ void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const {
void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const {
MS_EXCEPTION_IF_NULL(node);
auto kernel_info = std::make_shared<device::KernelInfo>();
MS_EXCEPTION_IF_NULL(kernel_info);
node->set_kernel_info(kernel_info);
if (node->isa<CNode>()) {
if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(node)) != kOpAssignKernelNameList.end()) {
@@ -520,6 +526,7 @@ void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const {
return;
}
auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(kernel_build_info_builder);
// set the format of value_node to DEFAULT_FORMAT
std::vector<TypeId> types;
std::vector<std::string> formats = {kOpFormat_DEFAULT};
@@ -589,6 +596,7 @@ ValueNodePtr KernelGraph::NewValueNode(const AbstractBasePtr &abstract, const Va
MS_EXCEPTION_IF_NULL(abstract);
MS_EXCEPTION_IF_NULL(value);
ValueNodePtr new_value_node = std::make_shared<ValueNode>(value);
MS_EXCEPTION_IF_NULL(new_value_node);
new_value_node->set_abstract(abstract);
SetKernelInfoForNode(new_value_node);
AnfAlgo::SetGraphId(graph_id(), new_value_node.get());
@@ -610,7 +618,7 @@ ValueNodePtr KernelGraph::NewValueNode(const tensor::TensorPtr &input_tensor) {
return input_value_node;
}

AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr abstract, const ValuePtr &value) {
AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value) {
MS_EXCEPTION_IF_NULL(abstract);
MS_EXCEPTION_IF_NULL(value);
if (!abstract->isa<abstract::AbstractTuple>()) {
@@ -632,6 +640,7 @@ AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr abstract, cons
make_tuple_inputs.push_back(TransValueNodeTuple((*tuple_abstract)[index], (*value_tuple)[index]));
}
auto make_tuple = NewCNode(make_tuple_inputs);
MS_EXCEPTION_IF_NULL(make_tuple);
make_tuple->set_abstract(tuple_abstract);
return make_tuple;
}
@@ -721,6 +730,7 @@ void KernelGraph::FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNode
auto front_node = front_anf->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(front_node);
auto attr_input = front_node->input(kAnfPrimitiveIndex);
MS_EXCEPTION_IF_NULL(attr_input);
if (!attr_input->isa<CNode>()) {
MS_LOG(EXCEPTION) << "Kernel " << backend_anf->DebugString() << "has been exist in the backend_front_anf_map_";
}
@@ -959,6 +969,7 @@ bool KernelGraph::IsLeafGraph() const { return child_graph_order_.empty(); }
std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const PrimitivePtr &primitive) const {
std::vector<CNodePtr> result;
for (const auto &anf : execution_order_) {
MS_EXCEPTION_IF_NULL(anf);
if (AnfAlgo::CheckPrimitiveType(anf, primitive) && AnfAlgo::GetGraphId(anf.get()) == graph_id_) {
result.push_back(anf->cast<CNodePtr>());
}
@@ -969,6 +980,7 @@ std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const PrimitivePtr &primi
std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const std::vector<PrimitivePtr> &primitive_list) const {
std::vector<CNodePtr> result;
for (const auto &anf : execution_order_) {
MS_EXCEPTION_IF_NULL(anf);
for (const auto &primitive : primitive_list) {
if (AnfAlgo::CheckPrimitiveType(anf, primitive) && AnfAlgo::GetGraphId(anf.get()) == graph_id_) {
result.push_back(anf->cast<CNodePtr>());
@@ -1310,6 +1322,7 @@ void KernelGraph::UpdateChildGraphOrder() {
}

void KernelGraph::RemoveNodeFromGraph(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (backend_front_anf_map_.find(node) != backend_front_anf_map_.end()) {
auto front_node = backend_front_anf_map_[node];
(void)backend_front_anf_map_.erase(node);


+ 2
- 2
mindspore/ccsrc/backend/session/kernel_graph.h View File

@@ -389,14 +389,14 @@ class KernelGraph : public FuncGraph {
// add node depend edge by data edge
void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num);
std::vector<AnfNodePtr> GetOutputNodes(const AnfNodePtr &node);
AnfNodePtr TransValueNodeTuple(const AbstractBasePtr abstract, const ValuePtr &value);
AnfNodePtr TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value);
AnfNodePtr TransParameterTuple(const AbstractBasePtr &abstract);
AnfNodePtr TransCNodeTuple(const CNodePtr &node);
AnfNodePtr CreatTupleGetItemNode(const AnfNodePtr &node, size_t output_idx);
std::vector<CNodePtr> SortStartLabelAndEndGoto();
// checkout whether loop exist in graph
void CheckLoop();
uint32_t GetLoopNum(std::map<AnfNodePtr, size_t> none_zero_nodes);
uint32_t GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes);
void GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num);

// members


+ 1
- 0
mindspore/ccsrc/backend/session/single_kernel_graph.cc View File

@@ -27,6 +27,7 @@ std::shared_ptr<session::KernelGraph> SingleKernelGraph::ConstructKernelGraphBas
const std::string &op_name, const std::vector<TypeId> &input_dtypes, const std::vector<ShapeVector> &input_shapes,
const std::vector<TypeId> &output_dtypes, const std::vector<std::vector<size_t>> &output_shapes) {
auto graph = std::make_shared<session::KernelGraph>();
MS_EXCEPTION_IF_NULL(graph);
std::vector<AnfNodePtr> inputs;
// set input[0]
PrimitivePtr op_prim = std::make_shared<Primitive>(op_name);


+ 1
- 0
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -599,6 +599,7 @@ void DumpJsonParser::UpdateNeedDumpKernels(const session::KernelGraph &kernel_gr
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
auto input = input_with_index.first;
MS_EXCEPTION_IF_NULL(input);
if (input->isa<CNode>()) {
MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << GetKernelNodeName(kernel)
<< " Input:" << GetKernelNodeName(input);


+ 6
- 0
mindspore/ccsrc/debug/data_dump/dump_utils.cc View File

@@ -60,10 +60,12 @@ void GetFileKernelName(NotNull<std::string *> kernel_name) {
}

void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const_map) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<ValueNode>()) {
return;
}
std::string node_name = GetKernelNodeName(node);
MS_EXCEPTION_IF_NULL(const_map);
auto iter = const_map->find(node_name);
if (iter == const_map->end()) {
auto const_idx = const_map->size() + 1;
@@ -72,6 +74,7 @@ void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const
}

void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *const_map) {
MS_EXCEPTION_IF_NULL(node);
auto &inputs = node->inputs();
if (inputs.empty()) {
MS_LOG(EXCEPTION) << "Inputs of apply node is empty";
@@ -79,6 +82,7 @@ void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *con
AnfNodePtr op = inputs[0];

// CNode/ConstGraph/Const/Parameter
MS_EXCEPTION_IF_NULL(op);
if (op->isa<CNode>() || IsValueNode<FuncGraph>(op) || op->isa<Parameter>()) {
MS_LOG(WARNING) << "Operator must be a primitive.";
} else {
@@ -90,6 +94,7 @@ void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *con
}

void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size_t> *const_map) {
MS_EXCEPTION_IF_NULL(graph);
std::vector<AnfNodePtr> nodes = TopoSort(graph->get_return(), SuccIncoming, AlwaysInclude);
for (const AnfNodePtr &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
@@ -97,6 +102,7 @@ void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size
continue;
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (cnode != graph->get_return()) {
GetCNodeConstantId(cnode, const_map);
} else {


+ 2
- 0
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -197,6 +197,7 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
uint32_t stream_id = 0;
std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
MS_EXCEPTION_IF_NULL(addr);
if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, tensor_name, *addr, int_shapes, type, device_type, trans_flag, slot, debugger);
} else {
@@ -215,6 +216,7 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_
std::string node_name = GetKernelNodeName(anf_node);
std::string dump_name = node_name;
if (anf_node->isa<ValueNode>()) {
MS_EXCEPTION_IF_NULL(const_map);
auto iter = const_map->find(node_name);
if (iter == const_map->end()) {
return;


+ 9
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc View File

@@ -63,6 +63,10 @@ void AscendBucket::AllocateAllReduceAddr() {

// generate memecpy output addr
uint8_t *memcpy_output = ar_input_addr_;
if (origin_size_list.size() < bucket_size_ || align_size_list_.size() < bucket_size_) {
MS_LOG(EXCEPTION) << "Invalid bucket_size_:" << bucket_size_ << " origin_size_list.size:" << origin_size_list.size()
<< " align_size_list.size:" << align_size_list_.size();
}
for (size_t i = 0; i < bucket_size_; ++i) {
memcpy_output_addrs_.emplace_back(std::make_shared<kernel::Address>(memcpy_output, origin_size_list[i]));
memcpy_output += align_size_list_[i];
@@ -95,6 +99,11 @@ void AscendBucket::FreeAllDeviceMem() {
void AscendBucket::CopyTensorToContiguousMemory() {
// clear allreduce input addr
CleanAllReduceInputAddr();
if (memcpy_input_addrs_.size() < bucket_size_ || memcpy_output_addrs_.size() < bucket_size_) {
MS_LOG(EXCEPTION) << "Invalid bucket_size_:" << bucket_size_
<< " memcpy_input_addr_.size:" << memcpy_input_addrs_.size()
<< " memcpy_output_addr_.size:" << memcpy_output_addrs_.size();
}
for (size_t i = 0; i < bucket_size_; ++i) {
MS_EXCEPTION_IF_NULL(memcpy_input_addrs_[i]);
MS_EXCEPTION_IF_NULL(memcpy_output_addrs_[i]);


+ 8
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc View File

@@ -239,6 +239,7 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size
}

std::vector<size_t> AscendDeviceAddress::GetDeviceShape(std::vector<size_t> *host_shape) const {
MS_EXCEPTION_IF_NULL(host_shape);
std::vector<size_t> device_shape;
auto node_index = GetNodeIndex();
if (format_ == kOpFormat_FRAC_NZ || format_ == kOpFormat_NCDHW) {
@@ -504,6 +505,7 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
std::string path = filepath + '.' + host_fmt;
MS_LOG(INFO) << "E2E Dump path is " << path;
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
MS_EXCEPTION_IF_NULL(out_tensor);
size_t host_size = out_tensor->data().nbytes();
ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
if (!ret) {
@@ -531,17 +533,21 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
const ShapeVector &host_shape, TypeId host_type, size_t slot,
bool keep_prev) const {
bool ret = false;
if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
auto debugger = Debugger::GetInstance();
MS_EXCEPTION_IF_NULL(debugger);
if (debugger->TensorExistsInCurrent(tensor_name)) {
MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
return true;
}
// TensorData is freed up in AscendSession class
auto tensor_data = std::make_shared<mindspore::TensorData>();
MS_EXCEPTION_IF_NULL(tensor_data);
tensor_data->SetName(tensor_name);
tensor_data->SetExecutionOrder(execution_order);
tensor_data->SetSlot(slot);

mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
MS_EXCEPTION_IF_NULL(out_tensor);
size_t host_size = out_tensor->data().nbytes();
auto ret_sync = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
if (!ret_sync) {
@@ -554,7 +560,7 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
tensor_data->SetByteSize(LongToSize(out_tensor->data().nbytes()));
tensor_data->SetType((unsigned int)host_type);
tensor_data->SetShape(out_tensor->shape());
ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev);
ret = debugger->LoadNewTensor(tensor_data, keep_prev);
return ret;
}
#endif


+ 9
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -255,6 +255,7 @@ void AscendKernelRuntime::ReportProfilingData() {
void AscendKernelRuntime::ReleaseDeviceRes() {
MS_LOG(INFO) << "Ascend finalize start";
#ifdef ENABLE_DEBUGGER
MS_EXCEPTION_IF_NULL(debugger_);
if (debugger_ && debugger_->debugger_enabled()) {
debugger_->SetTrainingDone(true);
bool ret = debugger_->SendMetadata(false);
@@ -373,6 +374,7 @@ bool AscendKernelRuntime::Init() {
bool AscendKernelRuntime::LoadData(const session::KernelGraph &graph) {
#ifdef ENABLE_DEBUGGER
MS_LOG(INFO) << "Start load step";
MS_EXCEPTION_IF_NULL(debugger_);
for (const auto &graph_ptr : debugger_->GetGraphPtrList()) {
debugger_->SetGraphPtr(graph_ptr);
// load output
@@ -594,6 +596,7 @@ void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {

void AscendKernelRuntime::TaskFailCallback(rtExceptionInfo *task_fail_info) {
MS_EXCEPTION_IF_NULL(task_fail_info);
MS_EXCEPTION_IF_NULL(current_graph_);
static std::mutex exception_mutex;
constexpr uint32_t kOverflowThreshold = 5;
std::lock_guard<std::mutex> lock(exception_mutex);
@@ -628,12 +631,15 @@ CNodePtr AscendKernelRuntime::GetErrorNodeName(uint32_t streamid, uint32_t taski
}
auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(current_graph_->graph_id());
for (const auto &iter : runtime_info_map) {
MS_EXCEPTION_IF_NULL(iter.second);
auto task_id = std::get<kTupleTaskId>(*iter.second);
auto stream_id = std::get<kTupleStreamId>(*iter.second);
if (task_id == taskid && stream_id == streamid) {
auto &execute_node = current_graph_->execution_order();
auto node = std::find_if(execute_node.begin(), execute_node.end(),
[&iter](const auto &node) { return node->UniqueName() == iter.first; });
auto node = std::find_if(execute_node.begin(), execute_node.end(), [&iter](const auto &node) {
MS_EXCEPTION_IF_NULL(node);
return node->UniqueName() == iter.first;
});
if (node != execute_node.end()) {
return *node;
}
@@ -1214,6 +1220,7 @@ int AscendKernelRuntime::DeleteDumpFile(std::string path) {
if (path[path.size() - 1] != '/') {
path = path + "/";
}
MS_EXCEPTION_IF_NULL(dirinfo);
filepath = path + dirinfo->d_name;
if (strcmp(dirinfo->d_name, ".") == 0 || strcmp(dirinfo->d_name, "..") == 0) continue;
result = DeleteDumpFile(filepath);


+ 3
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc View File

@@ -51,6 +51,7 @@ static void UpdateLabelSwitch(NotNull<CNodePtr> node) {
std::vector<uint32_t> label_list;
for (size_t i = kLabelSwitchLabelId; i < node->size(); ++i) {
auto input = node->input(i);
MS_EXCEPTION_IF_NULL(input);
if (!input->isa<CNode>() || AnfAlgo::GetCNodeName(input) != kLabelSetOpName) {
break;
}
@@ -74,6 +75,7 @@ static void AssignLabelForLabelSet(NotNull<std::shared_ptr<session::KernelGraph>
const auto &nodes = graph->execution_order();

for (auto &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
continue;
}
@@ -104,6 +106,7 @@ static void AssignLabelForGotoSwitch(NotNull<std::shared_ptr<session::KernelGrap

const auto &nodes = graph->execution_order();
for (auto &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
continue;
}


+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc View File

@@ -201,6 +201,7 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap
MemoryManager::MallocSomasDynamicMem(graph);
#ifndef ENABLE_SECURITY
if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable()) {
MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
somas_reuse_util_ptr_->ConvertToProfilingNode(graph.graph_id());
}
#endif


+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc View File

@@ -106,6 +106,7 @@ bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
void AscendMemoryPool::ResetIdleMemBuf() {
auto idle_mem_buf_map = DynamicMemPoolBestFit::global_idle_mem_buf_map();
for (auto &it : idle_mem_buf_map) {
MS_EXCEPTION_IF_NULL(it.second);
(void)rtMemset(it.second->device_addr_, it.first, 0, it.first);
}
}


+ 24
- 3
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc View File

@@ -152,6 +152,7 @@ uint32_t GetHcomTaskNum(const CNodePtr &cnode) {
}

CNodePtr GetHcomAndOverflowMarker(const NotNull<KernelGraphPtr> &graph_ptr, vector<CNodePtr> *hcom_nodes) {
MS_EXCEPTION_IF_NULL(hcom_nodes);
auto cnode_ptr_list = graph_ptr->execution_order();
CNodePtr overflow_marker = nullptr;
std::string kNPUGetFloatStatusOpName = "NPUGetFloatStatus";
@@ -322,6 +323,7 @@ void AscendStreamAssign::ReorderIndependentOrders(const NotNull<KernelGraphPtr>

void AscendStreamAssign::CheckScenario(const NotNull<KernelGraphPtr> &graph_ptr,
vector<CNodePtr> *last_grad_and_status) {
MS_EXCEPTION_IF_NULL(last_grad_and_status);
auto cnode_ptr_list = graph_ptr->execution_order();
vector<CNodePtr> hcom_nodes;
auto overflow_marker = GetHcomAndOverflowMarker(graph_ptr, &hcom_nodes);
@@ -376,6 +378,8 @@ CNodePtr AscendStreamAssign::GetCNodesNeededMoved(vector<CNodePtr> *moved_backwa
vector<CNodePtr> *moved_forward_cnodes,
const vector<CNodePtr> &last_grad_and_status,
const NotNull<KernelGraphPtr> &graph_ptr) {
MS_EXCEPTION_IF_NULL(moved_backward_cnodes);
MS_EXCEPTION_IF_NULL(moved_forward_cnodes);
auto cnode_ptr_list = graph_ptr->execution_order();
if (last_grad_and_status.size() != kLastGradAndStatusNum) {
return nullptr;
@@ -618,7 +622,8 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra
AssignIndependent(graph_ptr);
}
auto independent_stream_num = resource_manager.get_cur_stream_num() - common_stream_num - hcom_stream_num;
auto total_stream_num = resource_manager.get_cur_stream_num() + hcom_stream_num * kHcomSecondaryStreamNum;
auto total_stream_num =
resource_manager.get_cur_stream_num() + Uint32tMulWithOverflowCheck(hcom_stream_num, kHcomSecondaryStreamNum);
MS_LOG(INFO) << "Total stream number: " << total_stream_num << ", common stream number: " << common_stream_num
<< ", hcom stream number: " << hcom_stream_num << "*" << (kHcomSecondaryStreamNum + 1)
<< ", independent stream number: " << independent_stream_num << ".";
@@ -728,7 +733,7 @@ uint32_t AscendStreamAssign::AssignHcomStreamId(const CNodePtr &cur_cnode_ptr, b
} else {
if (it->second <= kMaxTaskNumPerStream - task_num) {
AnfAlgo::SetStreamId(it->first, cur_cnode_ptr.get());
it->second += task_num;
it->second = Uint32tAddWithOverflowCheck(it->second, task_num);
} else {
cur_hcom_stream_id = resource_manager.ApplyNewStream();
AnfAlgo::SetStreamId(cur_hcom_stream_id, cur_cnode_ptr.get());
@@ -743,6 +748,7 @@ void AscendStreamAssign::AssignIndependent(const NotNull<KernelGraphPtr> &graph_
std::map<uint32_t, std::vector<CNodePtr>> graph_nodes_map;
for (size_t i = 0; i < cnode_ptr_list.size(); ++i) {
CNodePtr cur_cnode_ptr = cnode_ptr_list[i];
MS_EXCEPTION_IF_NULL(cur_cnode_ptr);
if (AnfAlgo::GetStreamId(cur_cnode_ptr) != kInvalidStreamId) {
continue;
}
@@ -1176,6 +1182,8 @@ bool AscendStreamAssign::IsProcessedStream(uint32_t stream_id) {
}

bool AscendStreamAssign::IsAllOutGraphOut(const KernelGraphPtr &graph, const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(cnode);
auto cnode_out_num = AnfAlgo::GetOutputTensorNum(cnode);
auto nodes = AnfAlgo::GetAllOutput(graph->output(), {prim::kPrimTupleGetItem});
std::set<int> output_index_set;
@@ -1238,6 +1246,7 @@ void AscendStreamAssign::InsertEventCommonDependHcom(const NotNull<KernelGraphPt
}

if (target == cnodes.end()) {
MS_EXCEPTION_IF_NULL(*(it - 1));
MS_LOG(WARNING) << "Hcom node:" << (*(it - 1))->fullname_with_scope()
<< ", can't find target for insert recv op, no insert send/recv";
it = cnodes.erase(it);
@@ -1361,12 +1370,14 @@ vector<CNodePtr> AscendStreamAssign::GetLastInputCnode(const NotNull<KernelGraph
}

vector<CNodePtr> AscendStreamAssign::GetInputKernels(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
vector<CNodePtr> input_cnodes;
queue<CNodePtr> nop_nodes;
auto inputs = cnode->inputs();
for (size_t i = 1; i < inputs.size(); i++) {
auto real_input = AnfAlgo::VisitKernel(inputs[i], 0);
auto node = real_input.first;
MS_EXCEPTION_IF_NULL(node);
if (opt::IsNopNode(node)) {
nop_nodes.push(node->cast<CNodePtr>());
while (!nop_nodes.empty()) {
@@ -1376,6 +1387,7 @@ vector<CNodePtr> AscendStreamAssign::GetInputKernels(const CNodePtr &cnode) {
for (size_t j = 1; j < new_inputs.size(); j++) {
auto new_real_input = AnfAlgo::VisitKernel(new_inputs[j], 0);
auto new_node = new_real_input.first;
MS_EXCEPTION_IF_NULL(new_node);
if (opt::IsNopNode(new_node)) {
nop_nodes.push(new_node->cast<CNodePtr>());
} else if (new_node->isa<CNode>()) {
@@ -1494,6 +1506,9 @@ void AscendStreamAssign::InsertEventBetweenHcom(const NotNull<KernelGraphPtr> &g
AscendResourceMng &resource_manager = AscendResourceMng::GetInstance();
auto cnode_ptr_list = graph_ptr->execution_order();
uint32_t cur_event_id = resource_manager.ApplyNewEvent();
if (hcom_index.empty()) {
MS_LOG(EXCEPTION) << "Hcom stream number is empty";
}
size_t first_stream_last_index = hcom_index[0].second.back();
size_t last_stream_first_index = hcom_index.back().second.front();
MS_LOG(INFO) << "First stream last index:" << first_stream_last_index
@@ -1652,8 +1667,9 @@ void AscendStreamAssign::GetIndependentMaxTarget(const NotNull<KernelGraphPtr> &
auto inputs = target_node->inputs();
for (size_t m = 1; m < inputs.size(); m++) {
auto input = inputs[m];
MS_EXCEPTION_IF_NULL(input);
if (opt::IsNopNode(input)) {
CNodePtr cnode = input->cast<CNodePtr>();
auto cnode = input->cast<CNodePtr>();
auto new_inputs = cnode->inputs();
for (size_t k = 1; k < new_inputs.size(); k++) {
auto new_real_input = AnfAlgo::VisitKernel(new_inputs[k], 0);
@@ -1974,6 +1990,7 @@ vector<CNodePtr>::iterator AscendStreamAssign::FindTargetOp(vector<CNodePtr>::it
auto inputs = (*begin)->inputs();
for (size_t i = 1; i < inputs.size(); i++) {
auto input = inputs[i];
MS_EXCEPTION_IF_NULL(input);
if (opt::IsNopNode(input)) {
if (IsNopNodeTarget(input, node, *begin, exclude_hcom)) {
return begin;
@@ -2084,6 +2101,7 @@ bool AscendStreamAssign::IsVecExist(const std::vector<uint32_t> &group) {
}

void AscendStreamAssign::DFS(uint32_t start, std::vector<uint32_t> *group) {
MS_EXCEPTION_IF_NULL(group);
auto it = stream_relations_.find(start);
if (it == stream_relations_.end()) {
if (!IsVecExist(*group)) {
@@ -2169,6 +2187,9 @@ void AscendStreamAssign::GetStreamActiveStreamRelation(const NotNull<KernelGraph
}

auto orders = graph_ptr->execution_order();
if (index >= orders.size()) {
MS_LOG(EXCEPTION) << "Invalid index.";
}
auto cur_cnode = orders[index];
auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode);
auto active_list = AnfAlgo::GetNodeAttr<vector<uint32_t>>(cur_cnode, kAttrActiveStreamList);


+ 19
- 4
mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc View File

@@ -22,6 +22,7 @@
#include <limits>
#include "utility"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/convert_utils_base.h"
#include "runtime/mem.h"
#include "runtime/kernel.h"
#include "runtime/rt_model.h"
@@ -71,22 +72,31 @@ DataDumper::~DataDumper() {

#ifndef ENABLE_SECURITY
void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const {
MS_EXCEPTION_IF_NULL(kernel_graph_);
for (const auto &kernel : kernel_graph_->execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) {
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
auto input = input_with_index.first;
MS_EXCEPTION_IF_NULL(input);
if (input->isa<CNode>()) {
MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
<< " Input:" << input->fullname_with_scope();
kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>());
auto it = kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>());
if (!it.second) {
MS_LOG(INFO) << "Node name already exist: " << input->fullname_with_scope();
}
}
}
} else if (KernelNeedDump(kernel)) {
MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
kernel_map->try_emplace(kernel->fullname_with_scope(), kernel);
auto it = kernel_map->try_emplace(kernel->fullname_with_scope(), kernel);
if (!it.second) {
MS_LOG(INFO) << "Node name already exist: " << kernel->fullname_with_scope();
}
}
}
}
@@ -276,6 +286,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo
task.set_end_graph(false);
task.set_task_id(debug_task_id_);
task.set_stream_id(debug_stream_id_);
MS_EXCEPTION_IF_NULL(task.mutable_op());
task.mutable_op()->set_op_name(kNodeNameOpDebug);
task.mutable_op()->set_op_type(kOpTypeOpDebug);

@@ -283,6 +294,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo
output.set_data_type(ge::proto::DataType::DT_UINT8);
output.set_format(ge::Format::FORMAT_ND);

MS_EXCEPTION_IF_NULL(output.mutable_shape());
output.mutable_shape()->add_dim(kOpDebugShape);

output.set_original_name(kNodeNameOpDebug);
@@ -293,7 +305,9 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_)));
output.set_size(kOpDebugHostMemSize);

MS_EXCEPTION_IF_NULL(task.mutable_output());
task.mutable_output()->Add(std::move(output));
MS_EXCEPTION_IF_NULL(dump_info->mutable_task());
dump_info->mutable_task()->Add(std::move(task));
}

@@ -419,7 +433,7 @@ void DataDumper::DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<ai
MS_LOG(INFO) << "[DataDump] output " << i << " address size:" << output.size();
MS_EXCEPTION_IF_NULL(task->mutable_output());
task->mutable_output()->Add(std::move(output));
offset += sizeof(void *);
offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
}
}

@@ -428,6 +442,7 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic
MS_LOG(INFO) << "Skip dump input";
return;
}
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::IsNodeInputContainMonad(kernel)) {
MS_LOG(WARNING) << "Skip Monad node:" << kernel->fullname_with_scope();
return;
@@ -462,7 +477,7 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic
MS_LOG(INFO) << "[DataDump] input " << i << " address size:" << input.size();
MS_EXCEPTION_IF_NULL(task->mutable_input());
task->mutable_input()->Add(std::move(input));
offset += sizeof(void *);
offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
}
}
#endif


+ 10
- 0
mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc View File

@@ -81,6 +81,7 @@ void AiCpuDynamicKernel::Initialize() {
if (is_dynamic_shape_) {
ext_info_handler_ =
std::make_shared<AicpuExtInfoHandler>(cnode->fullname_with_scope(), input_num_, output_num_, shape_type);
MS_EXCEPTION_IF_NULL(ext_info_handler_);
ext_info_handler_->Parse(ext_info_data_);
}

@@ -103,6 +104,7 @@ void AiCpuDynamicKernel::Initialize() {
}

auto aicpu_param_head = reinterpret_cast<kernel::AicpuParamHead *>(args_.data());
MS_EXCEPTION_IF_NULL(aicpu_param_head);
aicpu_param_head->extInfoLength = SizeToUint(ext_info_size_);
aicpu_param_head->extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev_);
}
@@ -114,11 +116,13 @@ bool AiCpuDynamicKernel::UpdateInputOutputAddr() {
MS_EXCEPTION_IF_NULL(cnode);
for (size_t i = 0; i < input_num_; ++i) {
auto input_addr = AnfAlgo::GetPrevNodeOutputAddr(cnode, i);
MS_EXCEPTION_IF_NULL(input_addr);
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(input_addr->GetMutablePtr()));
}

for (size_t i = 0; i < output_num_; ++i) {
auto output_addr = AnfAlgo::GetOutputAddr(cnode, i);
MS_EXCEPTION_IF_NULL(output_addr);
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(output_addr->GetMutablePtr()));
}

@@ -128,6 +132,10 @@ bool AiCpuDynamicKernel::UpdateInputOutputAddr() {
}

auto io_ptr = args_.data() + sizeof(kernel::AicpuParamHead);
if (io_addrs.empty()) {
MS_LOG(ERROR) << "The io_addrs is empty";
return false;
}
auto ret =
memcpy_s(io_ptr, args_.size() - sizeof(kernel::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size());
if (ret != 0) {
@@ -146,6 +154,7 @@ bool AiCpuDynamicKernel::UpdateExtInfo() {
return true;
}

MS_EXCEPTION_IF_NULL(ext_info_handler_);
for (size_t i = 0; i < input_num_; ++i) {
ext_info_handler_->UpdateInputShapeAndType(i, NOT_NULL(cnode));
}
@@ -171,6 +180,7 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() {
auto cnode = cnode_ptr_.lock();
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope();
MS_EXCEPTION_IF_NULL(ext_info_handler_);
auto ret = rtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
ext_info_size_, RT_MEMCPY_DEVICE_TO_HOST);
if (ret != RT_ERROR_NONE) {


+ 17
- 3
mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc View File

@@ -71,8 +71,8 @@ bool AicpuExtInfoHandler::Parse(const std::string &ext_info) {
<< " infoLen:" << aicpu_ext_info->infoLen;
break;
}
offset += sizeof(AicpuExtInfo);
offset += aicpu_ext_info->infoLen;
offset = SizetAddWithOverflowCheck(offset, sizeof(AicpuExtInfo));
offset = SizetAddWithOverflowCheck(offset, aicpu_ext_info->infoLen);
}

if (offset != ext_info_len_) {
@@ -84,6 +84,7 @@ bool AicpuExtInfoHandler::Parse(const std::string &ext_info) {
}

bool AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
MS_EXCEPTION_IF_NULL(aicpu_ext_info);
if (aicpu_ext_info->infoLen != sizeof(int32_t)) {
MS_LOG(ERROR) << "Node:" << node_name_ << " parse ext shape type failed as infoLen must be " << sizeof(int32_t)
<< " but got:" << aicpu_ext_info->infoLen;
@@ -120,6 +121,7 @@ bool AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {

bool AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
auto need_len = output_num_ * sizeof(AicpuShapeAndType);
MS_EXCEPTION_IF_NULL(aicpu_ext_info);
if (aicpu_ext_info->infoLen != need_len) {
MS_LOG(INFO) << "Node:" << node_name_
<< " parse ext output shape failed, aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen
@@ -144,6 +146,10 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No
auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
std::vector<int64_t> tmp_shape;
std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
if (input_index >= input_shape_and_type_.size()) {
MS_LOG(EXCEPTION) << "Invalid input_index: " << input_index
<< " the size of input_shape_and_type_ is: " << input_shape_and_type_.size();
}
return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index]));
}

@@ -170,12 +176,20 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const

std::vector<int64_t> tmp_shape;
std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
if (output_index >= output_shape_and_type_.size()) {
MS_LOG(EXCEPTION) << "Invalid output_index: " << output_index
<< " the size of output_shape_and_type_ is: " << output_shape_and_type_.size();
}
return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index]));
}

bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
NotNull<TypeId *> data_type) {
MS_LOG(INFO) << "Get " << node_name_ << " Output:" << output_index << " Shape And Type";
if (output_index >= output_shape_and_type_.size()) {
MS_LOG(EXCEPTION) << "Invalid output_index: " << output_index
<< " the size of output_shape_and_type_ is: " << output_shape_and_type_.size();
}
GetShapeAndType(NOT_NULL(output_shape_and_type_[output_index]), shape, data_type);
return true;
}
@@ -183,7 +197,7 @@ bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<s
bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape,
NotNull<AicpuShapeAndType *> shape_and_type) {
if (shape.empty() || shape.size() > kernel::kMaxShapeDims) {
MS_LOG(ERROR) << "Invalid shape:" << shape.size();
MS_LOG(ERROR) << "Invalid shape:" << shape.size() << " Only support 0-8";
return false;
}



+ 8
- 0
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc View File

@@ -64,6 +64,7 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
break;
}
default: {
MS_EXCEPTION_IF_NULL(anf_node);
MS_LOG(EXCEPTION) << "node [" << anf_node->DebugString() << "] Unsupported kernel_type:" << kernel_type;
}
}
@@ -100,6 +101,7 @@ static bool KernelBuildParallelCompile(const std::vector<CNodePtr> &kernels) {
bool tbe_ret = true;
bool akg_ret = true;
auto bin_map = kernel::tbe::KernelMeta::GetInstance();
MS_EXCEPTION_IF_NULL(bin_map);
if (!tbe_nodes.empty()) {
std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS");
if (!old_build.empty()) {
@@ -171,6 +173,7 @@ static void AddTbeClearZeroNode(mindspore::session::KernelGraph *const kernel_gr
MS_EXCEPTION_IF_NULL(abstract);
clear_zero->set_abstract(abstract);
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(builder);
builder->SetKernelType(KernelType::TBE_KERNEL);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get());
auto clean_size = CalCleanZerosSize(pre_node);
@@ -197,6 +200,7 @@ static void AddFusionTbeClearZeroNode(mindspore::session::KernelGraph *const ker
MS_EXCEPTION_IF_NULL(abstract);
clear_zero->set_abstract(abstract);
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(builder);
builder->SetKernelType(KernelType::TBE_KERNEL);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get());
AnfAlgo::SetNodeAttr(kAttrAtomicAddMemSize, MakeValue(clean_size_list), clear_zero);
@@ -286,17 +290,21 @@ bool KernelBuild(const std::vector<CNodePtr> &kernels) {

std::map<AnfNodePtr, std::vector<size_t>> GetCommunicationOpInputInfo(
const mindspore::session::KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
std::map<AnfNodePtr, std::vector<size_t>> comm_input_info_map;
for (auto &kernel : kernel_graph->execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto input_num = AnfAlgo::GetInputTensorNum(kernel);
if (mindspore::session::AnfRuntimeAlgorithm::IsCommunicationOp(kernel)) {
for (size_t i = 0; i < input_num; i++) {
auto input_node = kernel->input(i + 1);
auto kernel_input = AnfAlgo::VisitKernelWithReturnType(input_node, 0, true);
MS_EXCEPTION_IF_NULL(kernel_input.first);
if (!kernel_input.first->isa<CNode>()) {
continue;
}
auto cnode = kernel_input.first->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (AnfAlgo::IsCommunicationOp(cnode) || AnfAlgo::IsIndependentNode(cnode) ||
AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) {
// no need to add atomic for communication/independent/getnext op 's output


+ 9
- 0
mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc View File

@@ -289,6 +289,7 @@ bool TagRaiseReduce(const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo(
const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list,
bool *precision_reduce) {
MS_EXCEPTION_IF_NULL(precision_reduce);
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list;
const std::map<TypeId, TypeId> raise_map = {{kNumberTypeFloat16, kNumberTypeFloat32}};
const std::map<TypeId, TypeId> reduce_map = {{kNumberTypeInt64, kNumberTypeInt32},
@@ -350,6 +351,7 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) {
auto format = iter->second[next_index];
auto info_builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(kernel_node));
MS_EXCEPTION_IF_NULL(info_builder);
info_builder->SetInputsFormat({format});
info_builder->SetOutputsFormat({format});
AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get());
@@ -372,12 +374,14 @@ void SetWeightFormat(const AnfNodePtr &real_input_node, std::vector<string> outp
output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)};
}
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(builder);
// we set special device info of a input tensor.
auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node);
if (op_info != nullptr) {
force_fresh = op_info->is_ref() || force_fresh;
}
auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node);
MS_EXCEPTION_IF_NULL(selected_kernel_info);
if (IsValueNode<tensor::Tensor>(real_input_node) &&
AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) {
builder->SetOutputsFormat(output_format);
@@ -403,6 +407,7 @@ bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string
return false;
}
auto cast_node = input_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cast_node);
if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) {
return true;
}
@@ -414,6 +419,7 @@ bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string
}
auto info_builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(input_node));
MS_EXCEPTION_IF_NULL(info_builder);
info_builder->SetInputsFormat({format});
info_builder->SetOutputsFormat({format});
AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get());
@@ -433,6 +439,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) {
auto input_with_index = AnfAlgo::VisitKernelWithReturnType(input_kernel_node, 0);
MS_EXCEPTION_IF_NULL(input_with_index.first);
auto real_input_node = input_with_index.first;
MS_EXCEPTION_IF_NULL(real_input_node);
if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) {
continue;
}
@@ -534,6 +541,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
}

void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_info = dynamic_cast<device::KernelInfo *>(kernel_node->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
auto kernel_build_info = kernel_info->select_kernel_build_info();
@@ -544,6 +552,7 @@ void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) {
}

auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
MS_EXCEPTION_IF_NULL(builder);
builder->SetOriginDataFormat(kernel_build_info->GetOriginDataFormat());
builder->SetInputsFormat(kernel_build_info->GetAllInputFormats());
builder->SetInputsDeviceType(kernel_build_info->GetAllInputDeviceTypes());


+ 6
- 0
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc View File

@@ -134,6 +134,7 @@ void ProfilingUtils::GetTraceBegin(const session::KernelGraph &kernel_graph, con
fp_start_str = first_node->fullname_with_scope();
} else {
for (auto &cnode : execution_orders) {
MS_EXCEPTION_IF_NULL(cnode);
if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) {
fp_start_str = cnode->fullname_with_scope();
break;
@@ -149,6 +150,7 @@ void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const
MS_EXCEPTION_IF_NULL(cnode);
for (const auto &input : cnode->inputs()) {
auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
MS_EXCEPTION_IF_NULL(prev_cnode.first);
if (!prev_cnode.first->isa<CNode>()) {
continue;
}
@@ -190,12 +192,14 @@ void ProfilingUtils::GetTraceBpEnd(const session::KernelGraph &kernel_graph, con
for (size_t i = 0; i < input_num; ++i) {
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i);
auto input_node = input_node_with_index.first;
MS_EXCEPTION_IF_NULL(input_node);
ar_input_node_names.insert(input_node->fullname_with_scope());
}
// start from previous node
++iter;
// find input names in previous node
while (iter != execution_orders.rend()) {
MS_EXCEPTION_IF_NULL(*iter);
if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) {
bp_end_str = (*iter)->fullname_with_scope();
break;
@@ -219,6 +223,7 @@ std::string ProfilingUtils::GetGraphLastKernelName(const session::KernelGraph &k
auto &execution_order = kernel_graph.execution_order();
// find last tbe_kernel
for (auto iter = execution_order.rbegin(); iter != execution_order.rend(); ++iter) {
MS_EXCEPTION_IF_NULL(*iter);
if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL || AnfAlgo::GetKernelType(*iter) == AKG_KERNEL ||
AnfAlgo::IsCommunicationOp(*iter)) {
last_tbe_kernel_name = (*iter)->fullname_with_scope();
@@ -297,6 +302,7 @@ void ProfilingUtils::InsertProfilingTraceFp(const mindspore::AnfNodePtr &anf_nod
const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> graph_ptr,
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
MS_EXCEPTION_IF_NULL(anf_node);
if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
MS_LOG(INFO) << "Profiling graph:" << graph_ptr->graph_id()
<< " Match FpStart:" << profiling_trace_info.trace_begin;


+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc View File

@@ -55,6 +55,7 @@ void DescReporter::ReportByLine(const std::string &data, const std::string &file

void DescReporter::ReportAllLine() {
for (const auto &desc : prof_desc_list_) {
MS_EXCEPTION_IF_NULL(desc);
auto data = desc->ToString();
ReportByLine(data, file_name_);
}


+ 1
- 1
mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc View File

@@ -24,13 +24,13 @@ namespace device {
namespace ascend {
void GraphDescReporter::ReportData() {
for (const auto &node : cnode_list_) {
MS_EXCEPTION_IF_NULL(node);
if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) {
MS_LOG(INFO) << "Skip non tbe kernel:" << node->fullname_with_scope();
continue;
}
std::vector<DataElement> input_data_list;
std::vector<DataElement> output_data_list;
MS_EXCEPTION_IF_NULL(node);
auto op_name = node->fullname_with_scope();
auto op_type = AnfAlgo::GetCNodeName(node);
auto input_size = AnfAlgo::GetInputTensorNum(node);


+ 1
- 1
mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc View File

@@ -31,6 +31,7 @@ void TaskDescReporter::ReportData() {

size_t task_index = 0;
for (const auto &node : cnode_list_) {
MS_EXCEPTION_IF_NULL(node);
if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) {
MS_LOG(INFO) << "Skip non tbe kernel:" << node->fullname_with_scope();
++task_index;
@@ -38,7 +39,6 @@ void TaskDescReporter::ReportData() {
}
auto kernel_mod = AnfAlgo::GetKernelMod(node);
auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod);
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(ascend_kernel_mod);
// Check task_id and stream_id valid
CheckStreamTaskValid(task_index, task_index);


+ 19
- 3
mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc View File

@@ -84,6 +84,7 @@ void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, Addre
auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
kernel::AddressPtr input = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(input);
MS_EXCEPTION_IF_NULL(device_address);
input->addr = device_address->ptr_;
input->size = device_address->size_;
kernel_inputs->push_back(input);
@@ -112,6 +113,7 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP
auto device_address = AnfAlgo::GetOutputAddr(pre_node, index);
kernel::AddressPtr input = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(input);
MS_EXCEPTION_IF_NULL(device_address);
input->addr = device_address->ptr_;
MS_EXCEPTION_IF_NULL(input->addr);
input->size = device_address->size_;
@@ -126,6 +128,7 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP
auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, index);
kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(workspace);
MS_EXCEPTION_IF_NULL(device_address);
workspace->addr = device_address->ptr_;
MS_EXCEPTION_IF_NULL(workspace->addr);
workspace->size = device_address->size_;
@@ -158,6 +161,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, anf_node_ptr)) {
MS_LOG(INFO) << "Skip task generation for NonTask op " << anf_node_ptr->fullname_with_scope();
auto debug_info = std::make_shared<TaskDebugInfo>();
MS_EXCEPTION_IF_NULL(debug_info);
debug_info->op_name_ = anf_node_ptr->fullname_with_scope() + "-NonTask";
debug_info->task_num_ = 0;
task_debug_info_list_.push_back(debug_info);
@@ -180,10 +184,12 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index);
AddressPtr input = std::make_shared<Address>();
MS_EXCEPTION_IF_NULL(input);
input->addr = device_address->ptr_;
input->size = device_address->size_;

auto prenode_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
MS_EXCEPTION_IF_NULL(prenode_with_index.first);
if (AnfAlgo::IsRealCNodeKernel(prenode_with_index.first)) {
if ((AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitOpName ||
AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitVOpName) &&
@@ -192,6 +198,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
// when op A -> split(NonTask) -> op B, op B's input addr is split's input0's addr + offset
// offset is split's output index * split's output size
auto split_input0_device_address = AnfAlgo::GetPrevNodeOutputAddr(prenode_with_index.first, 0);
MS_EXCEPTION_IF_NULL(split_input0_device_address);
input->addr =
static_cast<uint8_t *>(split_input0_device_address->ptr_) + (prenode_with_index.second * input->size);
MS_LOG(INFO) << "Change " << anf_node_ptr->fullname_with_scope() << "'s input " << i << " address to "
@@ -231,6 +238,11 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end());
auto debug_info = std::make_shared<TaskDebugInfo>();
MS_EXCEPTION_IF_NULL(debug_info);
if (task_info_ptrs.empty()) {
MS_LOG(ERROR) << "Empty task_info_ptrs.";
return false;
}
debug_info->op_name_ = anf_node_ptr->fullname_with_scope();
debug_info->task_num_ = task_info_ptrs.size();
debug_info->stream_id_ = task_info_ptrs[0]->stream_id();
@@ -338,6 +350,7 @@ void TaskGenerator::SaveTaskDebugInfoToFile(const std::string &real_filename,

size_t index = 0;
for (auto &task_debug_info : task_debug_info_list) {
MS_EXCEPTION_IF_NULL(task_debug_info);
fout << "op_name:" << task_debug_info->op_name_ << "\n"
<< "task_index:" << index << "\t"
<< "task_num:" << task_debug_info->task_num_ << "\t"
@@ -345,25 +358,28 @@ void TaskGenerator::SaveTaskDebugInfoToFile(const std::string &real_filename,
<< "task0_type:" << task_debug_info->type_ << "\t"
<< "task0_dump_flag:" << task_debug_info->dump_flag_ << "\n";
index++;
if (task_debug_info->input_addrs_.size()) {
if (!task_debug_info->input_addrs_.empty()) {
fout << "input address:";
for (auto &input : task_debug_info->input_addrs_) {
MS_EXCEPTION_IF_NULL(input);
fout << input->addr << "(" << input->size << ")\t";
}
fout << "\n";
}

if (task_debug_info->output_addrs_.size()) {
if (!task_debug_info->output_addrs_.empty()) {
fout << "output address:";
for (auto &output : task_debug_info->output_addrs_) {
MS_EXCEPTION_IF_NULL(output);
fout << output->addr << "(" << output->size << ")\t";
}
fout << "\n";
}

if (task_debug_info->workspace_addrs_.size()) {
if (!task_debug_info->workspace_addrs_.empty()) {
fout << "workspace address:";
for (auto &workspace : task_debug_info->workspace_addrs_) {
MS_EXCEPTION_IF_NULL(workspace);
fout << workspace->addr << "(" << workspace->size << ")\t";
}
fout << "\n";


+ 1
- 0
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc View File

@@ -382,6 +382,7 @@ void CPUKernelRuntime::BindOutputTensorAddressPtr(const VectorRef *outputs) {
// Binds runtime address pointers for a graph execution: wires the given input
// tensors into `kernel_graph` and binds output tensor address pointers through
// `outputs`. Throws (via MS_EXCEPTION_IF_NULL) when the graph or the output
// container is null; `inputs` itself is not null-checked here.
// NOTE(review): the exact binding semantics live in BindInputTensorAddressPtr /
// BindOutputTensorAddressPtr, which are outside this view — confirm there.
void CPUKernelRuntime::BindInputOutput(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
                                       VectorRef *outputs) {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  MS_EXCEPTION_IF_NULL(outputs);
  BindInputTensorAddressPtr(*kernel_graph, inputs);
  BindOutputTensorAddressPtr(outputs);
}


+ 2
- 1
mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc View File

@@ -279,7 +279,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr,
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (kernel_attr.GetOutputSize() != output_num) {
MS_LOG(DEBUG) << "Output num is not equal!";
MS_LOG(EXCEPTION) << "Output num is not equal!";
continue;
}
int input_dtype_matched_num =
@@ -299,6 +299,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr,
}

void SetKernelInfo(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
// Select for dynamic kernel(both the number and data type are undetermined).
const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node);
if (IsDynamicParamKernel(op_name)) {


+ 2
- 1
mindspore/ccsrc/runtime/device/kernel_runtime.cc View File

@@ -1168,7 +1168,8 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList
const std::shared_ptr<MemScheduler> &mem_scheduler) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(kernel_inputs);
if (cnode->inputs().size() != 2) {
const size_t kNodeInputSize = 2;
if (cnode->inputs().size() != kNodeInputSize) {
MS_LOG(EXCEPTION) << "Atomic Addr clean Node Input nodes not equal 2.";
}
MS_EXCEPTION_IF_NULL(cnode->inputs()[1]);


+ 1
- 0
mindspore/ccsrc/runtime/device/memory_manager.cc View File

@@ -144,6 +144,7 @@ bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t
if (!device_ptr) {
return false;
}
MS_EXCEPTION_IF_NULL(address);
address->ptr_ = device_ptr;
address->size_ = size;
address->from_mem_pool_ = true;


+ 26
- 0
mindspore/core/utils/convert_utils_base.h View File

@@ -169,6 +169,32 @@ inline size_t SizetMulWithOverflowCheck(size_t a, size_t b) {
return out;
}

// Multiplies two uint32_t values, raising an exception instead of silently
// wrapping when the mathematical product does not fit in 32 bits.
// Returns a * b when no overflow occurred.
inline uint32_t Uint32tMulWithOverflowCheck(uint32_t a, uint32_t b) {
  // Unsigned multiplication wraps modulo 2^32, which is well defined; for a
  // non-zero multiplicand, dividing the wrapped product by `a` recovers `b`
  // exactly when the true product fit in 32 bits.
  const uint32_t product = a * b;
  const bool overflowed = (a != 0) && (product / a != b);
  if (overflowed) {
    MS_LOG(EXCEPTION) << "Mul: a(" << a << ") * b(" << b << ") result is overflow";
  }
  return product;
}

// Adds two size_t values, raising an exception instead of silently wrapping
// when the mathematical sum does not fit in size_t.
// Returns x + y when no overflow occurred.
inline size_t SizetAddWithOverflowCheck(size_t x, size_t y) {
  // With unsigned wrap-around semantics an overflowed sum is always smaller
  // than each operand, so comparing against either addend detects the wrap.
  const size_t total = x + y;
  const bool wrapped = (total < x) || (total < y);
  if (wrapped) {
    MS_LOG(EXCEPTION) << "Add: a(" << x << ") + b(" << y << ") result is overflow";
  }
  return total;
}

// Adds two uint32_t values, raising an exception instead of silently wrapping
// when the mathematical sum does not fit in 32 bits.
// Returns x + y when no overflow occurred.
inline uint32_t Uint32tAddWithOverflowCheck(uint32_t x, uint32_t y) {
  // Unsigned addition wraps modulo 2^32; a wrapped result is strictly
  // smaller than each operand, so either comparison exposes the overflow.
  const uint32_t total = x + y;
  const bool wrapped = (total < x) || (total < y);
  if (wrapped) {
    MS_LOG(EXCEPTION) << "Add: a(" << x << ") + b(" << y << ") result is overflow";
  }
  return total;
}

inline uint8_t *AddressOffset(void *address, size_t offset) {
MS_EXCEPTION_IF_NULL(address);
return static_cast<uint8_t *>(address) + offset;


Loading…
Cancel
Save