| @@ -42,14 +42,14 @@ void DescReporter::ReportByLine(const std::string &data, const std::string &file | |||||
| report_data.data = (unsigned char *)data.c_str() + cur_size; | report_data.data = (unsigned char *)data.c_str() + cur_size; | ||||
| auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, file_name.c_str(), file_name.length()); | auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, file_name.c_str(), file_name.length()); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(EXCEPTION) << "memcpy_s report data tag failed"; | |||||
| MS_LOG(EXCEPTION) << "Memcpy_s report data tag failed"; | |||||
| } | } | ||||
| auto report_ret = reporter->Report(&report_data); | auto report_ret = reporter->Report(&report_data); | ||||
| if (report_ret != 0) { | if (report_ret != 0) { | ||||
| MS_LOG(EXCEPTION) << "report data failed"; | |||||
| MS_LOG(EXCEPTION) << "Report data failed"; | |||||
| } | } | ||||
| if (report_size == 0) { | if (report_size == 0) { | ||||
| MS_LOG(WARNING) << "report_size is 0"; | |||||
| MS_LOG(WARNING) << "Report_size is 0"; | |||||
| break; | break; | ||||
| } | } | ||||
| cur_size += report_size; | cur_size += report_size; | ||||
| @@ -30,6 +30,7 @@ void GraphDescReporter::ReportData() { | |||||
| } | } | ||||
| std::vector<DataElement> input_data_list; | std::vector<DataElement> input_data_list; | ||||
| std::vector<DataElement> output_data_list; | std::vector<DataElement> output_data_list; | ||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| auto op_name = node->fullname_with_scope(); | auto op_name = node->fullname_with_scope(); | ||||
| auto op_type = AnfAlgo::GetCNodeName(node); | auto op_type = AnfAlgo::GetCNodeName(node); | ||||
| auto input_size = AnfAlgo::GetInputTensorNum(node); | auto input_size = AnfAlgo::GetInputTensorNum(node); | ||||
| @@ -129,7 +129,7 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu | |||||
| const std::vector<AddressPtr> &workspace, | const std::vector<AddressPtr> &workspace, | ||||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) { | const std::vector<AddressPtr> &outputs, uint32_t stream_id) { | ||||
| if (inputs.empty() || outputs.empty()) { | if (inputs.empty() || outputs.empty()) { | ||||
| MS_LOG(EXCEPTION) << "inputs or outputs is empty"; | |||||
| MS_LOG(EXCEPTION) << "Inputs or outputs is empty"; | |||||
| } | } | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_); | std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_); | ||||
| @@ -32,7 +32,12 @@ bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> &inputs, | |||||
| if (context_ptr->enable_task_sink()) { | if (context_ptr->enable_task_sink()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| if (inputs.empty() || hccl_data_type_list_.empty()) { | |||||
| MS_LOG(ERROR) << "BroadCast param is empty"; | |||||
| return false; | |||||
| } | |||||
| const char *tag = "Hccl-BroadCast"; | const char *tag = "Hccl-BroadCast"; | ||||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||||
| hcclResult_t ret = | hcclResult_t ret = | ||||
| hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream_ptr); | hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream_ptr); | ||||
| if (ret != HCCL_SUCCESS) { | if (ret != HCCL_SUCCESS) { | ||||
| @@ -31,6 +31,10 @@ bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &inputs, const st | |||||
| if (context_ptr->enable_task_sink()) { | if (context_ptr->enable_task_sink()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| if (inputs.empty() || hccl_data_type_list_.empty()) { | |||||
| MS_LOG(ERROR) << "AllGather param is empty"; | |||||
| return false; | |||||
| } | |||||
| const char *tag = "Hccl-AllGather"; | const char *tag = "Hccl-AllGather"; | ||||
| hcclResult_t ret = | hcclResult_t ret = | ||||
| hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream_ptr); | hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream_ptr); | ||||
| @@ -31,6 +31,10 @@ bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> &inputs, const st | |||||
| if (context_ptr->enable_task_sink()) { | if (context_ptr->enable_task_sink()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { | |||||
| MS_LOG(ERROR) << "AllReduce param is empty"; | |||||
| return false; | |||||
| } | |||||
| const char *tag = "Hccl-AllReduce"; | const char *tag = "Hccl-AllReduce"; | ||||
| hcclResult_t ret = hcom_all_reduce(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], | hcclResult_t ret = hcom_all_reduce(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], | ||||
| op_type_, nullptr, stream_ptr); | op_type_, nullptr, stream_ptr); | ||||
| @@ -32,6 +32,10 @@ bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &inputs, | |||||
| if (context_ptr->enable_task_sink()) { | if (context_ptr->enable_task_sink()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { | |||||
| MS_LOG(ERROR) << "ReduceScatter param is empty"; | |||||
| return false; | |||||
| } | |||||
| const char *tag = "Hccl-ReduceScatter"; | const char *tag = "Hccl-ReduceScatter"; | ||||
| hcclResult_t ret = hcom_reduce_scatter(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], | hcclResult_t ret = hcom_reduce_scatter(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], | ||||
| op_type_, nullptr, stream_ptr); | op_type_, nullptr, stream_ptr); | ||||
| @@ -66,6 +66,7 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector<hcclDataType_t | |||||
| } | } | ||||
| bool HcomUtil::GetHcclOpSize(const hcclDataType_t &data_type, const vector<size_t> &shape, size_t *size) { | bool HcomUtil::GetHcclOpSize(const hcclDataType_t &data_type, const vector<size_t> &shape, size_t *size) { | ||||
| MS_EXCEPTION_IF_NULL(size); | |||||
| int tmp_size = 1; | int tmp_size = 1; | ||||
| uint32_t type_size = 4; | uint32_t type_size = 4; | ||||
| for (size_t i = 0; i < shape.size(); i++) { | for (size_t i = 0; i < shape.size(); i++) { | ||||
| @@ -84,6 +85,7 @@ bool HcomUtil::GetHcclOpSize(const hcclDataType_t &data_type, const vector<size_ | |||||
| } | } | ||||
| bool HcomUtil::GetHcomTypeSize(const hcclDataType_t &data_type, uint32_t *size) { | bool HcomUtil::GetHcomTypeSize(const hcclDataType_t &data_type, uint32_t *size) { | ||||
| MS_EXCEPTION_IF_NULL(size); | |||||
| auto iter = CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.find(data_type); | auto iter = CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.find(data_type); | ||||
| if (iter == CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.end()) { | if (iter == CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.end()) { | ||||
| MS_LOG(ERROR) << "HcomUtil::HcomDataTypeSize, No DataTypeSize!"; | MS_LOG(ERROR) << "HcomUtil::HcomDataTypeSize, No DataTypeSize!"; | ||||