diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc index 1f88bbb89a..6bd1e7747c 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc @@ -618,6 +618,5 @@ size_t AkgKernelBuild::GetOutputTensorIdxInc() { size_t idx = output_tensor_idx_++; return idx; } - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h index d32bd48ce6..15fa03f45b 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h @@ -70,7 +70,6 @@ void SetTensorName(const std::string &tag, const std::string &new_name, const st nlohmann::json *const node_json); std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, const std::pair &position); - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc index 454b8052ab..7200a91ac0 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc @@ -35,7 +35,6 @@ namespace mindspore { namespace kernel { - constexpr int32_t PARALLEL_ARGS_SIZE = 3; constexpr int32_t PROCESS_NUM = 16; constexpr int32_t TIME_OUT = 300; @@ -48,76 +47,9 @@ constexpr auto kOutputDesc = "output_desc"; constexpr auto kTensorName = "tensor_name"; constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel"; constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler"; - -bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { - MS_EXCEPTION_IF_NULL(anf_node); - std::string op_name = AnfAlgo::GetCNodeName(anf_node); - MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << 
AkgKernelBuild::GetProcessor(anf_node) << "]"; - auto it = kAkgKernelAttrsProcessMap.find(op_name); - if (it != kAkgKernelAttrsProcessMap.end()) { - it->second(anf_node); - } - MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; - nlohmann::json node_json; - if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { - MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; - } - - kernel_json_ = node_json.dump(); - - if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { - MS_LOG(ERROR) << "Cal mem size failed."; - return false; - } - - return true; -} - -bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf_nodes, - const std::vector &input_list, - const std::vector &output_list) { - if (anf_nodes.empty() || input_list.empty()) { - MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() - << "]."; - return false; - } - MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list [" - << input_list.size() << "]."; - - std::map node_json_map; - - for (auto const &anf_node : anf_nodes) { - MS_EXCEPTION_IF_NULL(anf_node); - std::string op_name = AnfAlgo::GetCNodeName(anf_node); - if (!AnfAlgo::IsRealKernel(anf_node)) { - MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; - return false; - } - auto it = kAkgKernelAttrsProcessMap.find(op_name); - if (it != kAkgKernelAttrsProcessMap.end()) { - it->second(anf_node); - } - - nlohmann::json node_json; - if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { - MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; - return false; - } - // No need for composite op. 
- node_json.erase("id"); - node_json.erase("op"); - node_json.erase("composite"); - - auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); - MS_EXCEPTION_IF_NULL(primitive); - - if (primitive->GetAttr("fusion") != nullptr) { - node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); - } - - node_json_map[anf_node] = node_json; - } - +namespace { +void UpdateTensorNameInJson(const std::vector &anf_nodes, + std::map *node_json_map) { for (auto const &anf_node : anf_nodes) { std::vector dyn_input_sizes; auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); @@ -134,11 +66,11 @@ bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1; for (size_t j = 0; j < input_tensor_num; ++j) { auto tmp_input = GetKernelInput(anf_node, real_input_index); - std::string tensor_name = GetTensorName(node_json_map[anf_node], kInputDesc, std::make_pair(i, j)); - if (node_json_map.find(tmp_input.first) != node_json_map.end()) { + std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kInputDesc, std::make_pair(i, j)); + if (node_json_map->find(tmp_input.first) != node_json_map->end()) { std::string new_tensor_name = - GetTensorName(node_json_map[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); - SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &(node_json_map[anf_node])); + GetTensorName((*node_json_map)[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); + SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node])); MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; @@ -150,13 +82,10 @@ bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf } } } +} - nlohmann::json 
fused_node_json; - std::vector node_json_desc; - std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), - [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); - fused_node_json[kOpDesc] = node_json_desc; - +nlohmann::json GetInputsJson(const std::vector &anf_nodes, const std::vector &input_list, + std::map *node_json_map) { nlohmann::json inputs_json; auto input_index = GetInputIndex(anf_nodes, input_list); for (size_t i = 0; i < input_index.size(); ++i) { @@ -164,13 +93,18 @@ bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first); std::string dtype = TypeId2String(type_id); nlohmann::json input_desc_json; - input_desc_json[kTensorName] = GetTensorName(node_json_map[tmp_input.first], kInputDesc, tmp_input.second); + input_desc_json[kTensorName] = GetTensorName((*node_json_map)[tmp_input.first], kInputDesc, tmp_input.second); input_desc_json[kDataType] = dtype; input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first); inputs_json.emplace_back(std::vector{input_desc_json}); } - fused_node_json[kInputDesc] = inputs_json; + return inputs_json; +} + +nlohmann::json GetOutputsJson(const std::vector &anf_nodes, const std::vector &input_list, + const std::vector &output_list, const nlohmann::json &inputs_json, + std::map *node_json_map) { nlohmann::json outputs_json; auto output_index = GetOutputIndex(anf_nodes, input_list, output_list); for (size_t i = 0; i < output_index.size(); ++i) { @@ -188,7 +122,7 @@ bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second); std::string dtype = TypeId2String(type_id); output_desc_json[kTensorName] = - GetTensorName(node_json_map[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); + 
GetTensorName((*node_json_map)[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); output_desc_json[kDataType] = dtype; auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second); if (output_shape.empty()) { @@ -198,7 +132,166 @@ bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf } outputs_json.emplace_back(output_desc_json); } - fused_node_json[kOutputDesc] = outputs_json; + + return outputs_json; +} + +std::pair, std::vector>> PreProcessJsonForBuild( + const std::vector> &build_args) { + // Remove cached nodes, gather unique nodes, and collect repeated nodes which need postprocessing. + std::vector jsons; + std::vector> repeat_nodes; + std::unordered_set json_name_set; + for (const auto &[builder, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto json_name = builder.json_name(); + MS_LOG(DEBUG) << "Akg start compile op: " << json_name; + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack != nullptr) { + MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + continue; + } + + if (json_name_set.count(json_name) != 0) { + repeat_nodes.push_back({builder, anf_node}); + continue; + } + json_name_set.insert(json_name); + auto node_json = builder.kernel_json(); + kernel::SaveJsonInfo(json_name, node_json); + jsons.push_back(node_json); + } + + return std::make_pair(jsons, repeat_nodes); +} + +bool PostProcessAfterCompile(const std::vector> &build_args, + const std::vector> &repeat_nodes) { + for (const auto &[builder, anf_node] : build_args) { + auto json_name = builder.json_name(); + auto 
new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (new_kernel_pack == nullptr) { + MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + return false; + } + auto kernel_mod_ptr = std::make_shared(new_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; + } + + for (const auto &[builder, anf_node] : repeat_nodes) { + auto node_json = builder.kernel_json(); + auto json_name = builder.json_name(); + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack == nullptr) { + return false; + } + MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + } + + return true; +} +} // namespace + +bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, 
&node_json)) { + MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; + } + + kernel_json_ = node_json.dump(); + + if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { + MS_LOG(ERROR) << "Cal mem size failed."; + return false; + } + + return true; +} + +bool AkgAscendKernelBuilder::GenJsonAndPreprocess4Fused(const std::vector &anf_nodes, + std::map *node_json_map) { + for (auto const &anf_node : anf_nodes) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (!AnfAlgo::IsRealKernel(anf_node)) { + MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; + return false; + } + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { + MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; + return false; + } + // No need for composite op. 
+ node_json.erase("id"); + node_json.erase("op"); + node_json.erase("composite"); + + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + + if (primitive->GetAttr("fusion") != nullptr) { + node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); + } + + (*node_json_map)[anf_node] = node_json; + } + return true; +} + +bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf_nodes, + const std::vector &input_list, + const std::vector &output_list) { + if (anf_nodes.empty() || input_list.empty()) { + MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() + << "]."; + return false; + } + MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list [" + << input_list.size() << "]."; + + std::map node_json_map; + if (!GenJsonAndPreprocess4Fused(anf_nodes, &node_json_map)) { + return false; + } + + UpdateTensorNameInJson(anf_nodes, &node_json_map); + + nlohmann::json fused_node_json; + std::vector node_json_desc; + std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), + [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); + fused_node_json[kOpDesc] = node_json_desc; + fused_node_json[kInputDesc] = GetInputsJson(anf_nodes, input_list, &node_json_map); + fused_node_json[kOutputDesc] = + GetOutputsJson(anf_nodes, input_list, output_list, fused_node_json[kInputDesc], &node_json_map); size_t hash_id = std::hash()(fused_node_json.dump()); json_name_ = "Fused_"; @@ -243,36 +336,7 @@ void GenParallelCompileFuncArgs(const std::vector &kernel_jsons, Py } bool AkgOpParallelBuild(const std::vector> &build_args) { - // Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess. 
- std::vector jsons; - std::unordered_set json_name_set; - std::vector> repeat_nodes; - for (const auto &[builder, anf_node] : build_args) { - MS_EXCEPTION_IF_NULL(anf_node); - auto json_name = builder.json_name(); - MS_LOG(DEBUG) << "Akg start compile op: " << json_name; - auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); - if (cached_kernel_pack != nullptr) { - MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" - << anf_node->fullname_with_scope() << "]."; - auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); - kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); - kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); - AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); - continue; - } - - if (json_name_set.count(json_name) != 0) { - repeat_nodes.push_back({builder, anf_node}); - continue; - } - json_name_set.insert(json_name); - auto node_json = builder.kernel_json(); - kernel::SaveJsonInfo(json_name, node_json); - jsons.push_back(node_json); - } - - // No nodes need to be compiled! + auto [jsons, repeat_nodes] = PreProcessJsonForBuild(build_args); if (jsons.empty()) { return true; } @@ -307,34 +371,8 @@ bool AkgOpParallelBuild(const std::vectorfullname_with_scope() << "]."; - return false; - } - auto kernel_mod_ptr = std::make_shared(new_kernel_pack); - kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); - kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); - AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); - MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; - } - - // Handle repeated nodes. 
- for (const auto &[builder, anf_node] : repeat_nodes) { - auto node_json = builder.kernel_json(); - auto json_name = builder.json_name(); - auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); - if (cached_kernel_pack == nullptr) return false; - MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" - << anf_node->fullname_with_scope() << "]."; - auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); - kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); - kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); - AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + if (!PostProcessAfterCompile(build_args, repeat_nodes)) { + return false; } return true; @@ -380,6 +418,5 @@ bool AkgAscendKernelParallelBuild(const std::vector &anf_nodes) { return AkgOpParallelBuild(json_and_node); } - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h index 619b583fde..01752911ed 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "ir/anf.h" #include "kernel/kernel.h" #include "kernel/akg/akg_kernel_build.h" @@ -40,6 +41,9 @@ class AkgAscendKernelBuilder : public AkgKernelBuild { const std::vector &output_size_list() const { return output_size_list_; } private: + bool GenJsonAndPreprocess4Fused(const std::vector &anf_nodes, + std::map *node_json_map); + std::string kernel_json_; std::vector input_size_list_; std::vector output_size_list_; diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc index bad6de64aa..69fc82aad3 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc +++ 
b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc @@ -55,7 +55,7 @@ const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return w bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { - if (stream_ptr == 0) { + if (stream_ptr == nullptr) { MS_LOG(ERROR) << "stream_ptr should not be nullptr."; return false; }