From: @linqingke Reviewed-by: Signed-off-by: tags/v1.2.0-rc1
| @@ -366,7 +366,7 @@ class TbeProcess: | |||
| return ret | |||
| if self.__running_tune_tasks: | |||
| query_count = 0 | |||
| total_query_count = len(self.__running_tune_tasks) * 2 * 10 | |||
| total_query_count = len(self.__running_tune_tasks) * 2 * 60 | |||
| while query_count < total_query_count: | |||
| ret = self.__tuner.get_finish_tasks() | |||
| if not ret: | |||
| @@ -53,6 +53,7 @@ static size_t GenFusionJsonHash(const nlohmann::json &fusion_json) { | |||
| std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) { | |||
| std::map<int64_t, KernelModPtr> kernel_mod_ret; | |||
| static std::set<std::string> processed_fusion_kernel; | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| @@ -99,6 +100,13 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||
| continue; | |||
| } | |||
| } | |||
| // The same op does not need to be built again, but we must wait for its build to finish before setting its kernel mod | |||
| if (processed_fusion_kernel.find(json_name) != processed_fusion_kernel.end()) { | |||
| build_manger->SaveSameFusionOpInfo(fusion_scope_iter.scope_id, json_name, tbe::kProcessorAiCore, input_size_list, | |||
| output_size_list); | |||
| continue; | |||
| } | |||
| (void)processed_fusion_kernel.insert(json_name); | |||
| // generate soc info json | |||
| nlohmann::json soc_info_json; | |||
| TbeUtils::GenSocInfo(&soc_info_json); | |||
| @@ -138,6 +146,11 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||
| (void)kernel_mod_ret.emplace(kernel_mod_item); | |||
| } | |||
| } | |||
| bool ret = build_manger->GenSameFusionOpKernelMod(&kernel_mod_ret); | |||
| if (!ret) { | |||
| MS_LOG(INFO) << "Fusion warning: Fuison op has cache failed."; | |||
| } | |||
| MS_LOG(INFO) << "Build Fusion Kernel Failed Num: " << build_failed_num; | |||
| return kernel_mod_ret; | |||
| } | |||
| @@ -150,12 +150,18 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||
| static int32_t dynamic_index = 0; | |||
| op_info_json[kJDynamicIndex] = dynamic_index++; | |||
| } | |||
| std::string json_str = op_info_json.dump(); | |||
| size_t hash_id = std::hash<std::string>()(json_str); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | |||
| // generate soc info json | |||
| nlohmann::json soc_info_json; | |||
| TbeUtils::GenSocInfo(&soc_info_json); | |||
| soc_info_json[kAutoTilingMode] = tune_mode; | |||
| std::string json_str = op_info_json.dump() + soc_info_json.dump(); | |||
| size_t hash_id = std::hash<std::string>()(json_str); | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| op_info_json[kJFullName] = anf_node->fullname_with_scope(); | |||
| json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | |||
| json_info_ = json_str; | |||
| @@ -175,10 +181,7 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||
| op_info_json[kJAttrDesc] = attr_desc; | |||
| } | |||
| // generate soc info json | |||
| nlohmann::json soc_info_json; | |||
| TbeUtils::GenSocInfo(&soc_info_json); | |||
| soc_info_json[kAutoTilingMode] = tune_mode; | |||
| // merge json | |||
| soc_info_json[kJSocVersion] = soc_version; | |||
| (*kernel_json)[kJSocInfo] = soc_info_json; | |||
| (*kernel_json)[kJOpInfo] = op_info_json; | |||
| @@ -36,7 +36,7 @@ using mindspore::kernel::tbe::TbeUtils; | |||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| set<std::string> processed_kernel; | |||
| static set<std::string> processed_kernel; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | |||
| @@ -201,6 +201,19 @@ void ParallelBuildManager::SaveSameOpInfo(const mindspore::AnfNodePtr &anf_node, | |||
| same_op_list_.push_back(task_info); | |||
| } | |||
| void ParallelBuildManager::SaveSameFusionOpInfo(const int64_t scope_id, const std::string &json_name, | |||
| const std::string &processor, | |||
| const std::vector<size_t> &input_size_list, | |||
| const std::vector<size_t> &output_size_list) { | |||
| struct KernelBuildTaskInfo task_info; | |||
| task_info.scope_id = scope_id; | |||
| task_info.json_name = json_name; | |||
| task_info.processor = processor; | |||
| task_info.input_size_list.assign(input_size_list.begin(), input_size_list.end()); | |||
| task_info.output_size_list.assign(output_size_list.begin(), output_size_list.end()); | |||
| same_op_list_.push_back(task_info); | |||
| } | |||
| bool ParallelBuildManager::GenSameOpKernelMod() const { | |||
| for (const auto &task_info : same_op_list_) { | |||
| bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list, | |||
| @@ -213,6 +226,24 @@ bool ParallelBuildManager::GenSameOpKernelMod() const { | |||
| return true; | |||
| } | |||
| bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const { | |||
| bool ret = true; | |||
| for (const auto &task_info : same_op_list_) { | |||
| auto kernel_pack = TbeUtils::SearchCache(task_info.json_name, tbe::kProcessorAiCore); | |||
| if (kernel_pack != nullptr) { | |||
| auto kernel_mode = GenKernelMod(task_info.json_name, tbe::kProcessorAiCore, task_info.input_size_list, | |||
| task_info.output_size_list, kernel_pack); | |||
| if (kernel_mode != nullptr) { | |||
| (*kernel_mode_ret)[task_info.scope_id] = kernel_mode; | |||
| continue; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "can't find " << task_info.json_name << " in cache."; | |||
| ret = false; | |||
| } | |||
| return ret; | |||
| } | |||
| bool ParallelBuildManager::SearchInCache(const std::string &json_name, const std::string &processor, | |||
| const std::vector<size_t> &input_size_list, | |||
| const std::vector<size_t> &output_size_list, mindspore::AnfNode *node) const { | |||
| @@ -36,7 +36,7 @@ struct KernelBuildTaskInfo { | |||
| std::string json_name; | |||
| std::vector<size_t> input_size_list; | |||
| std::vector<size_t> output_size_list; | |||
| int32_t scope_id; | |||
| int64_t scope_id; | |||
| }; | |||
| class ParallelBuildManager { | |||
| @@ -48,7 +48,10 @@ class ParallelBuildManager { | |||
| int32_t scope_id = 0); | |||
| void SaveSameOpInfo(const AnfNodePtr &anf_node, const std::string &json_name, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list); | |||
| void SaveSameFusionOpInfo(const int64_t scope_id, const std::string &json_name, const std::string &processor, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list); | |||
| bool GenSameOpKernelMod() const; | |||
| bool GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const; | |||
| bool SearchInCache(const std::string &json_name, const std::string &processor, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| AnfNode *node) const; | |||