From: @linqingke Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -366,7 +366,7 @@ class TbeProcess: | |||||
| return ret | return ret | ||||
| if self.__running_tune_tasks: | if self.__running_tune_tasks: | ||||
| query_count = 0 | query_count = 0 | ||||
| total_query_count = len(self.__running_tune_tasks) * 2 * 10 | |||||
| total_query_count = len(self.__running_tune_tasks) * 2 * 60 | |||||
| while query_count < total_query_count: | while query_count < total_query_count: | ||||
| ret = self.__tuner.get_finish_tasks() | ret = self.__tuner.get_finish_tasks() | ||||
| if not ret: | if not ret: | ||||
| @@ -53,6 +53,7 @@ static size_t GenFusionJsonHash(const nlohmann::json &fusion_json) { | |||||
| std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) { | std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) { | ||||
| std::map<int64_t, KernelModPtr> kernel_mod_ret; | std::map<int64_t, KernelModPtr> kernel_mod_ret; | ||||
| static std::set<std::string> processed_fusion_kernel; | |||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | auto build_manger = std::make_shared<ParallelBuildManager>(); | ||||
| MS_EXCEPTION_IF_NULL(build_manger); | MS_EXCEPTION_IF_NULL(build_manger); | ||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| @@ -99,6 +100,13 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| continue; | continue; | ||||
| } | } | ||||
| } | } | ||||
| // The same op does not need to be built again, but must wait for the build to finish before its kernel mod is set | |||||
| if (processed_fusion_kernel.find(json_name) != processed_fusion_kernel.end()) { | |||||
| build_manger->SaveSameFusionOpInfo(fusion_scope_iter.scope_id, json_name, tbe::kProcessorAiCore, input_size_list, | |||||
| output_size_list); | |||||
| continue; | |||||
| } | |||||
| (void)processed_fusion_kernel.insert(json_name); | |||||
| // generate soc info json | // generate soc info json | ||||
| nlohmann::json soc_info_json; | nlohmann::json soc_info_json; | ||||
| TbeUtils::GenSocInfo(&soc_info_json); | TbeUtils::GenSocInfo(&soc_info_json); | ||||
| @@ -138,6 +146,11 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| (void)kernel_mod_ret.emplace(kernel_mod_item); | (void)kernel_mod_ret.emplace(kernel_mod_item); | ||||
| } | } | ||||
| } | } | ||||
| bool ret = build_manger->GenSameFusionOpKernelMod(&kernel_mod_ret); | |||||
| if (!ret) { | |||||
| MS_LOG(INFO) << "Fusion warning: Fuison op has cache failed."; | |||||
| } | |||||
| MS_LOG(INFO) << "Build Fusion Kernel Failed Num: " << build_failed_num; | MS_LOG(INFO) << "Build Fusion Kernel Failed Num: " << build_failed_num; | ||||
| return kernel_mod_ret; | return kernel_mod_ret; | ||||
| } | } | ||||
| @@ -150,12 +150,18 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||||
| static int32_t dynamic_index = 0; | static int32_t dynamic_index = 0; | ||||
| op_info_json[kJDynamicIndex] = dynamic_index++; | op_info_json[kJDynamicIndex] = dynamic_index++; | ||||
| } | } | ||||
| std::string json_str = op_info_json.dump(); | |||||
| size_t hash_id = std::hash<std::string>()(json_str); | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | ||||
| // generate soc info json | |||||
| nlohmann::json soc_info_json; | |||||
| TbeUtils::GenSocInfo(&soc_info_json); | |||||
| soc_info_json[kAutoTilingMode] = tune_mode; | |||||
| std::string json_str = op_info_json.dump() + soc_info_json.dump(); | |||||
| size_t hash_id = std::hash<std::string>()(json_str); | |||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| op_info_json[kJFullName] = anf_node->fullname_with_scope(); | op_info_json[kJFullName] = anf_node->fullname_with_scope(); | ||||
| json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | ||||
| json_info_ = json_str; | json_info_ = json_str; | ||||
| @@ -175,10 +181,7 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||||
| op_info_json[kJAttrDesc] = attr_desc; | op_info_json[kJAttrDesc] = attr_desc; | ||||
| } | } | ||||
| // generate soc info json | |||||
| nlohmann::json soc_info_json; | |||||
| TbeUtils::GenSocInfo(&soc_info_json); | |||||
| soc_info_json[kAutoTilingMode] = tune_mode; | |||||
| // merge json | |||||
| soc_info_json[kJSocVersion] = soc_version; | soc_info_json[kJSocVersion] = soc_version; | ||||
| (*kernel_json)[kJSocInfo] = soc_info_json; | (*kernel_json)[kJSocInfo] = soc_info_json; | ||||
| (*kernel_json)[kJOpInfo] = op_info_json; | (*kernel_json)[kJOpInfo] = op_info_json; | ||||
| @@ -36,7 +36,7 @@ using mindspore::kernel::tbe::TbeUtils; | |||||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | ||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | auto build_manger = std::make_shared<ParallelBuildManager>(); | ||||
| MS_EXCEPTION_IF_NULL(build_manger); | MS_EXCEPTION_IF_NULL(build_manger); | ||||
| set<std::string> processed_kernel; | |||||
| static set<std::string> processed_kernel; | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | ||||
| @@ -201,6 +201,19 @@ void ParallelBuildManager::SaveSameOpInfo(const mindspore::AnfNodePtr &anf_node, | |||||
| same_op_list_.push_back(task_info); | same_op_list_.push_back(task_info); | ||||
| } | } | ||||
| void ParallelBuildManager::SaveSameFusionOpInfo(const int64_t scope_id, const std::string &json_name, | |||||
| const std::string &processor, | |||||
| const std::vector<size_t> &input_size_list, | |||||
| const std::vector<size_t> &output_size_list) { | |||||
| struct KernelBuildTaskInfo task_info; | |||||
| task_info.scope_id = scope_id; | |||||
| task_info.json_name = json_name; | |||||
| task_info.processor = processor; | |||||
| task_info.input_size_list.assign(input_size_list.begin(), input_size_list.end()); | |||||
| task_info.output_size_list.assign(output_size_list.begin(), output_size_list.end()); | |||||
| same_op_list_.push_back(task_info); | |||||
| } | |||||
| bool ParallelBuildManager::GenSameOpKernelMod() const { | bool ParallelBuildManager::GenSameOpKernelMod() const { | ||||
| for (const auto &task_info : same_op_list_) { | for (const auto &task_info : same_op_list_) { | ||||
| bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list, | bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list, | ||||
| @@ -213,6 +226,24 @@ bool ParallelBuildManager::GenSameOpKernelMod() const { | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const { | |||||
| bool ret = true; | |||||
| for (const auto &task_info : same_op_list_) { | |||||
| auto kernel_pack = TbeUtils::SearchCache(task_info.json_name, tbe::kProcessorAiCore); | |||||
| if (kernel_pack != nullptr) { | |||||
| auto kernel_mode = GenKernelMod(task_info.json_name, tbe::kProcessorAiCore, task_info.input_size_list, | |||||
| task_info.output_size_list, kernel_pack); | |||||
| if (kernel_mode != nullptr) { | |||||
| (*kernel_mode_ret)[task_info.scope_id] = kernel_mode; | |||||
| continue; | |||||
| } | |||||
| } | |||||
| MS_LOG(INFO) << "can't find " << task_info.json_name << " in cache."; | |||||
| ret = false; | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| bool ParallelBuildManager::SearchInCache(const std::string &json_name, const std::string &processor, | bool ParallelBuildManager::SearchInCache(const std::string &json_name, const std::string &processor, | ||||
| const std::vector<size_t> &input_size_list, | const std::vector<size_t> &input_size_list, | ||||
| const std::vector<size_t> &output_size_list, mindspore::AnfNode *node) const { | const std::vector<size_t> &output_size_list, mindspore::AnfNode *node) const { | ||||
| @@ -36,7 +36,7 @@ struct KernelBuildTaskInfo { | |||||
| std::string json_name; | std::string json_name; | ||||
| std::vector<size_t> input_size_list; | std::vector<size_t> input_size_list; | ||||
| std::vector<size_t> output_size_list; | std::vector<size_t> output_size_list; | ||||
| int32_t scope_id; | |||||
| int64_t scope_id; | |||||
| }; | }; | ||||
| class ParallelBuildManager { | class ParallelBuildManager { | ||||
| @@ -48,7 +48,10 @@ class ParallelBuildManager { | |||||
| int32_t scope_id = 0); | int32_t scope_id = 0); | ||||
| void SaveSameOpInfo(const AnfNodePtr &anf_node, const std::string &json_name, | void SaveSameOpInfo(const AnfNodePtr &anf_node, const std::string &json_name, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list); | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list); | ||||
| void SaveSameFusionOpInfo(const int64_t scope_id, const std::string &json_name, const std::string &processor, | |||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list); | |||||
| bool GenSameOpKernelMod() const; | bool GenSameOpKernelMod() const; | ||||
| bool GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const; | |||||
| bool SearchInCache(const std::string &json_name, const std::string &processor, | bool SearchInCache(const std::string &json_name, const std::string &processor, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | ||||
| AnfNode *node) const; | AnfNode *node) const; | ||||