From: @liubuyu Reviewed-by: @zhoufeng54,@jjfeing,@kisnwang Signed-off-by: @kisnwang pull/13604/MERGE
| @@ -128,7 +128,7 @@ def build_op(build_type, json_str, tune_mode=None): | |||||
| else: | else: | ||||
| res = op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) | res = op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) | ||||
| if tune_mode is not None: | if tune_mode is not None: | ||||
| return res, (inputs_args, outputs_args, attrs_args), op_module_name | |||||
| return None, (inputs_args, outputs_args, attrs_args), op_module_name | |||||
| return res | return res | ||||
| except Exception as e: | except Exception as e: | ||||
| @@ -331,6 +331,8 @@ class TbeProcess: | |||||
| if tune_mode == RL_TUNE: | if tune_mode == RL_TUNE: | ||||
| ret, job_type, compile_info = self.__tuner.rl_tune(task_id, op_json) | ret, job_type, compile_info = self.__tuner.rl_tune(task_id, op_json) | ||||
| if isinstance(compile_info, dict): | |||||
| compile_info = json.dumps(compile_info) | |||||
| if job_type is RL_OFFLINE or job_type is RL_ONLINE: | if job_type is RL_OFFLINE or job_type is RL_ONLINE: | ||||
| if not ret: | if not ret: | ||||
| # offline and online hit will return false | # offline and online hit will return false | ||||
| @@ -388,7 +390,7 @@ class TbeProcess: | |||||
| for item in ret: | for item in ret: | ||||
| task_id = item['task_id'] | task_id = item['task_id'] | ||||
| status_code = item['status_code'] | status_code = item['status_code'] | ||||
| compile_info = item["op_res"] if "op_res" in item else "{}" | |||||
| compile_info = json.dumps(item["op_res"] if "op_res" in item else None) | |||||
| res = None | res = None | ||||
| if status_code == 0: | if status_code == 0: | ||||
| res = task_id, "Success", compile_info | res = task_id, "Success", compile_info | ||||
| @@ -296,7 +296,7 @@ class TbeTuner: | |||||
| # todo build with build_single_op_from_c | # todo build with build_single_op_from_c | ||||
| base_kernel = './kernel_meta/' + kernel_name + '.o' | base_kernel = './kernel_meta/' + kernel_name + '.o' | ||||
| job_type = RL_COMPILE | job_type = RL_COMPILE | ||||
| compile_info = "{}" | |||||
| compile_info = None | |||||
| try: | try: | ||||
| compile_info, op_args, op_module_name = build_op(OP_BUILD, json.dumps(json_info), tune_mode) | compile_info, op_args, op_module_name = build_op(OP_BUILD, json.dumps(json_info), tune_mode) | ||||
| # pylint: disable=broad-except | # pylint: disable=broad-except | ||||
| @@ -317,7 +317,7 @@ class TbeTuner: | |||||
| self.module_list[op_module_name] = 1 | self.module_list[op_module_name] = 1 | ||||
| self.fusion_need_sync += 1 | self.fusion_need_sync += 1 | ||||
| return ret, job_type, json.dumps(compile_info) | |||||
| return ret, job_type, compile_info | |||||
| def fusion_rl_tune(self, task_id, json_info): | def fusion_rl_tune(self, task_id, json_info): | ||||
| """ | """ | ||||
| @@ -334,6 +334,7 @@ class TbeTuner: | |||||
| converted_json = fusion_to_fusion(json.dumps(json_info), tune_mode="RL") | converted_json = fusion_to_fusion(json.dumps(json_info), tune_mode="RL") | ||||
| job_type = RL_COMPILE | job_type = RL_COMPILE | ||||
| base_kernel = './kernel_meta/' + kernel_name + '.o' | base_kernel = './kernel_meta/' + kernel_name + '.o' | ||||
| compile_info = None | |||||
| try: | try: | ||||
| fusion_op(converted_json) | fusion_op(converted_json) | ||||
| # pylint: disable=broad-except | # pylint: disable=broad-except | ||||
| @@ -341,7 +342,7 @@ class TbeTuner: | |||||
| exc_type, exc_value, _ = sys.exc_info() | exc_type, exc_value, _ = sys.exc_info() | ||||
| log.error( | log.error( | ||||
| "exc_type:{}, exc_value:{}, exc_traceback:{}".format(exc_type, exc_value, traceback.format_exc())) | "exc_type:{}, exc_value:{}, exc_traceback:{}".format(exc_type, exc_value, traceback.format_exc())) | ||||
| return False, job_type | |||||
| return False, job_type, compile_info | |||||
| if self.offline_tune: | if self.offline_tune: | ||||
| job_type = RL_OFFLINE | job_type = RL_OFFLINE | ||||
| dump_fusion_json(converted_json, self.offline_dump_path) | dump_fusion_json(converted_json, self.offline_dump_path) | ||||
| @@ -351,7 +352,7 @@ class TbeTuner: | |||||
| l1size = 0 | l1size = 0 | ||||
| ret = dispatch_fusion_tune_task(graph_id, task_id, l1size, base_kernel, kernel_name, full_name, | ret = dispatch_fusion_tune_task(graph_id, task_id, l1size, base_kernel, kernel_name, full_name, | ||||
| converted_json) | converted_json) | ||||
| return ret, job_type | |||||
| return ret, job_type, compile_info | |||||
| def fusion_ga_tune(self, task_id, json_info): | def fusion_ga_tune(self, task_id, json_info): | ||||
| """ | """ | ||||
| @@ -289,7 +289,6 @@ void ParallelBuildManager::ResetTaskInfo() { | |||||
| } | } | ||||
| task_map_.clear(); | task_map_.clear(); | ||||
| same_op_list_.clear(); | same_op_list_.clear(); | ||||
| AscendKernelBuildClient::Instance().TbeReset(); | |||||
| } | } | ||||
| AnfNodePtr ParallelBuildManager::GetAnfNodeByTaskID(int32_t task_id) { | AnfNodePtr ParallelBuildManager::GetAnfNodeByTaskID(int32_t task_id) { | ||||
| @@ -41,7 +41,7 @@ struct KernelBuildTaskInfo { | |||||
| class ParallelBuildManager { | class ParallelBuildManager { | ||||
| public: | public: | ||||
| ParallelBuildManager() = default; | |||||
| ParallelBuildManager() { AscendKernelBuildClient::Instance().TbeReset(); } | |||||
| ~ParallelBuildManager(); | ~ParallelBuildManager(); | ||||
| void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | ||||