@@ -26,6 +26,7 @@ build_in_impl_path = get_built_in_impl_path()
 # op function list
 op_build = "compile"
 
+
 def _initialize(impl_path):
     """Initialize"""
     if impl_path == "":
@@ -37,6 +38,7 @@ def _initialize(impl_path):
     sys.path.insert(0, op_module_name)
 
+
 def _replace_range(args):
     for arg in args:
         if not arg.__contains__('range'):
@@ -47,6 +49,7 @@ def _replace_range(args):
             if value < 0:
                 range_item[index] = None
 
+
 def build_op(build_type, json_str):
     """
     call op functions with function name and input args json_str
@@ -89,9 +92,9 @@ def build_op(build_type, json_str):
             op_module = __import__(op_name)
         else:
             if is_dynamic_shape:
-                op_module = __import__("impl.dynamic."+op_name, globals(), locals(), [op_name], 0)
+                op_module = __import__("impl.dynamic." + op_name, globals(), locals(), [op_name], 0)
             else:
-                op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
+                op_module = __import__("impl." + op_name, globals(), locals(), [op_name], 0)
     # get function
     if build_type == op_build:
         if custom_flag:
@@ -149,6 +152,7 @@ def compile_with_json(json_str):
         ret = build_op(op_build, json_str)
     return ret
 
+
 if __name__ == "__main__":
     in_args = sys.stdin.readline()
     result = compile_with_json(in_args)
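The `__main__` entry reads a single line of JSON from stdin and hands it to `compile_with_json`. A hedged sketch of driving this entry point as a subprocess (the file name and JSON fields below are illustrative; the real op json schema is much larger):

```python
# Sketch: feeding one op description to the compiler over stdin, the way the
# framework invokes this script as a subprocess. Fields are illustrative.
import json
import subprocess

op_json = json.dumps({
    "op_info": {"name": "add", "kernel_name": "add_12345"},
    "SocInfo": {"socVersion": "Ascend910"},
})
# The script reads one line from stdin, so terminate the payload with \n.
proc = subprocess.run(
    ["python", "compiler.py"],   # path assumed
    input=op_json + "\n",
    capture_output=True,
    text=True,
    check=False,
)
print(proc.stdout)
```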
@@ -15,6 +15,7 @@
 """tbe common"""
 import os
 
+
 class TBEException(Exception):
     """tbe exception class"""
@@ -64,6 +65,7 @@ def _check_arg_info(item):
     if 'param_type' not in item or not item['param_type']:
         raise ValueError("Json string Errors, key:param_type not found.")
 
+
 def get_input_output(io_info, args):
     """
     Parse args.
@@ -100,6 +102,7 @@ def get_input_output(io_info, args):
         if len(item) > 1:
             args.append(arg)
 
+
 def get_attr(attr_info, args):
     """
     Parse args.
@@ -118,6 +121,7 @@ def get_attr(attr_info, args):
         if item["name"] != "isRef":
             args.append(item['value'])
 
+
 def get_args(op_info, arg_type):
     """
     Parse args.
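`_check_arg_info` enforces that every parsed input/output item carries a non-empty `param_type`. A minimal sketch of that contract (the surrounding keys are assumed):

```python
# Sketch of the validation rule shown above: every parsed input/output item
# must carry a non-empty 'param_type'. The other keys are illustrative.
def check_param_type(item):
    if 'param_type' not in item or not item['param_type']:
        raise ValueError("Json string Errors, key:param_type not found.")

check_param_type({"param_type": "required", "shape": [16, 16], "dtype": "float16"})  # passes
try:
    check_param_type({"shape": [16, 16]})
except ValueError as err:
    print(err)  # Json string Errors, key:param_type not found.
```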
@@ -19,10 +19,21 @@ import multiprocessing
 import subprocess
 import sys
 import os
+import time
 import json
+from mindspore import log
 from .tbe_common import check_kernel_info, TBEException
 from .helper import _op_select_format, _check_supported
 
+# tune type
+NO_TUNE = "NO_TUNE"
+GA_TUNE = "GA"
+RL_TUNE = "RL"
+# job type
+RL_COMPILE = "RL_COMPILE"
+RL_OFFLINE = "RL_OFFLINE"
+RL_ONLINE = "RL_ONLINE"
+
 
 def create_tbe_parallel_process():
     """
@@ -105,20 +116,32 @@ class TbeProcess:
     """tbe process"""
 
     def __init__(self):
-        self.__processe_num = multiprocessing.cpu_count()
-        self.default_num = 24
+        self.__process_num = multiprocessing.cpu_count()
+        self.compile_process_num = 24
         self.__pool = None
         self.__next_task_id = 1
         self.__running_tasks = []
+        self.__all_tune_tasks = []
+        self.__running_tune_tasks = []
+        self.__finish_tune_task = []
+        self.__failed_tune_task = []
+        self.__task_info = {}
+        self.__tuner = None
+        self.tune_process_num = 0
+        self.tune_mode = None
+        self.offline_tune = False
+        self.auto_tune_op_list = None
+        self.tune_ops_name = os.getenv("TUNE_OPS_NAME")
+        self.selected_tune_ops = self.tune_ops_name.split(",") if self.tune_ops_name is not None else None
 
     def __del__(self):
         if self.__pool is not None:
             self.__pool.terminate()
             self.__pool.join()
             del self.__pool
-
-    def init_auto_tune_env(self, mode):
-        return "Success"
+        if self.__tuner is not None:
+            self.__tuner.deinit()
+            del self.__tuner
 
     def init_process_num(self):
         """
@@ -129,28 +152,140 @@ class TbeProcess:
         process_num = os.getenv("MS_BUILD_PROCESS_NUM")
         res = "Success"
         if process_num is None:
-            res = "Success, using default build process num: " + str(self.default_num)
+            res = "Success, using default build process num: " + str(self.compile_process_num)
         elif process_num.isdigit():
             if int(process_num) in range(1, 25):
-                self.default_num = int(process_num)
-                res = "Success, using custom build process num: " + str(self.default_num)
+                self.compile_process_num = int(process_num)
+                res = "Success, using custom build process num: " + str(self.compile_process_num)
             else:
-                res = "TBEException",\
+                res = "TBEException", \
                       "ERROR: [MS_BUILD_PROCESS_NUM] should be in range(1, 25), but got : " + str(process_num)
         elif not process_num.isdigit():
             res = "TBEException", "ERROR: [MS_BUILD_PROCESS_NUM] type should be a int num, but got :" + process_num
         return res
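The validation accepts only decimal strings in 1..24; anything else yields a `("TBEException", message)` tuple. A standalone sketch mirroring that rule (the helper name is ours):

```python
def parse_build_process_num(value, default=24):
    """Mirror of init_process_num's check: accept "1".."24" only."""
    if value is None:
        return default
    if value.isdigit() and int(value) in range(1, 25):
        return int(value)
    raise ValueError("[MS_BUILD_PROCESS_NUM] should be in range(1, 25), but got: " + value)

print(parse_build_process_num("8"))    # 8
print(parse_build_process_num(None))   # 24
# "8.5" and "-4" both fail str.isdigit(), so they are rejected as non-integers.
```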
+    def init_auto_tune_env(self, tune_mode):
+        """
+        Init tbe auto tune env
+        :param tune_mode: RL, GA or NO_TUNE
+        :return: Success or failed info
+        """
+        self.tune_mode = tune_mode
+        if os.getenv("ENABLE_TUNE_DUMP", "").lower() == "true":
+            self.offline_tune = True
+            log.info("Tune offline mode is on...")
+        if self.tune_mode == "NO_TUNE" and not self.offline_tune:
+            log.info("[NO_TUNE] There is no need to initialize auto_tune related variables.")
+            return "Success"
+        try:
+            # only checks that the following modules exist; they are used in tuner.py
+            import auto_tune_main
+            import schedule_search  # pylint: disable=unused-import
+            self.auto_tune_op_list = auto_tune_main.enable_auto_tune_support()
+        except ImportError:
+            res = "TBEException", \
+                  "No module named `auto_tune` or `schedule_search`. If you want to tune your op's performance, " \
+                  "please configure the `auto_tune` or `schedule_search` related environment variables. " \
+                  "Try to set the following environment variables: " \
+                  "export fwk_path=/usr/local/Ascend/fwkacllib; " \
+                  "export PYTHONPATH=${fwk_path}/python/site-packages:$PYTHONPATH; " \
+                  "export PYTHONPATH=${fwk_path}/python/site-packages/auto_tune.egg/auto_tune:$PYTHONPATH; " \
+                  "export PYTHONPATH=${fwk_path}/python/site-packages/schedule_search.egg:$PYTHONPATH"
+            return res
+        from .tuner import TbeTuner
+        if self.compile_process_num > 2:
+            # reserve half of the compile processes for tuning (integer count)
+            self.tune_process_num = self.compile_process_num // 2
+        if self.__tuner is None:
+            self.__tuner = TbeTuner(self.offline_tune, self.tune_mode)
+        return "Success"
+
     def close_pool(self):
+        """
+        close tbe compilation pool
+        """
         self.__pool.terminate()
         self.__pool.join()
         del self.__pool
 
+    def close_tuner(self):
+        """
+        close tbe tuner
+        """
+        self.__tuner.deinit()
+        del self.__tuner
+
     def exit(self):
+        """
+        exit tbe process
+        """
+        log.info("start to exit tbe process...")
         if self.__pool is not None:
             stop_thread = threading.Thread(target=self.close_pool)
             stop_thread.daemon = True
             stop_thread.start()
+            log.info("tbe process pool exited.")
+        if self.__tuner is not None:
+            stop_tuner = threading.Thread(target=self.close_tuner)
+            stop_tuner.daemon = True
+            stop_tuner.start()
+            log.info("tbe process tuner exited.")
+
+    def _if_tune_ops(self, op_json):
+        """
+        Check whether the user assigned specific ops to tune
+        :param op_json: ori json
+        :return: bool True or False
+        """
+        if self.tune_ops_name is None:
+            return True
+        if "fusion_op" in op_json:
+            full_name = op_json["fusion_op"]["full_name"]
+        else:
+            full_name = op_json["op_info"]["full_name"]
+        return full_name in self.selected_tune_ops
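`_if_tune_ops` is a whitelist check: with `TUNE_OPS_NAME` unset every op qualifies; otherwise only ops whose `full_name` appears in the comma-separated list do. A sketch with illustrative scope names:

```python
# Sketch of the TUNE_OPS_NAME whitelist semantics; names are illustrative.
import os

os.environ["TUNE_OPS_NAME"] = "Default/network/Conv2D-op1,Default/network/MatMul-op7"
selected = os.environ["TUNE_OPS_NAME"].split(",")

op_json = {"op_info": {"full_name": "Default/network/MatMul-op7"}}
full_name = (op_json["fusion_op"]["full_name"] if "fusion_op" in op_json
             else op_json["op_info"]["full_name"])
print(full_name in selected)  # True -> this op is tuned; others fall back to NO_TUNE
```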
+
+    def select_tune_mode(self, op_json):
+        """
+        Select the corresponding tune mode from op json and env info for the op
+        :param op_json: ori json
+        :return: NO_TUNE, RL_TUNE or GA_TUNE
+        """
+        json_info = json.loads(op_json)
+        tune_mode = json_info["SocInfo"]["autoTilingMode"]
+        kernel_names = self.get_kernel_names(json_info)
+        if self.offline_tune:
+            if not self._if_tune_ops(json_info):
+                return NO_TUNE
+            return RL_TUNE
+        if not self._if_tune_ops(json_info):
+            tune_mode = NO_TUNE
+        if GA_TUNE in tune_mode:
+            for kernel_name in kernel_names:
+                if kernel_name in self.auto_tune_op_list:
+                    return GA_TUNE
+        if RL_TUNE in tune_mode:
+            return RL_TUNE
+        return NO_TUNE
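The decision order matters: offline dump mode forces RL for whitelisted ops; otherwise GA wins when the SoC's `autoTilingMode` contains "GA" and one of the kernels is in the GA-capable list, RL comes next, and NO_TUNE is the fallback. A simplified mirror of the online path, with an illustrative json:

```python
# Worked example of the decision order; the op json is illustrative.
import json

op_json = json.dumps({
    "SocInfo": {"autoTilingMode": "GA,RL"},
    "op_info": {"name": "conv2d", "full_name": "Default/Conv2D-op1"},
})
json_info = json.loads(op_json)
auto_tune_op_list = ["conv2d", "matmul"]  # assumed GA-capable kernels

tune_mode = json_info["SocInfo"]["autoTilingMode"]
if "GA" in tune_mode and json_info["op_info"]["name"] in auto_tune_op_list:
    print("GA")        # GA is preferred when the kernel supports it
elif "RL" in tune_mode:
    print("RL")
else:
    print("NO_TUNE")
```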
+
+    def get_kernel_names(self, json_info):
+        """
+        Get kernel names from op json
+        :param json_info: ori json
+        :return: kernel names
+        """
+        kernel_names = []
+        if "fusion_op" in json_info:
+            for op in json_info["fusion_op"]["op_list"]:
+                if "func_name" in op:
+                    kernel_names.append(op["func_name"])
+        else:
+            kernel_names.append(json_info['op_info']['name'])
+        return kernel_names
 
     def start_compile_op(self, op_json):
         """
@@ -162,14 +297,49 @@ class TbeProcess:
         Returns:
             int, task id(>0). -1 if error
         """
-        if self.__processe_num > self.default_num:
-            self.__processe_num = self.default_num
         task_id = self.__next_task_id
         self.__next_task_id = self.__next_task_id + 1
-        if self.__pool is None:
-            self.__pool = multiprocessing.Pool(processes=self.__processe_num)
-        task_future = self.__pool.apply_async(func=run_compiler, args=(op_json,))
-        self.__running_tasks.append((task_id, task_future))
+        tune_mode = self.select_tune_mode(op_json)
+        self.__task_info[task_id] = op_json
+        if tune_mode == NO_TUNE:
+            if self.__process_num > self.compile_process_num:
+                self.__process_num = self.compile_process_num
+            if self.__pool is None:
+                self.__pool = multiprocessing.Pool(processes=self.__process_num)
+            task_future = self.__pool.apply_async(func=run_compiler, args=(op_json,))
+            self.__running_tasks.append((task_id, task_future))
+        else:
+            log.info("start_compile_op: op json:\n {}".format(op_json))
+            if self.__tuner is None:
+                log.error("Please confirm that the mode isn't NO_TUNE and that auto_tune has been initialized.")
+                return task_id
+            if not self.__tuner.tune_init:
+                status = self.__tuner.init_tune_interface(op_json, self.tune_process_num)
+                if not status:
+                    log.error("Auto tune init failed!")
+                    return task_id
+                self.__tuner.tune_init = True
+            self.__all_tune_tasks.append(task_id)
+            self.__running_tune_tasks.append(task_id)
+            if tune_mode == RL_TUNE:
+                ret, job_type = self.__tuner.rl_tune(task_id, op_json)
+                if job_type in (RL_OFFLINE, RL_ONLINE):
+                    if not ret:
+                        # an offline or online knowledge-bank hit returns False: nothing left to compile
+                        res = task_id, "Success", "Success"
+                        self.__finish_tune_task.append(res)
+                        self.__running_tune_tasks.remove(task_id)
+                elif job_type == RL_COMPILE:
+                    if not ret:
+                        res = task_id, "Fail", "Fail"
+                        self.__finish_tune_task.append(res)
+                        self.__running_tune_tasks.remove(task_id)
+            elif tune_mode == GA_TUNE:
+                self.__tuner.ga_tune(task_id, op_json)
+            else:
+                log.error("Unsupported Tune Mode!")
         return task_id
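A hedged sketch of the submit/drain protocol a caller follows around `start_compile_op` and `wait_one` (the json payloads are placeholders, the import path is assumed):

```python
# Hedged sketch of the submit/drain protocol; import path assumed.
from mindspore._extends.parallel_compile.tbe_compiler.tbe_process import tbe_process

op_jsons = []  # real kernel-compile json strings would go here
pending = set()
for op_json in op_jsons:
    pending.add(tbe_process.start_compile_op(op_json))
while pending:
    task_id, status, result = tbe_process.wait_one()
    if task_id <= 0:
        break  # -1: nothing left / error, 0: tune timeout
    pending.discard(task_id)
    print(task_id, status, result)
```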
 
     def wait_one(self):
@@ -180,7 +350,7 @@ class TbeProcess:
             int, id of the finished task. -1 if error,0 if no unfinished task
             str, result of compile task
         """
-        ret = 0, "Success"
+        ret = 0, "Failed", "Failed"
         if self.__running_tasks:
             task_id, task_future = self.__running_tasks.pop(0)
             ret_type, result = task_future.get(330)
@@ -190,7 +360,46 @@ class TbeProcess:
                 ret = task_id, ret_type + ":" + result, "_"
             else:
                 ret = task_id, "Exception: Not support return type:" + str(ret_type), "_"
-        return ret
+            return ret
+        if self.__finish_tune_task:
+            ret = self.__finish_tune_task.pop()
+            return ret
+        if self.__running_tune_tasks:
+            query_count = 0
+            total_query_count = len(self.__running_tune_tasks) * 2 * 10
+            while query_count < total_query_count:
+                ret = self.__tuner.get_finish_tasks()
+                if not ret:
+                    query_count = query_count + 1
+                    time.sleep(30)
+                    log.info("{} of {} tasks are still tuning ({} tasks failed), wait another 30 seconds...".format(
+                        len(self.__running_tune_tasks),
+                        len(self.__all_tune_tasks), len(self.__failed_tune_task)))
+                else:
+                    for item in ret:
+                        task_id = item['task_id']
+                        status_code = item['status_code']
+                        res = None
+                        if status_code == 0:
+                            res = task_id, "Success", "Success"
+                        else:
+                            self.__failed_tune_task.append(task_id)
+                            log.error("task_id:{}, json:{}".format(task_id, self.__task_info[task_id]))
+                            res = task_id, "Failed", "Failed"
+                        self.__finish_tune_task.append(res)
+                        self.__running_tune_tasks.remove(task_id)
+                    ret = self.__finish_tune_task.pop()
+                    return ret
+            log.error("Tune task timeout!")
+            log.error("AllTaskNum:{}, RunningTaskNum:{}, FailedTaskNum:{}".format(len(self.__all_tune_tasks),
+                                                                                  len(self.__running_tune_tasks),
+                                                                                  len(self.__failed_tune_task)))
+            return 0, "Failed", "Failed"
+        log.error("All tasks are done, there is no task to wait for.")
+        log.error("AllTaskNum:{}, RunningTaskNum:{}, FailedTaskNum:{}".format(len(self.__all_tune_tasks),
+                                                                              len(self.__running_tune_tasks),
+                                                                              len(self.__failed_tune_task)))
+        return -1, "Failed", "Failed"
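The polling budget is `len(running_tune_tasks) * 2 * 10` queries at 30 s apiece, i.e. 10 minutes of waiting per outstanding tune task. Worked out:

```python
# The timeout budget in wait_one, made explicit.
running_tune_tasks = 3
total_query_count = running_tune_tasks * 2 * 10   # 60 polls
poll_interval_s = 30
print(total_query_count * poll_interval_s / 60)   # 30.0 minutes before "Tune task timeout"
```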
 
     def reset_task_info(self):
         """
@@ -198,6 +407,14 @@ class TbeProcess:
         """
         if self.__running_tasks:
             self.__running_tasks.clear()
+        if self.__all_tune_tasks:
+            self.__all_tune_tasks.clear()
+        if self.__running_tune_tasks:
+            self.__running_tune_tasks.clear()
+        if self.__finish_tune_task:
+            self.__finish_tune_task.clear()
+        if self.__failed_tune_task:
+            self.__failed_tune_task.clear()
 
 
 tbe_process = TbeProcess()
@@ -0,0 +1,372 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""tuner process"""
+import os
+import datetime
+import json
+import sys
+import traceback
+from te.platform.cce_conf import te_set_version
+from te.platform.fusion_manager import set_current_op_name
+from te.platform.fusion_util import fusion_op, dump_fusion_json
+from te.platform.parallel_compilation import init_multi_process_env, get_finished_compilation_task, \
+    deinit_multi_process_env, dispatch_autotune_task, start_ga_multi_process
+import auto_tune
+from schedule_search.rl_online_tune import rl_tune_init, dispatch_fusion_tune_task, dispatch_single_tune_task, \
+    rl_tune_deinit
+from mindspore import log
+from .tbe_common import get_args
+from .re_construct_json import single_to_fusion, fusion_to_fusion
+
+TE_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR"]
+RL_COMPILE = "RL_COMPILE"
+RL_OFFLINE = "RL_OFFLINE"
+RL_ONLINE = "RL_ONLINE"
+PLATFORM_FLAG = ["ascend310", "ascend910", "Hi3796CV300ES", "ascend710", "ascend610", "Hi3796CV300CS", "SD3403"]
+
+
+class TbeTuner:
+    """tbe tuner for ga tune or rl tune"""
+
+    def __init__(self, offline_tune, tune_mode):
+        self.offline_tune = offline_tune
+        self.tune_init = False
+        self.rl_init = False
+        self.offline_dump_path = "./tune_dump"
+        if os.environ.get("TUNE_DUMP_PATH") is not None:
+            self.offline_dump_path = os.getenv("TUNE_DUMP_PATH", "")
+        self._creating_custom_path(tune_mode)
+
+    def init_tune_interface(self, json_str, process_num):
+        """
+        Initialize tuner interface
+        :param json_str: ori json
+        :param process_num: process num for tuner
+        :return: bool True or False
+        """
+        json_info = json.loads(json_str)
+        soc_info = self.get_soc_info(json_info)
+        cur_cce_product_params = te_set_version(*soc_info)
+        if cur_cce_product_params is None:
+            log.warning("Set Soc Info failed.")
+        tune_mode = self.get_tune_mode(json_info)
+        ret = self.parallel_compilation_init(soc_info, tune_mode, process_num)
+        if not ret:
+            log.error("Init parallel compilation env failed")
+            return False
+        return True
+
+    def deinit(self):
+        """
+        Deinitialize tuner interface
+        """
+        deinit_multi_process_env()
+        if self.rl_init:
+            rl_tune_deinit()
+
+    def get_tune_mode(self, json_info):
+        """
+        Get the corresponding tune mode from op json and env info
+        :param json_info: ori json
+        :return: NO_TUNE, RL_TUNE, GA_TUNE or "RL,GA"
+        """
+        tune_mode = json_info["SocInfo"]["autoTilingMode"]
+        if self.offline_tune:
+            tune_mode = "RL"
+        return tune_mode
+
+    def __directory_creation(self, path, concat_path):
+        """
+        Create directory
+        """
+        path = os.path.join(path, concat_path)
+        if not os.path.isdir(path):
+            os.makedirs(path, 0o750)
+        return path
+
+    def __creating_default_custom_path(self, tune_mode, base_custom_path):
+        """
+        Create default custom path
+        """
+        base_custom_path = self.__directory_creation(base_custom_path, "data")
+        tune_flag = []
+        if "RL" in tune_mode:
+            tune_flag.append("rl")
+        if "GA" in tune_mode:
+            tune_flag.append("tiling")
+        for tune_path in tune_flag:
+            real_path = self.__directory_creation(base_custom_path, tune_path)
+            for soc_version in PLATFORM_FLAG:
+                final_path = self.__directory_creation(real_path, soc_version)
+                final_path = self.__directory_creation(final_path, "custom")
+
+    def _creating_custom_path(self, tune_mode):
+        """
+        Create custom path
+        """
+        if "NO_TUNE" in tune_mode:
+            return
+        base_custom_path = os.getenv("TUNE_BANK_PATH", None)
+        tune_bank_flag = True
+        if not base_custom_path:
+            base_custom_path = os.path.dirname(os.path.realpath(auto_tune.__file__))
+            base_custom_path = os.path.realpath(os.path.join(base_custom_path, "../../../"))
+            tune_bank_flag = False
+        if not os.path.isdir(base_custom_path):
+            log.error("Check whether the tuning path [{}] exists.".format(base_custom_path))
+            return
+        if not os.access(base_custom_path, os.R_OK | os.W_OK | os.X_OK):
+            log.error("Check whether the permission on the tuning path [{}] is correct.".format(base_custom_path))
+            return
+        if not tune_bank_flag:
+            self.__creating_default_custom_path(tune_mode, base_custom_path)
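When `TUNE_BANK_PATH` is unset, the default bank directories are created under the root derived from the `auto_tune` installation; when it is set, the path is only checked for existence and permissions. A sketch of the layout `__creating_default_custom_path` builds (`base` here is illustrative):

```python
# Layout sketch: <base>/data/{rl,tiling}/<soc_version>/custom/
import os

base = "/tmp/tune_bank"  # illustrative; the real root comes from auto_tune's path
for flag in ("rl", "tiling"):          # rl when "RL" in tune_mode, tiling when "GA"
    for soc in ("ascend310", "ascend910"):
        print(os.path.join(base, "data", flag, soc, "custom"))
```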
+
+    def get_soc_info(self, json_info):
+        """
+        Get soc info
+        :param json_info: ori json
+        :return: soc info
+        """
+        soc_param = {}
+        soc_param["op_impl_mode"] = json_info["SocInfo"]["op_impl_mode"]
+        soc_param["op_debug_level"] = json_info["SocInfo"]["op_debug_level"]
+        soc_param["op_impl_mode_list"] = json_info["SocInfo"]["op_impl_mode_list"]
+        soc_param["op_debug_dir"] = ''
+        soc_param["vector_fp_ceiling"] = ''
+        soc_param['mdl_bank_path'] = ''
+        soc_param['op_bank_path'] = ''
+
+        soc_info = []
+        soc_info.append(json_info["SocInfo"]["socVersion"])
+        soc_info.append(json_info["SocInfo"]["coreType"])
+        soc_info.append(json_info["SocInfo"]["coreNum"])
+        soc_info.append(json_info["SocInfo"]["l1Fusion"])
+        soc_info.append(json_info["SocInfo"]["l2Mode"])
+        soc_info.append(json_info["SocInfo"]["l2Fusion"])
+        soc_info.append(soc_param)
+        return soc_info
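`get_soc_info` flattens the json into the positional list `te_set_version(*soc_info)` expects, with the `soc_param` dict last. A worked example with illustrative values:

```python
# Illustrative SocInfo payload and the positional list built from it.
json_info = {"SocInfo": {
    "socVersion": "Ascend910", "coreType": "AiCore", "coreNum": "32",
    "l1Fusion": "false", "l2Mode": "2", "l2Fusion": "false",
    "op_impl_mode": "", "op_debug_level": "", "op_impl_mode_list": [],
}}
soc = json_info["SocInfo"]
soc_param = {"op_impl_mode": soc["op_impl_mode"], "op_debug_level": soc["op_debug_level"],
             "op_impl_mode_list": soc["op_impl_mode_list"], "op_debug_dir": '',
             "vector_fp_ceiling": '', "mdl_bank_path": '', "op_bank_path": ''}
soc_info = [soc["socVersion"], soc["coreType"], soc["coreNum"],
            soc["l1Fusion"], soc["l2Mode"], soc["l2Fusion"], soc_param]
print(soc_info[0])  # "Ascend910" -> passed as te_set_version(*soc_info)
```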
+
+    def parallel_compilation_init(self, soc_info, tune_mode, process_num):
+        """
+        Initialize parallel compilation framework for tuner
+        :param soc_info: soc info
+        :param tune_mode: tuner mode
+        :param process_num: process num for tuner
+        :return: bool True or False
+        """
+        env_count = process_num
+        if "TE_PARALLEL_COMPILER" in os.environ:
+            env_count = os.getenv("TE_PARALLEL_COMPILER")
+            log.info("TE_PARALLEL_COMPILER is set to {}".format(env_count))
+            if int(env_count) > process_num:
+                env_count = process_num
+                log.info("change process count to {}".format(process_num))
+        os.environ["TE_PARALLEL_COMPILER"] = str(int(env_count))
+        pid_str = os.getpid()
+        time_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S%f')[:-3]
+        pid_ts = "{}_pid{}".format(time_str, pid_str)
+        embedding = False
+        enable_event = False
+        te_log_level = os.environ.get("TE_LOGLEVEL")
+        glog_level = os.environ.get("GLOG_v")
+        if glog_level is not None and te_log_level is None:
+            os.environ["TE_LOGLEVEL"] = TE_LOG_LEVEL[int(glog_level)]
+            global_loglevel = int(glog_level)
+        elif glog_level is None and te_log_level is None:
+            os.environ["TE_LOGLEVEL"] = TE_LOG_LEVEL[2]
+            global_loglevel = 3
+        else:
+            # TE_LOGLEVEL must be a valid index into TE_LOG_LEVEL
+            if int(te_log_level) >= len(TE_LOG_LEVEL):
+                log.error("Invalid environment TE_LOGLEVEL:{}".format(te_log_level))
+                te_log_level = 2
+            os.environ["TE_LOGLEVEL"] = TE_LOG_LEVEL[int(te_log_level)]
+            global_loglevel = int(te_log_level)
+        ret = init_multi_process_env(embedding, soc_info, tune_mode, global_loglevel, enable_event, pid_ts)
+        if ret is None:
+            log.error("Init multiprocess env failed")
+            return False
+        process_count = ret[0]
+        log.info("Init multiprocess env success with {} process".format(process_count))
+        if "RL" in tune_mode:
+            res_queue = ret[1]
+            live_checker = ret[2]
+            termin_event = ret[3]
+            ret = rl_tune_init(soc_info, res_queue, live_checker, termin_event, global_loglevel, pid_ts)
+            if not ret:
+                log.error("RL env init failed!")
+                return False
+            self.rl_init = True
+            log.info("RL Tune init success.")
+        if "GA" in tune_mode:
+            start_ga_multi_process(tune_mode)
+            log.info("GA Tune init success.")
+        return True
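`GLOG_v` uses numeric levels 0..3, which are translated to TE's level names; with neither variable set, `TE_LOGLEVEL` defaults to WARNING. The mapping, made explicit:

```python
# The GLOG_v -> TE_LOGLEVEL translation used above (values 0..3).
TE_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR"]
for glog_v in range(4):
    print(glog_v, "->", TE_LOG_LEVEL[glog_v])
# With neither GLOG_v nor TE_LOGLEVEL set, the code falls back to
# TE_LOG_LEVEL[2] ("WARNING").
```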
+
+    def rl_tune(self, task_id, op_json):
+        """
+        RL tune for single op and fusion op
+        :param task_id: task id for this op to tune
+        :param op_json: op's info
+        :return: tune result
+        """
+        json_info = json.loads(op_json)
+        if "fusion_op" in json_info:
+            ret = self.fusion_rl_tune(task_id, json_info)
+        else:
+            ret = self.single_rl_tune(task_id, json_info)
+        return ret
+
+    def ga_tune(self, task_id, op_json):
+        """
+        GA tune for single op and fusion op
+        :param task_id: task id for this op to tune
+        :param op_json: op's info
+        """
+        json_info = json.loads(op_json)
+        if "fusion_op" in json_info:
+            self.fusion_ga_tune(task_id, json_info)
+        else:
+            self.single_ga_tune(task_id, json_info)
+
+    def single_rl_tune(self, task_id, json_info):
+        """
+        RL tune for single op
+        :param task_id: task id for this op to tune
+        :param json_info: op's info
+        :return: tune result
+        """
+        if self.offline_tune:
+            converted_json = single_to_fusion(json.dumps(json_info), tune_mode="RL")
+        op_type = json_info['op_info']['name']
+        kernel_name = json_info['op_info']['kernel_name']
+        op_module = __import__("impl." + op_type, globals(), locals(), [op_type], 0)
+        op_module_name = "impl." + op_type
+        py_fn_name = json_info['op_info']['name']
+        op_func = getattr(op_module, py_fn_name, None)
+        set_current_op_name(kernel_name)
+        inputs_args = get_args(json_info['op_info'], 'inputs')
+        outputs_args = get_args(json_info['op_info'], 'outputs')
+        attrs_args = get_args(json_info['op_info'], 'attrs')
+        op_args = inputs_args, outputs_args, attrs_args
+        # todo build with build_single_op_from_c
+        base_kernel = './kernel_meta/' + kernel_name + '.o'
+        job_type = RL_COMPILE
+        try:
+            op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
+        # pylint: disable=broad-except
+        except Exception:
+            exc_type, exc_value, _ = sys.exc_info()
+            log.error(
+                "exc_type:{}, exc_value:{}, exc_traceback:{}".format(exc_type, exc_value, traceback.format_exc()))
+            return False, job_type
+        if self.offline_tune:
+            job_type = RL_OFFLINE
+            dump_fusion_json(converted_json, self.offline_dump_path)
+        else:
+            job_type = RL_ONLINE
+        graph_id = 0
+        l1size = 0  # todo need to verify
+        ret = dispatch_single_tune_task(graph_id, task_id, l1size, base_kernel, kernel_name, op_module_name,
+                                        op_module_name + "@" + op_module_name, op_type, op_type, op_args)
+        return ret, job_type
+
+    def get_op_module_names(self, json_info):
+        """
+        Get op module names from op info json
+        :param json_info: op's info
+        :return: op module names
+        """
+        op_module_name = ""
+        for op in json_info["fusion_op"]["op_list"]:
+            if "module_name" in op:
+                op_module_name = op_module_name + op["module_name"] + ","
+        return op_module_name[:-1]
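What `get_op_module_names` yields for an illustrative fusion json (entries without `module_name` are skipped):

```python
# Equivalent join for an illustrative fusion json.
json_info = {"fusion_op": {"op_list": [
    {"module_name": "impl.conv2d"},
    {"module_name": "impl.relu"},
    {"name": "placeholder"},          # no module_name -> skipped
]}}
names = ",".join(op["module_name"] for op in json_info["fusion_op"]["op_list"]
                 if "module_name" in op)
print(names)  # impl.conv2d,impl.relu
```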
+
+    def fusion_rl_tune(self, task_id, json_info):
+        """
+        RL tune for fusion op
+        :param task_id: task id for this op to tune
+        :param json_info: op's info
+        :return: tune result
+        """
+        if 'fusion_op' not in json_info or not json_info['fusion_op']:
+            raise ValueError("Json string Errors, key:fusion_op not found.")
+        kernel_name = json_info["fusion_op"]["fusion_op_name"]
+        set_current_op_name(kernel_name)
+        converted_json = fusion_to_fusion(json.dumps(json_info), tune_mode="RL")
+        job_type = RL_COMPILE
+        base_kernel = './kernel_meta/' + kernel_name + '.o'
+        try:
+            fusion_op(converted_json)
+        # pylint: disable=broad-except
+        except Exception:
+            exc_type, exc_value, _ = sys.exc_info()
+            log.error(
+                "exc_type:{}, exc_value:{}, exc_traceback:{}".format(exc_type, exc_value, traceback.format_exc()))
+            return False, job_type
+        if self.offline_tune:
+            job_type = RL_OFFLINE
+            dump_fusion_json(converted_json, self.offline_dump_path)
+        else:
+            job_type = RL_ONLINE
+        graph_id = 0
+        l1size = 0
+        op_module_names = self.get_op_module_names(json_info)
+        ret = dispatch_fusion_tune_task(graph_id, task_id, l1size, base_kernel, kernel_name, op_module_names,
+                                        converted_json)
+        return ret, job_type
+
+    def fusion_ga_tune(self, task_id, json_info):
+        """
+        GA tune for fusion op
+        :param task_id: task id for this op to tune
+        :param json_info: op's info
+        """
+        if 'fusion_op' not in json_info or not json_info['fusion_op']:
+            raise ValueError("Json string Errors, key:fusion_op not found.")
+        kernel_name = json_info["fusion_op"]["fusion_op_name"]
+        converted_json = fusion_to_fusion(json.dumps(json_info), tune_mode="GA")
+        graph_id = 0
+        l1size = 0
+        dispatch_autotune_task(graph_id, task_id, l1size, converted_json, [], kernel_name)
+
+    def single_ga_tune(self, task_id, json_info):
+        """
+        GA tune for single op
+        :param task_id: task id for this op to tune
+        :param json_info: op's info
+        """
+        converted_json = single_to_fusion(json.dumps(json_info), tune_mode="GA")
+        graph_id = 0
+        l1size = 0
+        kernel_name = json_info["fusion_op_name"]
+        dispatch_autotune_task(graph_id, task_id, l1size, converted_json, [], kernel_name)
+
+    def get_finish_tasks(self):
+        """
+        Get finished tasks from the parallel compilation framework
+        :return: task info list
+        """
+        ret = get_finished_compilation_task(0)
+        return ret
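`wait_one` in tbe_process.py consumes these items as dicts carrying `task_id` and `status_code` (0 means success). A shape sketch with illustrative values:

```python
# Shape of the items wait_one reads from get_finish_tasks; values illustrative.
finished = [{"task_id": 7, "status_code": 0},
            {"task_id": 9, "status_code": 1}]
for item in finished:
    status = "Success" if item["status_code"] == 0 else "Failed"
    print(item["task_id"], status)
```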
@@ -80,8 +80,8 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
     size_t hash_id = GenFusionJsonHash(fusion_op);
     auto json_name =
       fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
-    fusion_op["graph_id"] = fusion_scope_iter.graph_id;
     fusion_op["fusion_op_name"] = json_name;
+    fusion_op["full_name"] = fusion_scope_iter.full_name;
     // get io size
     std::vector<size_t> input_size_list;
     std::vector<size_t> output_size_list;
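This hunk is the C++ half of the handoff: the fusion json now carries `full_name` instead of `graph_id`, which is exactly the key `_if_tune_ops` reads on the Python side to match against `TUNE_OPS_NAME`. A sketch of the payload (values illustrative):

```python
# The key handoff this hunk creates: C++ writes fusion_op["full_name"], and
# the Python _if_tune_ops whitelist check reads it.
fusion_json = {"fusion_op": {"fusion_op_name": "te_fused_op_12345_0",
                             "full_name": "Default/network/FusionOp-op42"}}
print(fusion_json["fusion_op"]["full_name"])
```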
@@ -19,6 +19,7 @@
 #include <utility>
 #include <vector>
 #include <map>
+#include <string>
 #include "backend/kernel_compiler/kernel.h"
 
 namespace mindspore {
 namespace kernel {
@@ -26,15 +27,15 @@ namespace kernel {
  * @brief fuse op and return a callable mod
  */
 struct FusionScopeInfo {
-  FusionScopeInfo(int64_t id, uint32_t g_id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp,
+  FusionScopeInfo(int64_t id, std::string f_name, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp,
                   std::vector<AnfNodePtr> out)
       : scope_id(id),
-        graph_id(g_id),
+        full_name(f_name),
         input_nodes(std::move(in)),
         compute_nodes(std::move(comp)),
         output_nodes(std::move(out)) {}
   int64_t scope_id{};
-  uint32_t graph_id{};
+  std::string full_name{};
   std::vector<AnfNodePtr> input_nodes;
   std::vector<AnfNodePtr> compute_nodes;
   std::vector<AnfNodePtr> output_nodes;
@@ -123,7 +123,6 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
   nlohmann::json op_info_json;
   op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>());
   auto func_name = op_info_ptr->kernel_name();
-  op_info_json["graph_id"] = AnfAlgo::GetGraphId(anf_node.get());
   op_info_json[kJName] = func_name;
   op_info_json[kJModuleName] = std::string("impl.") + func_name;
   op_info_json[kJPyModulePath] = kPyPath;
@@ -163,7 +162,6 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
   op_info_json["Type"] = op_name;
   op_info_json[kJKernelName] = json_name_;
   op_info_json[kGenModel] = kSingle;
-  op_info_json[kJFullName] = anf_node->fullname_with_scope();
   // create attr_desc
   nlohmann::json attr_desc;
@@ -43,7 +43,7 @@ const int8_t MULTI_ELTWISE_SIZE = 4;
 using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
 
 struct BufferFusionInfo_t {
-  uint32_t graph_id;
+  std::string full_name;
   std::vector<AnfNodePtr> anf_nodes;
   std::vector<AnfNodePtr> inputs_list;
   std::vector<AnfNodePtr> outputs_list;
@@ -387,7 +387,6 @@ void RemoveCircle(const session::KernelGraph &kernel_graph,
 void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                                           std::unordered_map<int64_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  auto graph_id = kernel_graph->graph_id();
   GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
   GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos);
   GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
@@ -397,7 +396,11 @@ void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     buffer_fusion_info.second.kernel_build_info =
       CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list);
-    buffer_fusion_info.second.graph_id = graph_id;
+    // create a temporary fusion node only to obtain full_name_with_scope for every buffer_fusion_info.
+    auto fusion_node = CreateFusionOp(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list,
+                                      buffer_fusion_info.second.anf_nodes, kernel_graph);
+    MS_EXCEPTION_IF_NULL(fusion_node);
+    buffer_fusion_info.second.full_name = fusion_node->fullname_with_scope();
   }
 }
@@ -412,7 +415,7 @@ bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph
     buffer_fusion_infos.begin(), buffer_fusion_infos.end(), std::back_inserter(fusion_scope_infos),
     [](const std::pair<int64_t, BufferFusionInfo_t> &buffer_fusion_info) -> mindspore::kernel::FusionScopeInfo {
       return mindspore::kernel::FusionScopeInfo(
-        buffer_fusion_info.first, buffer_fusion_info.second.graph_id, buffer_fusion_info.second.inputs_list,
+        buffer_fusion_info.first, buffer_fusion_info.second.full_name, buffer_fusion_info.second.inputs_list,
         buffer_fusion_info.second.anf_nodes, buffer_fusion_info.second.outputs_list);
     });
   auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos);
@@ -447,6 +450,7 @@ bool UbPatternFusion::ReplaceFusionOp(std::unordered_map<int64_t, BufferFusionIn
   TraceGuard guard(std::make_shared<TraceOpt>(buffer_fusion_info.anf_nodes[0]->debug_info()));
   auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list,
                                       buffer_fusion_info.anf_nodes, kernel_graph);
+  buffer_fusion->set_fullname_with_scope(buffer_fusion_info.full_name);
   AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get());
   // Set abstract of fusion_op node
   std::vector<TypeId> types;