Browse Source

Bug fix for auto tune initialization failure

tags/v1.2.0-rc1
liubuyu 4 years ago
parent
commit
792d9b7200
2 changed files with 10 additions and 3 deletions
  1. +6
    -2
      mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py
  2. +4
    -1
      mindspore/_extends/parallel_compile/tbe_compiler/tuner.py

+ 6
- 2
mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py View File

@@ -127,6 +127,7 @@ class TbeProcess:
self.__failed_tune_task = [] self.__failed_tune_task = []
self.__task_info = {} self.__task_info = {}
self.__tuner = None self.__tuner = None
self.tune_init = True
self.tune_process_num = 0 self.tune_process_num = 0
self.tune_mode = None self.tune_mode = None
self.offline_tune = False self.offline_tune = False
@@ -175,7 +176,7 @@ class TbeProcess:
if os.getenv("ENABLE_TUNE_DUMP", "").lower() == "true": if os.getenv("ENABLE_TUNE_DUMP", "").lower() == "true":
self.offline_tune = True self.offline_tune = True
log.info("Tune offline mode is on...") log.info("Tune offline mode is on...")
if self.tune_mode == "NO_TUNE" and not self.offline_tune:
if self.tune_mode == NO_TUNE and not self.offline_tune:
log.info("[NO_TUNE] There is no need to initialize auto_tune related variables.") log.info("[NO_TUNE] There is no need to initialize auto_tune related variables.")
return "Success" return "Success"


@@ -301,6 +302,8 @@ class TbeProcess:
""" """
task_id = self.__next_task_id task_id = self.__next_task_id
error_id = -1 error_id = -1
if not self.tune_init:
return error_id
self.__next_task_id = self.__next_task_id + 1 self.__next_task_id = self.__next_task_id + 1
tune_mode = self.select_tune_mode(op_json) tune_mode = self.select_tune_mode(op_json)
self.__task_info[task_id] = op_json self.__task_info[task_id] = op_json
@@ -319,7 +322,8 @@ class TbeProcess:
if not self.__tuner.tune_init: if not self.__tuner.tune_init:
status = self.__tuner.init_tune_interface(op_json, self.tune_process_num) status = self.__tuner.init_tune_interface(op_json, self.tune_process_num)
if not status: if not status:
log.error("Auto tune init failed!")
log.error("Auto tune init failed, place check your hardware config or go back to normal compile!")
self.tune_init = False
return error_id return error_id
self.__tuner.tune_init = True self.__tuner.tune_init = True
self.__all_tune_tasks.append(task_id) self.__all_tune_tasks.append(task_id)


+ 4
- 1
mindspore/_extends/parallel_compile/tbe_compiler/tuner.py View File

@@ -46,6 +46,7 @@ class TbeTuner:
self.offline_tune = offline_tune self.offline_tune = offline_tune
self.tune_init = False self.tune_init = False
self.rl_init = False self.rl_init = False
self.multi_init = False
self.offline_dump_path = "./tune_dump" self.offline_dump_path = "./tune_dump"
if os.environ.get("TUNE_DUMP_PATH") is not None: if os.environ.get("TUNE_DUMP_PATH") is not None:
self.offline_dump_path = os.getenv("TUNE_DUMP_PATH", "") self.offline_dump_path = os.getenv("TUNE_DUMP_PATH", "")
@@ -77,7 +78,8 @@ class TbeTuner:
""" """
DeInitialize tuner interface DeInitialize tuner interface
""" """
deinit_multi_process_env()
if self.multi_init:
deinit_multi_process_env()
if self.rl_init: if self.rl_init:
rl_tune_deinit() rl_tune_deinit()


@@ -213,6 +215,7 @@ class TbeTuner:
if ret is None: if ret is None:
log.error("Init multiprocess env failed") log.error("Init multiprocess env failed")
return False return False
self.multi_init = True
process_count = ret[0] process_count = ret[0]
log.info("Init multiprocess env success with {} process".format(process_count)) log.info("Init multiprocess env success with {} process".format(process_count))
if "RL" in tune_mode: if "RL" in tune_mode:


Loading…
Cancel
Save