| @@ -28,7 +28,8 @@ build_in_impl_path = get_build_in_impl_path() | |||
| # op function list | |||
| op_build = "compile" | |||
| op_pre_build = "pre_build" | |||
| fusion_type_map = {'Convolution': 0, 'ElemWise': 1, 'CommReduce': 2, | |||
| 'Segment': 3, 'Opaque': 4} | |||
| def _initialize(impl_path): | |||
| """Initialize""" | |||
| @@ -108,7 +109,7 @@ def build_op(build_type, json_str): | |||
| # pre build | |||
| if build_type == op_pre_build: | |||
| op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name) | |||
| op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) | |||
| # disable only pattern configuration | |||
| op_build_cfg_en() | |||
| return get_op_pattern() | |||
| @@ -159,11 +160,16 @@ def compile_with_json(json_str): | |||
| json_info = json.loads(json_str) | |||
| if "fusion_op" in json_info: | |||
| ret = compile_fusion_op(json_str) | |||
| elif "compile_type" in json_info: | |||
| ret = build_op(op_pre_build, json_str) | |||
| else: | |||
| ret = build_op(op_build, json_str) | |||
| return ret | |||
if __name__ == "__main__":
    # The request is a single line of JSON read from stdin.
    in_args = sys.stdin.readline()
    # Compile exactly once; the returned value is what gets reported below.
    result = compile_with_json(in_args)
    # Report the outcome through the process exit status: the index registered
    # in fusion_type_map when the result is one of its keys, otherwise 100.
    # sys.exit is used because the bare exit() helper is injected by the site
    # module and is not guaranteed to exist in every interpreter setup.
    sys.exit(fusion_type_map.get(result, 100))
| @@ -75,7 +75,6 @@ def check_supported(op_json: str): | |||
| return ret | |||
| def run_compiler(op_json): | |||
| """ | |||
| run compiler to compile op with subprocess | |||
| @@ -88,15 +87,16 @@ def run_compiler(op_json): | |||
| """ | |||
| try: | |||
| tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "compiler.py") | |||
| subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300, | |||
| text=True, capture_output=True, check=True) | |||
| return "Success", "Success" | |||
| completed_object = subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300, | |||
| text=True, capture_output=True, check=False) | |||
| if completed_object: | |||
| code = completed_object.returncode | |||
| return "Success", str(code) | |||
| except subprocess.TimeoutExpired: | |||
| tb = traceback.format_exc() | |||
| return "TBEException", "CompileTimeOut: " + tb + "\ninput_args: " + op_json | |||
| return "TBEException", "PreCompileTimeOut: " + tb + "\ninput_args: " + op_json | |||
| except subprocess.CalledProcessError as e: | |||
| return "TBEException", "CompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json | |||
| return "TBEException", "PreCompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json | |||
| class CompilerPool: | |||
| """compiler pool""" | |||
| @@ -154,11 +154,11 @@ class CompilerPool: | |||
| task_id, task_future = self.__running_tasks.pop(0) | |||
| ret_type, result = task_future.get(330) | |||
| if ret_type == "Success": | |||
| ret = task_id, "Success" | |||
| ret = task_id, "Success", result | |||
| elif ret_type in ("Exception", "TBEException"): | |||
| ret = task_id, ret_type + ":" + result | |||
| ret = task_id, ret_type + ":" + result, "_" | |||
| else: | |||
| ret = task_id, "Exception: Not support return type:" + str(ret_type) | |||
| ret = task_id, "Exception: Not support return type:" + str(ret_type), "_" | |||
| return ret | |||
| def reset_task_info(self): | |||
| @@ -62,6 +62,31 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { | |||
| return kernel_mod_ptr; | |||
| } | |||
| static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| std::vector<AnfNodePtr> tbe_nodes; | |||
| for (const auto &anf_node : kernel_graph_ptr->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| if (!AnfAlgo::IsRealKernel(anf_node)) { | |||
| continue; | |||
| } | |||
| KernelType kernel_type = AnfAlgo::GetKernelType(anf_node); | |||
| switch (kernel_type) { | |||
| case KernelType::TBE_KERNEL: { | |||
| if (AnfAlgo::GetKernelMod(anf_node) == nullptr) { | |||
| tbe_nodes.push_back(anf_node); | |||
| } | |||
| break; | |||
| } | |||
| default: { | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes); | |||
| return ret; | |||
| } | |||
| static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| std::vector<AnfNodePtr> tbe_nodes; | |||
| @@ -188,6 +213,12 @@ bool IsAtomicNode(const CNodePtr &kernel_node) { | |||
| return atomic_flag; | |||
| } | |||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr); | |||
| return ret; | |||
| } | |||
| bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| TbeUtils::LoadCache(); | |||
| @@ -22,6 +22,10 @@ | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| /** | |||
| * @brief kernel pre build for ascend. | |||
| */ | |||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr); | |||
| /** | |||
| * @brief kernel build for ascend. | |||
| */ | |||
| @@ -102,7 +102,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||
| while (!build_manger->IsAllTaskFinish()) { | |||
| int task_id = -1; | |||
| char *task_result = nullptr; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result); | |||
| char *pre_build_result = nullptr; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); | |||
| if (!ret) { | |||
| MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id; | |||
| } | |||
| @@ -27,7 +27,7 @@ | |||
| // the TBE back-end operator implementation difference | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED }; | |||
| enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; | |||
| namespace tbe { | |||
| using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| @@ -42,6 +42,40 @@ constexpr auto kStartCompileOp = "start_compile_op"; | |||
| constexpr auto kWaitOne = "wait_one"; | |||
| constexpr auto kResetTaskInfo = "reset_task_info"; | |||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| for (const auto &anf_node : anf_nodes) { | |||
| // gen kernel json | |||
| nlohmann::json kernel_json; | |||
| TbeKernelJsonCreator creator(OP_PRE_COMPILE); | |||
| if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { | |||
| MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; | |||
| return false; | |||
| } | |||
| kernel_json["compile_type"] = "pre_build"; | |||
| // op build | |||
| auto task_id = build_manger->StartCompileOp(kernel_json); | |||
| build_manger->SavePreTaskInfo(task_id, anf_node); | |||
| } | |||
| while (!build_manger->IsAllPreTaskFinish()) { | |||
| int task_id = -1; | |||
| char *task_result = nullptr; | |||
| char *pre_build_result = nullptr; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); | |||
| if (!ret) { | |||
| MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id; | |||
| } | |||
| if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { | |||
| MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result; | |||
| } | |||
| build_manger->PreTaskFinishProcess(task_id, pre_build_result); | |||
| } | |||
| return true; | |||
| } | |||
| bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) { | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| @@ -82,7 +116,8 @@ bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) { | |||
| while (!build_manger->IsAllTaskFinish()) { | |||
| int task_id = -1; | |||
| char *task_result = nullptr; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result); | |||
| char *pre_build_result = nullptr; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); | |||
| if (!ret) { | |||
| MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id; | |||
| } | |||
| @@ -116,7 +151,7 @@ int32_t ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json) | |||
| return task_id; | |||
| } | |||
| bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const { | |||
| bool ParallelBuildManager::WaitOne(int *task_id, char **task_result, char **pre_build_result) const { | |||
| MS_LOG(INFO) << "wait task start."; | |||
| MS_EXCEPTION_IF_NULL(task_id); | |||
| MS_EXCEPTION_IF_NULL(task_result); | |||
| @@ -128,10 +163,15 @@ bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const { | |||
| MS_EXCEPTION(ArgumentError) << "Failed to call function wait_one"; | |||
| return false; | |||
| } | |||
| (void)PyArg_ParseTuple(pRes, "is", task_id, task_result); | |||
| (void)PyArg_ParseTuple(pRes, "iss", task_id, task_result, pre_build_result); | |||
| return true; | |||
| } | |||
| void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) { | |||
| MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id; | |||
| pre_task_map_[task_id] = anf_node; | |||
| } | |||
| void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, | |||
| const std::string &json_name, const std::vector<size_t> &input_size_list, | |||
| const std::vector<size_t> &output_size_list, int32_t scope_id) { | |||
| @@ -150,11 +190,24 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod | |||
| task_map_[task_id] = task_info; | |||
| } | |||
| bool ParallelBuildManager::IsAllPreTaskFinish() const { | |||
| MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size(); | |||
| return pre_task_map_.empty(); | |||
| } | |||
| bool ParallelBuildManager::IsAllTaskFinish() const { | |||
| MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); | |||
| return task_map_.empty(); | |||
| } | |||
| void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { | |||
| auto task_iter = pre_task_map_.find(task_id); | |||
| if (task_iter == pre_task_map_.end()) { | |||
| MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; | |||
| } | |||
| (void)pre_task_map_.erase(task_iter); | |||
| } | |||
| std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { | |||
| auto task_iter = task_map_.find(task_id); | |||
| if (task_iter == task_map_.end()) { | |||
| @@ -26,6 +26,7 @@ | |||
| #include <nlohmann/json.hpp> | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||
| bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes); | |||
| struct KernelBuildTaskInfo { | |||
| @@ -42,6 +43,7 @@ class ParallelBuildManager { | |||
| ParallelBuildManager(); | |||
| ~ParallelBuildManager(); | |||
| int32_t StartCompileOp(const nlohmann::json &kernel_json) const; | |||
| void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node); | |||
| void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| int32_t scope_id = 0); | |||
| @@ -52,8 +54,10 @@ class ParallelBuildManager { | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| AnfNode *node) const; | |||
| bool WaitOne(int *task_id, char **task_result) const; | |||
| bool WaitOne(int *task_id, char **task_result, char **pre_build_result) const; | |||
| bool IsAllPreTaskFinish() const; | |||
| bool IsAllTaskFinish() const; | |||
| void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result); | |||
| std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); | |||
| KernelModPtr GenKernelMod(const string &json_name, const string &processor, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| @@ -62,6 +66,7 @@ class ParallelBuildManager { | |||
| private: | |||
| PyObject *tbe_parallel_compiler_; | |||
| std::map<int32_t, AnfNodePtr> pre_task_map_; | |||
| std::map<int32_t, KernelBuildTaskInfo> task_map_; | |||
| std::vector<KernelBuildTaskInfo> same_op_list_; | |||
| }; | |||
| @@ -546,6 +546,7 @@ void AscendSession::InitRuntimeResource() { | |||
| void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| MS_LOG(INFO) << "HardwareOptimize start!"; | |||
| device::ascend::KernelPreBuild(kernel_graph.get()); | |||
| opt::AscendBackendOptimization(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| kernel_graph->SetExecOrderByDefault(); | |||