diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/re_construct_json.py b/mindspore/_extends/parallel_compile/tbe_compiler/re_construct_json.py
new file mode 100644
index 0000000000..24323e9842
--- /dev/null
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/re_construct_json.py
@@ -0,0 +1,164 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Reconstruct json."""
+import json
+
+
+def common_op_info(json_file):
+    """
+    Add more detailed info to an op desc
+    :param json_file: origin op desc
+    :return: op desc with detail fields
+    """
+    json_file["L1_addr_offset"] = 0
+    json_file["L1_fusion_type"] = -1
+    json_file["L1_workspace_size"] = -1
+    json_file["addr_type"] = 0
+    json_file["slice_offset"] = []
+    json_file["split_index"] = 0
+    json_file["total_shape"] = []
+    json_file["valid_shape"] = []
+    return json_file
+
+
+def create_input(json_info):
+    """
+    Create input, type is "Data"
+    :param json_info: json file
+    :return: ops list
+    """
+    ops = []
+    if "inputs" in json_info and json_info["inputs"] is not None:
+        ori_inputs = json_info["inputs"]
+        for _, item in enumerate(ori_inputs):
+            op_info = {
+                "name": item[0]["name"],
+                "output_desc": [common_op_info(item[0])],
+                "type": "Data"
+            }
+            ops.append(op_info)
+    return ops
+
+
+def create_inout_desc(ori_json):
+    """
+    Create input or output, insert "data_type" attr and other detail infos
+    :param ori_json: input or output list, the item in list is a dict
+    :return: list
+    """
+    if ori_json is None:
+        return "null"
+    out_list = []
+    for _, item in enumerate(ori_json):
+        item[0]["data_type"] = item[0]["dtype"] if "dtype" in item[0] else 0
+        if "ori_format" in item[0] or "ori_shape" in item[0]:
+            item[0]["L1_addr_offset"] = 0
+            item[0]["L1_fusion_type"] = -1
+            item[0]["L1_workspace_size"] = -1
+            item[0]["addr_type"] = 0
+            item[0]["slice_offset"] = []
+            item[0]["split_index"] = 0
+            item[0]["total_shape"] = []
+            item[0]["valid_shape"] = []
+        else:
+            item[0]["shape"] = "NULL"
+        out_list.append(item[0])
+    return out_list
+
+
+def create_pre_build_attr(ori_json):
+    """
+    Create prebuild_outs_attrs
+    :param ori_json: origin json file
+    :return: dict
+    """
+    args = [create_inout_desc(ori_json["outputs"])[0]]
+    if "attrs" in ori_json and ori_json["attrs"] is not None:
+        ori_attrs = ori_json["attrs"]
+        for item in ori_attrs:
+            if "value" in item:
+                args.append(item["value"])
+    pre_build_attr = {"kwds_args": {},
+                      "list_args": args
+                      }
+    return pre_build_attr
+
+
+def create_compute_op(ori_json):
+    """
+    Create compute op's in and out desc
+    :param ori_json: origin json file
+    :return: dict
+    """
+    func_name = ori_json["name"]
+    op_type = ori_json["Type"]
+    full_name = ori_json["full_name"]
+    pattern = ori_json["pattern"] if "pattern" in ori_json else ""
+    op_common_info = {
+        "func_name": func_name,
+        "input_desc": create_inout_desc(ori_json["inputs"]) if "inputs" in ori_json else "null",
+        "module_name": ori_json["module_name"],
+        "name": full_name,
+        "output_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null",
+        "output_data_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null",
+        "pattern": pattern,
+        "attr_desc": ori_json["attr_desc"] if "attr_desc" in ori_json else "null",
+        "py_module_path": ori_json["py_module_path"],
+        "type": op_type
+    }
+    return op_common_info
+
+
+def single_to_fusion(json_file, tune_mode):
+    """
+    Change single op json to fusion op json for auto tune
+    :param json_file: origin json file
+    :param tune_mode: tune mode
+    :return: a fusion op json, which contains one op
+    """
+    ori_file = json.loads(json_file)
+    json_info = ori_file["op_info"]
+    soc_info = ori_file["SocInfo"]
+    soc_info["autoTilingMode"] = tune_mode
+    kernel_name = json_info["kernel_name"]
+    ops = create_input(json_info)
+    ops2 = create_compute_op(json_info)
+    ops.append(ops2)
+    end_file = {
+        "SocInfo": soc_info,
+        "fusion_op_name": kernel_name,
+        "l1_size": -1,
+        "op_list": ops
+    }
+    res = json.dumps(end_file, ensure_ascii=False)
+    return res
+
+
+def fusion_to_fusion(json_str, tune_mode):
+    """
+    Add l1_size for fusion json
+    :param json_str: origin json file
+    :param tune_mode: tune mode
+    :return: fusion json info
+    """
+    json_info = json.loads(json_str)
+    json_info["fusion_op"]["l1_size"] = -1
+    json_info["SocInfo"]["autoTilingMode"] = tune_mode
+    end_file = json_info["fusion_op"]
+    end_file["SocInfo"] = json_info["SocInfo"]
+    res = json.dumps(end_file, ensure_ascii=False)
+    return res
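
For orientation, a minimal sketch of how single_to_fusion above would be driven. The input job here is hand-made and only fills the keys those functions actually read; it is not a real compile job emitted by the build server:

    import json
    from mindspore._extends.parallel_compile.tbe_compiler.re_construct_json import single_to_fusion

    # Hypothetical single-op compile job (illustrative values only).
    single_op_job = json.dumps({
        "SocInfo": {"socVersion": "Ascend910"},
        "op_info": {
            "kernel_name": "add_12345_0",
            "name": "add",
            "Type": "Add",
            "full_name": "Default/Add-op1",
            "module_name": "impl.add",
            "py_module_path": "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe",
            "inputs": [[{"name": "x", "dtype": "float16", "shape": [16, 16]}]],
            "outputs": [[{"name": "y", "dtype": "float16", "shape": [16, 16]}]],
        },
    })

    fusion_json = json.loads(single_to_fusion(single_op_job, "GA"))
    print(fusion_json["fusion_op_name"])                   # add_12345_0
    print([op["type"] for op in fusion_json["op_list"]])   # ['Data', 'Add']
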
"name": full_name, + "output_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null", + "output_data_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null", + "pattern": pattern, + "attr_desc": ori_json["attr_desc"] if "attr_desc" in ori_json else "null", + "py_module_path": ori_json["py_module_path"], + "type": op_type + } + return op_common_info + + +def single_to_fusion(json_file, tune_mode): + """ + Change single op json to fusion op json for auto tune + :param json_file: origin json file + :param tune_mode: tune mode + :return: a fusion op json, which contain one op + """ + ori_file = json.loads(json_file) + json_info = ori_file["op_info"] + soc_info = ori_file["SocInfo"] + soc_info["autoTilingMode"] = tune_mode + kernel_name = json_info["kernel_name"] + ops = create_input(json_info) + ops2 = create_compute_op(json_info) + ops.append(ops2) + end_file = { + "SocInfo": soc_info, + "fusion_op_name": kernel_name, + "l1_size": -1, + "op_list": ops + } + # op_info = {"fusion_op": end_file} + res = json.dumps(end_file, ensure_ascii=False) + return res + + +def fusion_to_fusion(json_str, tune_mode): + """ + Add l1_size for fusion json + :param json_str: origin json file + :param tune_mode: tune mode + :return: fusion json info + """ + + json_info = json.loads(json_str) + json_info["fusion_op"]["l1_size"] = -1 + json_info["SocInfo"]["autoTilingMode"] = tune_mode + end_file = json_info["fusion_op"] + end_file["SocInfo"] = json_info["SocInfo"] + res = json.dumps(end_file, ensure_ascii=False) + return res diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py index 36c6c655a9..264e43edb5 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py @@ -117,6 +117,9 @@ class TbeProcess: self.__pool.join() del self.__pool + def init_auto_tune_env(self, mode): + return "Success" + def init_process_num(self): """ init compile process num diff --git a/mindspore/_extends/remote/kernel_build_server_ascend.py b/mindspore/_extends/remote/kernel_build_server_ascend.py index 1dc513551c..bdd9216851 100644 --- a/mindspore/_extends/remote/kernel_build_server_ascend.py +++ b/mindspore/_extends/remote/kernel_build_server_ascend.py @@ -24,6 +24,9 @@ class TbeBuilder: def __init__(self): self.tbe_builder = create_tbe_parallel_process() + def init_auto_tune_env(self, mode): + return self.tbe_builder.init_auto_tune_env(mode) + def create(self): return self.tbe_builder.init_process_num() @@ -75,6 +78,11 @@ class AscendMessager(Messager): if arg == 'TBE/PRE': ans = self.tbe_builder.create() self.send_res(ans) + elif arg == "TBE/TUNE": + self.send_ack() + tune_mode = self.get_message() + ans = self.tbe_builder.init_auto_tune_env(tune_mode) + self.send_res(ans) elif arg == 'TBE/START': self.send_ack() json = self.get_message() diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc index 8f6b529d7e..084fd8b8d9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc @@ -55,6 +55,20 @@ std::map KernelFusion(const std::vector std::map kernel_mod_ret; auto build_manger = std::make_shared(); MS_EXCEPTION_IF_NULL(build_manger); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto device_id = 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
index 8f6b529d7e..084fd8b8d9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
@@ -55,6 +55,20 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
   std::map<int64_t, KernelModPtr> kernel_mod_ret;
   auto build_manger = std::make_shared<ParallelBuildManager>();
   MS_EXCEPTION_IF_NULL(build_manger);
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+  auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE);
+  std::string offline_tune = common::GetEnv("ENABLE_TUNE_DUMP");
+  if (!offline_tune.empty()) {
+    for (size_t j = 0; j < offline_tune.length(); j++) {
+      offline_tune[j] = tolower(offline_tune[j]);
+    }
+    if (!(offline_tune == "true" || offline_tune == "false")) {
+      MS_LOG(EXCEPTION) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'";
+    }
+  }
+
   for (const auto &fusion_scope_iter : fusion_scopes) {
     string fusion_kernel_name;
     nlohmann::json fusion_op;
@@ -64,11 +78,9 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
     }
     // gen kernel_name & check cache
     size_t hash_id = GenFusionJsonHash(fusion_op);
-    auto context_ptr = MsContext::GetInstance();
-    MS_EXCEPTION_IF_NULL(context_ptr);
-    auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
     auto json_name =
       fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
+    fusion_op["graph_id"] = fusion_scope_iter.graph_id;
     fusion_op["fusion_op_name"] = json_name;
     // get io size
     std::vector<size_t> input_size_list;
@@ -79,7 +91,7 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
     }
     // search cache
     auto kernel_pack = TbeUtils::SearchCache(json_name, tbe::kProcessorAiCore);
-    if (kernel_pack != nullptr) {
+    if (kernel_pack != nullptr && ((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) {
       auto kernel_mod = build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list,
                                                    kernel_pack);
       if (kernel_mod != nullptr) {
@@ -87,9 +99,16 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
         continue;
       }
     }
+    // generate soc info json
+    nlohmann::json soc_info_json;
+    TbeUtils::GenSocInfo(&soc_info_json);
+    soc_info_json["autoTilingMode"] = tune_mode;
+    auto soc_version = TbeKernelJsonCreator::GetSocVersion();
+    soc_info_json["socVersion"] = soc_version;
     // fusion build
     nlohmann::json fusion_json;
     fusion_json["fusion_op"] = fusion_op;
+    fusion_json["SocInfo"] = soc_info_json;
     auto task_id = build_manger->StartCompileOp(fusion_json);
     TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
     if (task_id < 0) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
index 22361124ea..badd8f50b8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
@@ -26,9 +26,15 @@ namespace kernel {
 * @brief fuse op and return a callable mod
 */
 struct FusionScopeInfo {
-  FusionScopeInfo(int64_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
-      : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
+  FusionScopeInfo(int64_t id, uint32_t g_id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp,
+                  std::vector<AnfNodePtr> out)
+      : scope_id(id),
+        graph_id(g_id),
+        input_nodes(std::move(in)),
+        compute_nodes(std::move(comp)),
+        output_nodes(std::move(out)) {}
   int64_t scope_id{};
+  uint32_t graph_id{};
   std::vector<AnfNodePtr> input_nodes;
   std::vector<AnfNodePtr> compute_nodes;
   std::vector<AnfNodePtr> output_nodes;
#include "frontend/parallel/ops_info/ops_utils.h" @@ -93,9 +94,13 @@ constexpr auto kJPattern = "pattern"; constexpr auto kJPyModulePath = "py_module_path"; constexpr auto kJAttrDesc = "attr_desc"; constexpr auto kJSocVersion = "socVersion"; +constexpr auto kAutoTilingMode = "autoTilingMode"; constexpr auto kSOC_VERSION = "SOC_VERSION"; constexpr auto kJIsDynamicShape = "is_dynamic_shape"; constexpr auto kJDynamicIndex = "dynamic_index"; +constexpr auto kJSocInfo = "SocInfo"; + +const auto kPyPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe"; bool IsNeedChangeDefaultFormat(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); @@ -114,11 +119,14 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptrimpl_path(); nlohmann::json op_info_json; op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast()); - op_info_json[kJName] = op_info_ptr->kernel_name(); + auto func_name = op_info_ptr->kernel_name(); + op_info_json["graph_id"] = AnfAlgo::GetGraphId(anf_node.get()); + op_info_json[kJName] = func_name; + op_info_json[kJModuleName] = std::string("impl.") + func_name; + op_info_json[kJPyModulePath] = kPyPath; // generate inputs json nlohmann::json inputs_json; if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) { @@ -148,11 +156,33 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptrget_param(MS_CTX_DEVICE_ID); + auto tune_mode = context_ptr->get_param(MS_CTX_TUNE_MODE); + op_info_json[kJFullName] = anf_node->fullname_with_scope(); json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); json_info_ = json_str; + op_info_json["Type"] = op_name; op_info_json[kJKernelName] = json_name_; + op_info_json[kGenModel] = kSingle; + op_info_json[kJFullName] = anf_node->fullname_with_scope(); + + // create attr_desc + nlohmann::json attr_desc; + for (const auto &attr : attrs_json) { + if (attr[kJName] != "isRef" && attr[kJValid] == true) { + attr_desc.push_back(attr[kJValue]); + } + } + if (!attr_desc.empty()) { + op_info_json[kJAttrDesc] = attr_desc; + } + + // generate soc info json + nlohmann::json soc_info_json; + TbeUtils::GenSocInfo(&soc_info_json); + soc_info_json[kAutoTilingMode] = tune_mode; + soc_info_json[kJSocVersion] = soc_version; + (*kernel_json)[kJSocInfo] = soc_info_json; (*kernel_json)[kJOpInfo] = op_info_json; - (*kernel_json)[kJFullName] = anf_node->fullname_with_scope(); MS_LOG(DEBUG) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope() << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump(); @@ -452,14 +482,22 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr &anf_no ParseAttrValue(type, value, &attr_obj); attr_obj[kJValid] = true; } else { - if (op_info->impl_path().empty()) { - attr_obj[kJValid] = false; + auto default_value = attr_ptr->default_value(); + if (!default_value.empty()) { + std::string type = attr_ptr->type(); + ParseAttrDefaultValue(type, default_value, &attr_obj); + attr_obj[kJValid] = true; } else { - if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) { - MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name - << " is required, but not set."; - } else { + MS_LOG(INFO) << "op " << op_name << "'s attr \"" << attr_name << "\" should have a default value."; + if (op_info->impl_path().empty()) { attr_obj[kJValid] = false; + } else { + if (attr_ptr->param_type() == kParamRequred && creater_type_ == 
@@ -567,6 +605,26 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspore::ValuePtr &value,
   }
 }
 
+void TbeKernelJsonCreator::ParseAttrDefaultValue(const std::string &type, const std::string &value,
+                                                 nlohmann::json *attr_obj) {
+  MS_EXCEPTION_IF_NULL(attr_obj);
+  if (type == kVTypeInt) {
+    (*attr_obj)[kJValue] = std::stoi(value);
+  } else if (type == kVTypeInt64) {
+    (*attr_obj)[kJValue] = std::stoll(value);
+  } else if (type == kVTypeStr) {
+    (*attr_obj)[kJValue] = value;
+  } else if (type == kVTypeBool) {
+    bool attr_value;
+    std::istringstream(value) >> std::boolalpha >> attr_value;
+    (*attr_obj)[kJValue] = attr_value;
+  } else if (type == kVTypeFloat) {
+    (*attr_obj)[kJValue] = std::stof(value);
+  } else {
+    MS_LOG(EXCEPTION) << "Type: " << type << " is not supported";
+  }
+}
+
 std::vector<size_t> TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const {
   MS_EXCEPTION_IF_NULL(anf_node);
   std::vector<size_t> shape;
@@ -792,7 +850,7 @@ void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode,
   (*compute_op_str)[kJModuleName] = std::string("impl.") + func_name;
   (*compute_op_str)[kJName] = cnode->fullname_with_scope();
   (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
-  (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe";
+  (*compute_op_str)[kJPyModulePath] = kPyPath;
   (void)(*fusion_kernel_name).append("_");
   (void)(*fusion_kernel_name).append(func_name);
   // attr_desc
@@ -899,12 +957,14 @@ void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
 }
 
 void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
-                                         size_t output_index, nlohmann::json *output_desc) {
+                                         size_t output_index, nlohmann::json *output_desc, const size_t out_size) {
   std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
   (*output_desc)[kJName] = output_desc_name;
   (*output_desc)[kJOutputIndex] = output_index;
   std::vector<size_t> shape;
   (*output_desc)[kJShape] = shape;
+  auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_size - 1);
+  (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
 }
 
 bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
@@ -1176,7 +1236,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
     for (size_t j = output_size; j < desc_output_index.size(); ++j) {
       MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j];
       nlohmann::json output_desc;
-      GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc);
+      GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc, output_size);
       output_desc_list->emplace_back(output_desc);
     }
   } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
index f0bec0b61a..99b7504254 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
@@ -73,7 +73,7 @@ class TbeKernelBuild {
                                  nlohmann::json *output_data_desc);
   static void GenSuffixDescJson(nlohmann::json *output_desc);
   static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
-                                  size_t output_index, nlohmann::json *output_desc);
+                                  size_t output_index, nlohmann::json *output_desc, const size_t out_size);
   static size_t GetIOSizeImpl(const nlohmann::json &desc);
   static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::CNodePtr> &reorder_layer,
                                  std::map<const AnfNodePtr, FusionDataType> *spec_data_input);
@@ -102,7 +102,9 @@ class TbeKernelJsonCreator {
                          nlohmann::json *inputs_json);
   bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                          nlohmann::json *outputs_json);
+  void GenSocInfo(nlohmann::json *soc_info_json);
   static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
+  static void ParseAttrDefaultValue(const std::string &type, const std::string &value, nlohmann::json *attr_obj);
   bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
                         const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
                         std::vector<nlohmann::json> *input_list);
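
A Python rendering of what ParseAttrDefaultValue above does with a registered default string. Sketch only: the type-name strings are assumed values of the kVType* constants, which this diff does not show:

    def parse_attr_default_value(attr_type, value):
        """Convert a registered default string into a typed value,
        keyed by the attr's declared type (mirrors ParseAttrDefaultValue)."""
        parsers = {
            "int": int,                        # kVTypeInt
            "int64": int,                      # kVTypeInt64
            "str": str,                        # kVTypeStr
            "bool": lambda v: v == "true",     # kVTypeBool (boolalpha)
            "float": float,                    # kVTypeFloat
        }
        if attr_type not in parsers:
            raise ValueError(f"Type: {attr_type} is not supported")
        return parsers[attr_type](value)

    assert parse_attr_default_value("int", "0") == 0
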
&desc); static bool GetSpecInputLayers(const std::string &op_name, const std::vector &reorder_layer, std::map *spec_data_input); @@ -102,7 +102,9 @@ class TbeKernelJsonCreator { nlohmann::json *inputs_json); bool GenTbeOutputsJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, nlohmann::json *outputs_json); + void GenSocInfo(nlohmann::json *soc_info_json); static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); + static void ParseAttrDefaultValue(const std::string &type, const std::string &value, nlohmann::json *attr_obj); bool GenInputDescJson(const std::shared_ptr &anf_node, size_t real_input_index, bool value, const std::shared_ptr &input_ptr, const string &op_input_name, size_t input_i, std::vector *input_list); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index 23a4c2019d..71e032118b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -37,6 +37,20 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { auto build_manger = std::make_shared(); MS_EXCEPTION_IF_NULL(build_manger); set processed_kernel; + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto tune_mode = context_ptr->get_param(MS_CTX_TUNE_MODE); + std::string offline_tune = common::GetEnv("ENABLE_TUNE_DUMP"); + if (!offline_tune.empty()) { + for (size_t j = 0; j < offline_tune.length(); j++) { + offline_tune[j] = tolower(offline_tune[j]); + } + if (!(offline_tune == "true" || offline_tune == "false")) { + MS_LOG(ERROR) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'"; + return false; + } + } + for (const auto &anf_node : anf_nodes) { // gen kernel json if (AnfAlgo::GetKernelMod(anf_node) != nullptr) { @@ -56,7 +70,8 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { (void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, anf_node); // search cache const std::string &json_name = creator.json_name(); - if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get())) { + if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get()) && + ((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) { continue; } // same op not need build, but need wait build finish to set kernel mode @@ -227,7 +242,8 @@ KernelModPtr ParallelBuildManager::GenKernelMod(const string &json_name, const s } int ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json) { - return AscendKernelBuildClient::Instance().TbeStart(kernel_json.dump()); + auto tune_mode = kernel_json["SocInfo"]["autoTilingMode"]; + return AscendKernelBuildClient::Instance().TbeStart(kernel_json.dump(), tune_mode); } bool ParallelBuildManager::WaitOne(int *task_id, std::string *task_result, std::string *pre_build_result) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc index 527f92e7e2..7680ce1087 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,7 +28,9 @@ #include "runtime/kernel.h" #include "utils/utils.h" #include 
"utils/ms_utils.h" +#include "utils/ms_context.h" #include "ir/dtype/type.h" +#include "backend/session/anf_runtime_algorithm.h" #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "securec/include/securec.h" @@ -40,6 +44,19 @@ constexpr auto kInfoSuffix = ".info"; uintptr_t KernelManager::kernel_stub_gen_ = 0; std::unordered_map KernelManager::info_table_ = {}; +void TbeUtils::GenSocInfo(nlohmann::json *soc_info_json) { + MS_EXCEPTION_IF_NULL(soc_info_json); + std::list list; + (*soc_info_json)["coreNum"] = ""; + (*soc_info_json)["coreType"] = ""; + (*soc_info_json)["l1Fusion"] = "false"; + (*soc_info_json)["l2Fusion"] = "false"; + (*soc_info_json)["l2Mode"] = "2"; + (*soc_info_json)["op_debug_level"] = ""; + (*soc_info_json)["op_impl_mode"] = ""; + (*soc_info_json)["op_impl_mode_list"] = list; +} + void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &info) { char real_path[PATH_MAX] = {0}; std::string path = kCceKernelMeta + json_name + kInfoSuffix; diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h index 7d3f639b5e..e367e9d82e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "backend/session/kernel_graph.h" #include "ir/anf.h" @@ -43,6 +44,8 @@ class TbeUtils { static void LoadCache(); + static void GenSocInfo(nlohmann::json *soc_info_json); + static KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); static KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h index ff86b1c1e7..9013d006db 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h @@ -43,6 +43,7 @@ const int8_t MULTI_ELTWISE_SIZE = 4; using FusedNodeRecord = std::vector>; struct BufferFusionInfo_t { + uint32_t graph_id; std::vector anf_nodes; std::vector inputs_list; std::vector outputs_list; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc index 122d5a3109..1db09fa869 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -381,6 +381,7 @@ void RemoveCircle(const session::KernelGraph &kernel_graph, void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto graph_id = kernel_graph->graph_id(); GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); @@ -390,6 +391,7 @@ void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, for (auto &buffer_fusion_info : *buffer_fusion_infos) { buffer_fusion_info.second.kernel_build_info = CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); + buffer_fusion_info.second.graph_id = graph_id; } } @@ -403,9 +405,9 @@ bool 
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc
index 122d5a3109..1db09fa869 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc
@@ -381,6 +381,7 @@ void RemoveCircle(const session::KernelGraph &kernel_graph,
 void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                                           std::unordered_map<int64_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
+  auto graph_id = kernel_graph->graph_id();
   GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
   GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos);
   GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
@@ -390,6 +391,7 @@ void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     buffer_fusion_info.second.kernel_build_info =
       CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list);
+    buffer_fusion_info.second.graph_id = graph_id;
   }
 }
 
@@ -403,9 +405,9 @@ bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const {
   std::transform(
     buffer_fusion_infos.begin(), buffer_fusion_infos.end(), std::back_inserter(fusion_scope_infos),
     [](const std::pair<int64_t, BufferFusionInfo_t> &buffer_fusion_info) -> mindspore::kernel::FusionScopeInfo {
-      return mindspore::kernel::FusionScopeInfo(buffer_fusion_info.first, buffer_fusion_info.second.inputs_list,
-                                                buffer_fusion_info.second.anf_nodes,
-                                                buffer_fusion_info.second.outputs_list);
+      return mindspore::kernel::FusionScopeInfo(
+        buffer_fusion_info.first, buffer_fusion_info.second.graph_id, buffer_fusion_info.second.inputs_list,
+        buffer_fusion_info.second.anf_nodes, buffer_fusion_info.second.outputs_list);
     });
   auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos);
   std::set<int64_t> fusion_ids;
diff --git a/mindspore/ccsrc/backend/session/kernel_build_client.cc b/mindspore/ccsrc/backend/session/kernel_build_client.cc
index 1010c71916..f633447128 100644
--- a/mindspore/ccsrc/backend/session/kernel_build_client.cc
+++ b/mindspore/ccsrc/backend/session/kernel_build_client.cc
@@ -28,18 +28,28 @@ void ReplaceStr(std::string *dest, const std::string &replace, char new_char) {
   }
 }
 
-bool AscendKernelBuildClient::TbePre() {
+bool AscendKernelBuildClient::TbePre(const std::string &mode) {
   auto res = SendRequest(kTbePre);
   if (res.find(kSuccess) == res.npos) {
     MS_LOG(EXCEPTION) << "PRE failed, res: " << res;
   }
   MS_LOG(INFO) << "Pre " << res;
+  // init env for auto tune
+  res = SendRequest(kTbeTune);
+  if (res != kAck) {
+    MS_LOG(EXCEPTION) << "Send tune signal failed, res: " << res;
+  }
+  res = SendRequest(mode);
+  if (res != kSuccess) {
+    MS_LOG(EXCEPTION) << "Tune init failed, res: " << res;
+  }
+
   return true;
 }
 
-int AscendKernelBuildClient::TbeStart(const std::string &json) {
+int AscendKernelBuildClient::TbeStart(const std::string &json, const std::string &mode) {
   if (!init_flag) {
-    if (!TbePre()) {
+    if (!TbePre(mode)) {
       MS_LOG(EXCEPTION) << "START failed";
     }
     init_flag = true;
diff --git a/mindspore/ccsrc/backend/session/kernel_build_client.h b/mindspore/ccsrc/backend/session/kernel_build_client.h
index e10932e6f4..f24c8676ea 100644
--- a/mindspore/ccsrc/backend/session/kernel_build_client.h
+++ b/mindspore/ccsrc/backend/session/kernel_build_client.h
@@ -200,6 +200,7 @@ class AscendKernelBuildClient : public KernelBuildClient {
  constexpr inline static auto kAkgStart = "AKG/START";
  constexpr inline static auto kAkgData = "AKG/DATA";
  constexpr inline static auto kAkgWait = "AKG/WAIT";
+  constexpr inline static auto kTbeTune = "TBE/TUNE";
 
  // Send server info. query to server
  constexpr inline static auto kFormat = "FORMAT";
@@ -222,7 +223,7 @@ class AscendKernelBuildClient : public KernelBuildClient {
  bool CheckSupported(const std::string &json);
 
  // Run TBE building.
-  int TbeStart(const std::string &json);
+  int TbeStart(const std::string &json, const std::string &mode);
  bool TbeWait(int *task_id, std::string *task_result, std::string *pre_build_result);
  void TbeReset();
 
@@ -239,7 +240,7 @@ class AscendKernelBuildClient : public KernelBuildClient {
  AscendKernelBuildClient &operator=(AscendKernelBuildClient &&) = delete;
 
 private:
-  bool TbePre();
+  bool TbePre(const std::string &mode);
  AscendKernelBuildClient() { Open(); }
  ~AscendKernelBuildClient() override { Close(); }
 };
diff --git a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
index 35f696e243..efd8f46767 100644
--- a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
+++ b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
@@ -94,6 +94,7 @@ REGISTER_PYBIND_DEFINE(MsContextPy, ([](const py::module *m) {
                           .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH)
                           .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE)
                           .value("device_id", MsCtxParam::MS_CTX_DEVICE_ID)
+                          .value("tune_mode", MsCtxParam::MS_CTX_TUNE_MODE)
                           .value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH)
                           .value("env_config_path", MsCtxParam::MS_CTX_ENV_CONFIG_PATH)
                           .value("grad_for_scalar", MsCtxParam::MS_CTX_GRAD_FOR_SCALAR);
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
index e96cadb4a9..7d3bc87372 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
@@ -262,6 +262,13 @@ nlohmann::json ConstructTransDataKernelJson(const std::vector<size_t> &host_shape,
   op_info[kernel_name_str] = "";
   op_info[name] = trans_data;
   op_info[outputs_str] = ConstructOutputs(host_shape, type);
+  // construct soc_info
+  nlohmann::json soc_info;
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  auto tune_mode = ms_context->get_param<std::string>(MS_CTX_TUNE_MODE);
+  soc_info["autoTilingMode"] = tune_mode;
+  kernel_json["SocInfo"] = soc_info;
   kernel_json[op_info_str] = op_info;
   kernel_json[platform_str] = platform_tbe;
   std::string json_str = kernel_json[op_info_str].dump();
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index e24655fa07..9eb19b52e6 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -36,6 +36,9 @@ constexpr auto kComputeAccidentalHitsOpName = "ComputeAccidentalHits";
 constexpr auto kCTCGreedyDecoderOpName = "CTCGreedyDecoder";
 constexpr auto kFour2FiveOpName = "Four2Five";
 constexpr auto kFive2FourOpName = "Five2Four";
+constexpr auto kConv3DOpName = "Conv3D";
+constexpr auto kConv3DBackpropFilterOpName = "Conv3DBackpropFilter";
+constexpr auto kConv3DBackpropInputOpName = "Conv3DBackpropInput";
 constexpr auto kConv2DOpName = "Conv2D";
 constexpr auto kConvBN1OpName = "ConvBN1";
 constexpr auto kBN2AddReluOpName = "BN2AddRelu";
diff --git a/mindspore/context.py b/mindspore/context.py
index e15a8f4cfc..3a4863eb79 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -204,6 +204,13 @@ class _Context:
         if self.enable_debug_runtime and target == "CPU":
             self.set_backend_policy("vm")
 
+    def set_auto_tune_mode(self, tune_mode):
+        candidate = ["NO_TUNE", "RL", "GA", "RL,GA", "GA,RL"]
+        if tune_mode in candidate:
+            self.set_param(ms_ctx_param.tune_mode, tune_mode)
+        else:
+            raise ValueError(f"Tune mode must be in ['NO_TUNE', 'RL', 'GA', 'RL,GA', 'GA,RL'], but got {tune_mode}")
+
     def set_device_id(self, device_id):
         if device_id < 0 or device_id > 4095:
             raise ValueError(f"Device id must be in [0, 4095], but got {device_id}")
@@ -276,6 +283,7 @@ class _Context:
         'save_graphs_path': set_save_graphs_path,
         'device_target': set_device_target,
         'device_id': set_device_id,
+        'auto_tune_mode': set_auto_tune_mode,
         'max_call_depth': set_max_call_depth,
         'profiling_options': set_profiling_options,
         'variable_memory_max_size': set_variable_memory_max_size,
@@ -480,6 +488,7 @@ def _check_target_specific_cfgs(device, arg_key):
        'profiling_options': ['Ascend'],
        'print_file_path': ['Ascend'],
        'variable_memory_max_size': ['Ascend'],
+        'auto_tune_mode': ['Ascend'],
        'max_device_memory': ['GPU']
    }
    # configs not in map device_cfgs are supposed to be suitable for all devices
@@ -494,7 +503,7 @@ def _check_target_specific_cfgs(device, arg_key):
 
 @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool,
-                 save_graphs_path=str, enable_dump=bool,
+                 save_graphs_path=str, enable_dump=bool, auto_tune_mode=str,
                  save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str,
                  enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool,
                  enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str,
@@ -531,7 +540,7 @@ def set_context(**kwargs):
    mode                        enable_profiling            reserve_class_name_in_scope
                                profiling_options           save_graphs
                                variable_memory_max_size
-                               save_graphs_path
+                               save_graphs_path            auto_tune_mode
                                env_config_path
                                grad_for_scalar
    =========================== =========================== =================
@@ -603,6 +612,13 @@ def set_context(**kwargs):
         enable_sparse (bool): Whether to enable sparsity feature. Default: False.
         max_call_depth (int): Specify the maximum depth of function call. Default: 1000.
         env_config_path (str): Config path for DFX.
+        auto_tune_mode (str): The mode of auto tune when building ops, to get the best tiling performance.
+            Default: NO_TUNE. The value must be in ['NO_TUNE', 'RL', 'GA', 'RL,GA', 'GA,RL'].
+            RL: rl_tune;
+            GA: ga_tune;
+            RL,GA: rl_tune/ga_tune (automatic selection).
+            - rl_tune: Reinforcement Learning tune.
+            - ga_tune: Genetic Algorithm tune.
         grad_for_scalar (bool): Whether to get gradient for scalar. Default: False.
 
     Raises:
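
Usage then mirrors any other Ascend-only context option, e.g.:

    from mindspore import context

    # Enable Genetic Algorithm tuning for TBE kernel builds (Ascend only).
    context.set_context(device_target="Ascend", auto_tune_mode="GA")
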
diff --git a/mindspore/core/utils/ms_context.cc b/mindspore/core/utils/ms_context.cc
index 54c6dba837..b916ea2b05 100644
--- a/mindspore/core/utils/ms_context.cc
+++ b/mindspore/core/utils/ms_context.cc
@@ -38,8 +38,10 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   set_param<bool>(MS_CTX_ENABLE_DUMP, false);
   set_param<std::string>(MS_CTX_SAVE_DUMP_PATH, ".");
   set_param<std::string>(MS_CTX_ENV_CONFIG_PATH, "");
+  set_param<std::string>(MS_CTX_TUNE_MODE, "NO_TUNE");
   set_param<uint32_t>(MS_CTX_TSD_REF, 0);
   set_param<uint32_t>(MS_CTX_GE_REF, 0);
+
   set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false);
   set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
   set_param<bool>(MS_CTX_ENABLE_REDUCE_PRECISION, true);
diff --git a/mindspore/core/utils/ms_context.h b/mindspore/core/utils/ms_context.h
index 11c81992fb..30ac0f88d8 100644
--- a/mindspore/core/utils/ms_context.h
+++ b/mindspore/core/utils/ms_context.h
@@ -108,6 +108,7 @@ enum MsCtxParam : unsigned {
   MS_CTX_VARIABLE_MEMORY_MAX_SIZE,
   MS_CTX_PYTHON_EXE_PATH,
   MS_CTX_ENV_CONFIG_PATH,
+  MS_CTX_TUNE_MODE,
   MS_CTX_TYPE_STRING_END,
 
   // parameter numbers of each type
diff --git a/mindspore/ops/_op_impl/tbe/conv2d.py b/mindspore/ops/_op_impl/tbe/conv2d.py
index f262eb9b9d..99a576ce75 100644
--- a/mindspore/ops/_op_impl/tbe/conv2d.py
+++ b/mindspore/ops/_op_impl/tbe/conv2d.py
@@ -29,6 +29,7 @@ conv2d_op_info = TBERegOp("Conv2D") \
     .attr("dilation", "required", "listInt", "all") \
     .attr("groups", "optional", "int", "all") \
     .attr("format", "optional", "str", "all") \
+    .attr("offset_x", "optional", "int", "all", "0") \
     .input(0, "x", False, "required", "all") \
     .input(1, "filter", False, "required", "all") \
     .input(2, "bias", False, "optional", "all") \
diff --git a/mindspore/ops/_op_impl/tbe/conv3d.py b/mindspore/ops/_op_impl/tbe/conv3d.py
index d3b4ced407..70bac6a1a1 100644
--- a/mindspore/ops/_op_impl/tbe/conv3d.py
+++ b/mindspore/ops/_op_impl/tbe/conv3d.py
@@ -28,7 +28,7 @@ conv3d_op_info = TBERegOp("Conv3D") \
     .attr("dilations", "required", "listInt", "all") \
     .attr("groups", "optional", "int", "all") \
     .attr("format", "optional", "str", "all") \
-    .attr("offset_x", "optional", "int", "all") \
+    .attr("offset_x", "optional", "int", "all", "0") \
     .input(0, "x", False, "required", "all") \
     .input(1, "filter", False, "required", "all") \
     .input(2, "bias", False, "optional", "all") \
diff --git a/mindspore/ops/_op_impl/tbe/depthwise_conv2d.py b/mindspore/ops/_op_impl/tbe/depthwise_conv2d.py
index d7ddce72ec..d7d1249eef 100644
--- a/mindspore/ops/_op_impl/tbe/depthwise_conv2d.py
+++ b/mindspore/ops/_op_impl/tbe/depthwise_conv2d.py
@@ -27,7 +27,7 @@ depthwise_conv2d_op_info = TBERegOp("DepthwiseConv2dNative") \
     .attr("dilation", "required", "listInt", "all") \
     .attr("pad_list", "required", "listInt", "all") \
     .attr("format", "required", "str", "all") \
-    .attr("offset_a", "optional", "int", "all") \
+    .attr("offset_a", "optional", "int", "all", "0") \
     .input(0, "x", False, "required", "all") \
     .input(1, "filter", False, "required", "all") \
     .input(2, "bias", False, "optional", "all") \
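
For completeness, a hypothetical registration showing the new trailing default-value argument to .attr(). Together with ParseAttrDefaultValue above, an optional attr that is missing from the node now falls back to this string instead of being marked invalid or failing the single-op build. The op name here is made up for illustration:

    from mindspore.ops.op_info_register import TBERegOp

    # Hypothetical op; only the default-value argument ("0") is of interest.
    demo_op_info = TBERegOp("DemoOp") \
        .fusion_type("OPAQUE") \
        .attr("offset_x", "optional", "int", "all", "0") \
        .get_op_info()
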