| @@ -0,0 +1,164 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """re construct json""" | |||||
| import json | |||||
def common_op_info(json_file):
    """
    Attach the common L1/slice detail fields to a tensor description.

    :param json_file: tensor desc dict, extended in place
    :return: the same dict with the default detail fields filled in
    """
    json_file.update({
        "L1_addr_offset": 0,
        "L1_fusion_type": -1,
        "L1_workspace_size": -1,
        "addr_type": 0,
        "slice_offset": [],
        "split_index": 0,
        "total_shape": [],
        "valid_shape": [],
    })
    return json_file
def create_input(json_info):
    """
    Create one "Data" op per input of a single op json.

    :param json_info: op info json (the "op_info" section of a single op json)
    :return: list of op descriptions, one "Data" node per input
    """
    ops = []
    # "inputs" may be absent or explicitly null for ops with no tensor inputs;
    # in both cases there is nothing to emit.
    for item in json_info.get("inputs") or []:
        # each input entry is a one-element list holding the tensor desc dict
        ops.append({
            "name": item[0]["name"],
            "output_desc": [common_op_info(item[0])],
            "type": "Data"
        })
    return ops
def create_inout_desc(ori_json):
    """
    Create input or output desc list, inserting "data_type" and detail infos.

    :param ori_json: input or output list; each item is a one-element list
                     wrapping a tensor desc dict (mutated in place)
    :return: list of the desc dicts, or the string "null" when ori_json is None
    """
    if ori_json is None:
        # callers serialize this value directly, so the literal string is intended
        return "null"
    out_list = []
    for item in ori_json:
        desc = item[0]
        # "dtype" may be missing for value-only entries; 0 is the agreed default
        desc["data_type"] = desc.get("dtype", 0)
        if "ori_format" in desc or "ori_shape" in desc:
            # real tensors: attach the common L1/slice detail fields
            desc["L1_addr_offset"] = 0
            desc["L1_fusion_type"] = -1
            desc["L1_workspace_size"] = -1
            desc["addr_type"] = 0
            desc["slice_offset"] = []
            desc["split_index"] = 0
            desc["total_shape"] = []
            desc["valid_shape"] = []
        else:
            # non-tensor entry: mark the shape as absent
            desc["shape"] = "NULL"
        out_list.append(desc)
    return out_list
def create_pre_build_attr(ori_json):
    """
    Create prebuild_outs_attrs: the first output desc followed by attr values.

    :param ori_json: origin op info json
    :return: dict with "kwds_args" (always empty) and "list_args"
    """
    list_args = [create_inout_desc(ori_json["outputs"])[0]]
    attrs = ori_json.get("attrs")
    if attrs is not None:
        # only attrs that carry a concrete value take part in the prebuild args
        list_args.extend(attr["value"] for attr in attrs if "value" in attr)
    return {"kwds_args": {}, "list_args": list_args}
def create_compute_op(ori_json):
    """
    Create the compute op's in and out desc.

    :param ori_json: origin op info json; must carry "name", "Type",
                     "full_name", "module_name" and "py_module_path"
    :return: dict describing the compute op
    """
    return {
        "func_name": ori_json["name"],
        # missing input/output sections are encoded as the literal "null"
        "input_desc": create_inout_desc(ori_json["inputs"]) if "inputs" in ori_json else "null",
        "module_name": ori_json["module_name"],
        "name": ori_json["full_name"],
        "output_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null",
        "output_data_desc": create_inout_desc(ori_json["outputs"]) if "outputs" in ori_json else "null",
        "pattern": ori_json.get("pattern", ""),
        "attr_desc": ori_json.get("attr_desc", "null"),
        "py_module_path": ori_json["py_module_path"],
        "type": ori_json["Type"]
    }
def single_to_fusion(json_file, tune_mode):
    """
    Change a single op json to a fusion op json for auto tune.

    :param json_file: origin single op json string
    :param tune_mode: tune mode recorded into SocInfo as "autoTilingMode"
    :return: fusion op json string containing one compute op
    """
    ori_file = json.loads(json_file)
    json_info = ori_file["op_info"]
    soc_info = ori_file["SocInfo"]
    soc_info["autoTilingMode"] = tune_mode
    # op_list = input "Data" nodes followed by the single compute op
    ops = create_input(json_info)
    ops.append(create_compute_op(json_info))
    end_file = {
        "SocInfo": soc_info,
        "fusion_op_name": json_info["kernel_name"],
        "l1_size": -1,
        "op_list": ops
    }
    return json.dumps(end_file, ensure_ascii=False)
def fusion_to_fusion(json_str, tune_mode):
    """
    Add l1_size and the tune mode to an existing fusion json.

    :param json_str: origin fusion json string
    :param tune_mode: tune mode recorded into SocInfo as "autoTilingMode"
    :return: fusion json string with SocInfo embedded in the fusion op
    """
    parsed = json.loads(json_str)
    fusion_op = parsed["fusion_op"]
    fusion_op["l1_size"] = -1
    soc_info = parsed["SocInfo"]
    soc_info["autoTilingMode"] = tune_mode
    # the fusion op itself becomes the top-level object, carrying SocInfo along
    fusion_op["SocInfo"] = soc_info
    return json.dumps(fusion_op, ensure_ascii=False)
| @@ -117,6 +117,9 @@ class TbeProcess: | |||||
| self.__pool.join() | self.__pool.join() | ||||
| del self.__pool | del self.__pool | ||||
def init_auto_tune_env(self, mode):
    # Stub: auto-tune environment setup is a no-op in this process manager;
    # it always reports success so callers can handle the ack uniformly.
    # `mode` is the requested tune mode string (unused here).
    return "Success"
| def init_process_num(self): | def init_process_num(self): | ||||
| """ | """ | ||||
| init compile process num | init compile process num | ||||
| @@ -24,6 +24,9 @@ class TbeBuilder: | |||||
| def __init__(self): | def __init__(self): | ||||
| self.tbe_builder = create_tbe_parallel_process() | self.tbe_builder = create_tbe_parallel_process() | ||||
def init_auto_tune_env(self, mode):
    # Delegate auto-tune environment initialization to the underlying
    # tbe parallel process and return its status string.
    return self.tbe_builder.init_auto_tune_env(mode)
| def create(self): | def create(self): | ||||
| return self.tbe_builder.init_process_num() | return self.tbe_builder.init_process_num() | ||||
| @@ -75,6 +78,11 @@ class AscendMessager(Messager): | |||||
| if arg == 'TBE/PRE': | if arg == 'TBE/PRE': | ||||
| ans = self.tbe_builder.create() | ans = self.tbe_builder.create() | ||||
| self.send_res(ans) | self.send_res(ans) | ||||
| elif arg == "TBE/TUNE": | |||||
| self.send_ack() | |||||
| tune_mode = self.get_message() | |||||
| ans = self.tbe_builder.init_auto_tune_env(tune_mode) | |||||
| self.send_res(ans) | |||||
| elif arg == 'TBE/START': | elif arg == 'TBE/START': | ||||
| self.send_ack() | self.send_ack() | ||||
| json = self.get_message() | json = self.get_message() | ||||
| @@ -55,6 +55,20 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| std::map<int64_t, KernelModPtr> kernel_mod_ret; | std::map<int64_t, KernelModPtr> kernel_mod_ret; | ||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | auto build_manger = std::make_shared<ParallelBuildManager>(); | ||||
| MS_EXCEPTION_IF_NULL(build_manger); | MS_EXCEPTION_IF_NULL(build_manger); | ||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | |||||
| std::string offline_tune = common::GetEnv("ENABLE_TUNE_DUMP"); | |||||
| if (!offline_tune.empty()) { | |||||
| for (size_t j = 0; j < offline_tune.length(); j++) { | |||||
| offline_tune[j] = tolower(offline_tune[j]); | |||||
| } | |||||
| if (!(offline_tune == "true" || offline_tune == "false")) { | |||||
| MS_LOG(EXCEPTION) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'"; | |||||
| } | |||||
| } | |||||
| for (const auto &fusion_scope_iter : fusion_scopes) { | for (const auto &fusion_scope_iter : fusion_scopes) { | ||||
| string fusion_kernel_name; | string fusion_kernel_name; | ||||
| nlohmann::json fusion_op; | nlohmann::json fusion_op; | ||||
| @@ -64,11 +78,9 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| } | } | ||||
| // gen kernel_name & check cache | // gen kernel_name & check cache | ||||
| size_t hash_id = GenFusionJsonHash(fusion_op); | size_t hash_id = GenFusionJsonHash(fusion_op); | ||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| auto json_name = | auto json_name = | ||||
| fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id)); | fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id)); | ||||
| fusion_op["graph_id"] = fusion_scope_iter.graph_id; | |||||
| fusion_op["fusion_op_name"] = json_name; | fusion_op["fusion_op_name"] = json_name; | ||||
| // get io size | // get io size | ||||
| std::vector<size_t> input_size_list; | std::vector<size_t> input_size_list; | ||||
| @@ -79,7 +91,7 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| } | } | ||||
| // search cache | // search cache | ||||
| auto kernel_pack = TbeUtils::SearchCache(json_name, tbe::kProcessorAiCore); | auto kernel_pack = TbeUtils::SearchCache(json_name, tbe::kProcessorAiCore); | ||||
| if (kernel_pack != nullptr) { | |||||
| if (kernel_pack != nullptr && ((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) { | |||||
| auto kernel_mod = | auto kernel_mod = | ||||
| build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack); | build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack); | ||||
| if (kernel_mod != nullptr) { | if (kernel_mod != nullptr) { | ||||
| @@ -87,9 +99,16 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| continue; | continue; | ||||
| } | } | ||||
| } | } | ||||
| // generate soc info json | |||||
| nlohmann::json soc_info_json; | |||||
| TbeUtils::GenSocInfo(&soc_info_json); | |||||
| soc_info_json["autoTilingMode"] = tune_mode; | |||||
| auto soc_version = TbeKernelJsonCreator::GetSocVersion(); | |||||
| soc_info_json["socVersion"] = soc_version; | |||||
| // fusion build | // fusion build | ||||
| nlohmann::json fusion_json; | nlohmann::json fusion_json; | ||||
| fusion_json["fusion_op"] = fusion_op; | fusion_json["fusion_op"] = fusion_op; | ||||
| fusion_json["SocInfo"] = soc_info_json; | |||||
| auto task_id = build_manger->StartCompileOp(fusion_json); | auto task_id = build_manger->StartCompileOp(fusion_json); | ||||
| TbeUtils::SaveJsonInfo(json_name, fusion_json.dump()); | TbeUtils::SaveJsonInfo(json_name, fusion_json.dump()); | ||||
| if (task_id < 0) { | if (task_id < 0) { | ||||
| @@ -26,9 +26,15 @@ namespace kernel { | |||||
| * @brief fuse op and return a callable mod | * @brief fuse op and return a callable mod | ||||
| */ | */ | ||||
| struct FusionScopeInfo { | struct FusionScopeInfo { | ||||
| FusionScopeInfo(int64_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out) | |||||
| : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {} | |||||
| FusionScopeInfo(int64_t id, uint32_t g_id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, | |||||
| std::vector<AnfNodePtr> out) | |||||
| : scope_id(id), | |||||
| graph_id(g_id), | |||||
| input_nodes(std::move(in)), | |||||
| compute_nodes(std::move(comp)), | |||||
| output_nodes(std::move(out)) {} | |||||
| int64_t scope_id{}; | int64_t scope_id{}; | ||||
| uint32_t graph_id{}; | |||||
| std::vector<AnfNodePtr> input_nodes; | std::vector<AnfNodePtr> input_nodes; | ||||
| std::vector<AnfNodePtr> compute_nodes; | std::vector<AnfNodePtr> compute_nodes; | ||||
| std::vector<AnfNodePtr> output_nodes; | std::vector<AnfNodePtr> output_nodes; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" | #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" | ||||
| #include <memory> | #include <memory> | ||||
| #include <map> | #include <map> | ||||
| #include <list> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "base/core_ops.h" | #include "base/core_ops.h" | ||||
| #include "frontend/parallel/ops_info/ops_utils.h" | #include "frontend/parallel/ops_info/ops_utils.h" | ||||
| @@ -93,9 +94,13 @@ constexpr auto kJPattern = "pattern"; | |||||
| constexpr auto kJPyModulePath = "py_module_path"; | constexpr auto kJPyModulePath = "py_module_path"; | ||||
| constexpr auto kJAttrDesc = "attr_desc"; | constexpr auto kJAttrDesc = "attr_desc"; | ||||
| constexpr auto kJSocVersion = "socVersion"; | constexpr auto kJSocVersion = "socVersion"; | ||||
| constexpr auto kAutoTilingMode = "autoTilingMode"; | |||||
| constexpr auto kSOC_VERSION = "SOC_VERSION"; | constexpr auto kSOC_VERSION = "SOC_VERSION"; | ||||
| constexpr auto kJIsDynamicShape = "is_dynamic_shape"; | constexpr auto kJIsDynamicShape = "is_dynamic_shape"; | ||||
| constexpr auto kJDynamicIndex = "dynamic_index"; | constexpr auto kJDynamicIndex = "dynamic_index"; | ||||
| constexpr auto kJSocInfo = "SocInfo"; | |||||
| const auto kPyPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe"; | |||||
| bool IsNeedChangeDefaultFormat(const CNodePtr &cnode) { | bool IsNeedChangeDefaultFormat(const CNodePtr &cnode) { | ||||
| MS_EXCEPTION_IF_NULL(cnode); | MS_EXCEPTION_IF_NULL(cnode); | ||||
| @@ -114,11 +119,14 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||||
| auto op_info_ptr = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, anf_node); | auto op_info_ptr = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, anf_node); | ||||
| MS_EXCEPTION_IF_NULL(op_info_ptr); | MS_EXCEPTION_IF_NULL(op_info_ptr); | ||||
| (*kernel_json)[kPlatform] = kPlatTBE; | (*kernel_json)[kPlatform] = kPlatTBE; | ||||
| (*kernel_json)[kGenModel] = kSingle; | |||||
| (*kernel_json)[kImplPath] = op_info_ptr->impl_path(); | (*kernel_json)[kImplPath] = op_info_ptr->impl_path(); | ||||
| nlohmann::json op_info_json; | nlohmann::json op_info_json; | ||||
| op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>()); | op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>()); | ||||
| op_info_json[kJName] = op_info_ptr->kernel_name(); | |||||
| auto func_name = op_info_ptr->kernel_name(); | |||||
| op_info_json["graph_id"] = AnfAlgo::GetGraphId(anf_node.get()); | |||||
| op_info_json[kJName] = func_name; | |||||
| op_info_json[kJModuleName] = std::string("impl.") + func_name; | |||||
| op_info_json[kJPyModulePath] = kPyPath; | |||||
| // generate inputs json | // generate inputs json | ||||
| nlohmann::json inputs_json; | nlohmann::json inputs_json; | ||||
| if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) { | if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) { | ||||
| @@ -148,11 +156,33 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | ||||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | |||||
| op_info_json[kJFullName] = anf_node->fullname_with_scope(); | |||||
| json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | ||||
| json_info_ = json_str; | json_info_ = json_str; | ||||
| op_info_json["Type"] = op_name; | |||||
| op_info_json[kJKernelName] = json_name_; | op_info_json[kJKernelName] = json_name_; | ||||
| op_info_json[kGenModel] = kSingle; | |||||
| op_info_json[kJFullName] = anf_node->fullname_with_scope(); | |||||
| // create attr_desc | |||||
| nlohmann::json attr_desc; | |||||
| for (const auto &attr : attrs_json) { | |||||
| if (attr[kJName] != "isRef" && attr[kJValid] == true) { | |||||
| attr_desc.push_back(attr[kJValue]); | |||||
| } | |||||
| } | |||||
| if (!attr_desc.empty()) { | |||||
| op_info_json[kJAttrDesc] = attr_desc; | |||||
| } | |||||
| // generate soc info json | |||||
| nlohmann::json soc_info_json; | |||||
| TbeUtils::GenSocInfo(&soc_info_json); | |||||
| soc_info_json[kAutoTilingMode] = tune_mode; | |||||
| soc_info_json[kJSocVersion] = soc_version; | |||||
| (*kernel_json)[kJSocInfo] = soc_info_json; | |||||
| (*kernel_json)[kJOpInfo] = op_info_json; | (*kernel_json)[kJOpInfo] = op_info_json; | ||||
| (*kernel_json)[kJFullName] = anf_node->fullname_with_scope(); | |||||
| MS_LOG(DEBUG) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope() | MS_LOG(DEBUG) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope() | ||||
| << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump(); | << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump(); | ||||
| @@ -452,14 +482,22 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no | |||||
| ParseAttrValue(type, value, &attr_obj); | ParseAttrValue(type, value, &attr_obj); | ||||
| attr_obj[kJValid] = true; | attr_obj[kJValid] = true; | ||||
| } else { | } else { | ||||
| if (op_info->impl_path().empty()) { | |||||
| attr_obj[kJValid] = false; | |||||
| auto default_value = attr_ptr->default_value(); | |||||
| if (!default_value.empty()) { | |||||
| std::string type = attr_ptr->type(); | |||||
| ParseAttrDefaultValue(type, default_value, &attr_obj); | |||||
| attr_obj[kJValid] = true; | |||||
| } else { | } else { | ||||
| if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) { | |||||
| MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name | |||||
| << " is required, but not set."; | |||||
| } else { | |||||
| MS_LOG(INFO) << "op " << op_name << "'s attr \"" << attr_name << "\" should have a default value."; | |||||
| if (op_info->impl_path().empty()) { | |||||
| attr_obj[kJValid] = false; | attr_obj[kJValid] = false; | ||||
| } else { | |||||
| if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) { | |||||
| MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name | |||||
| << " is required, but not set."; | |||||
| } else { | |||||
| attr_obj[kJValid] = false; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -567,6 +605,26 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo | |||||
| } | } | ||||
| } | } | ||||
// Convert an attr default value, supplied as a string, into the typed json
// value expected by the TBE compiler and store it under kJValue.
// Unsupported types are fatal (MS_LOG(EXCEPTION)).
void TbeKernelJsonCreator::ParseAttrDefaultValue(const std::string &type, const std::string &value,
                                                 nlohmann::json *attr_obj) {
  MS_EXCEPTION_IF_NULL(attr_obj);
  if (type == kVTypeInt) {
    (*attr_obj)[kJValue] = std::stoi(value);
  } else if (type == kVTypeInt64) {
    (*attr_obj)[kJValue] = std::stoll(value);
  } else if (type == kVTypeStr) {
    (*attr_obj)[kJValue] = value;
  } else if (type == kVTypeBool) {
    bool attr_value;
    // parse textual "true"/"false" via boolalpha
    std::istringstream(value) >> std::boolalpha >> attr_value;
    (*attr_obj)[kJValue] = attr_value;
  } else if (type == kVTypeFloat) {
    (*attr_obj)[kJValue] = std::stof(value);
  } else {
    MS_LOG(EXCEPTION) << "Type: " << type << "not support";
  }
}
| std::vector<size_t> TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const { | std::vector<size_t> TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const { | ||||
| MS_EXCEPTION_IF_NULL(anf_node); | MS_EXCEPTION_IF_NULL(anf_node); | ||||
| std::vector<size_t> shape; | std::vector<size_t> shape; | ||||
| @@ -792,7 +850,7 @@ void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode | |||||
| (*compute_op_str)[kJModuleName] = std::string("impl.") + func_name; | (*compute_op_str)[kJModuleName] = std::string("impl.") + func_name; | ||||
| (*compute_op_str)[kJName] = cnode->fullname_with_scope(); | (*compute_op_str)[kJName] = cnode->fullname_with_scope(); | ||||
| (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode); | (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode); | ||||
| (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe"; | |||||
| (*compute_op_str)[kJPyModulePath] = kPyPath; | |||||
| (void)(*fusion_kernel_name).append("_"); | (void)(*fusion_kernel_name).append("_"); | ||||
| (void)(*fusion_kernel_name).append(func_name); | (void)(*fusion_kernel_name).append(func_name); | ||||
| // attr_desc | // attr_desc | ||||
| @@ -899,12 +957,14 @@ void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::An | |||||
| } | } | ||||
// Describe a reused (placeholder) output slot: it gets an empty shape and
// borrows the device data type of the node's last real output (out_size - 1).
// NOTE(review): assumes out_size >= 1 — confirm callers never pass 0.
void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                         size_t output_index, nlohmann::json *output_desc, const size_t out_size) {
  std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
  (*output_desc)[kJName] = output_desc_name;
  (*output_desc)[kJOutputIndex] = output_index;
  std::vector<size_t> shape;
  // reused outputs carry no shape of their own
  (*output_desc)[kJShape] = shape;
  auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_size - 1);
  (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
}
| bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | ||||
| @@ -1176,7 +1236,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode | |||||
| for (size_t j = output_size; j < desc_output_index.size(); ++j) { | for (size_t j = output_size; j < desc_output_index.size(); ++j) { | ||||
| MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j]; | MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j]; | ||||
| nlohmann::json output_desc; | nlohmann::json output_desc; | ||||
| GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc); | |||||
| GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc, output_size); | |||||
| output_desc_list->emplace_back(output_desc); | output_desc_list->emplace_back(output_desc); | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -73,7 +73,7 @@ class TbeKernelBuild { | |||||
| nlohmann::json *output_data_desc); | nlohmann::json *output_data_desc); | ||||
| static void GenSuffixDescJson(nlohmann::json *output_desc); | static void GenSuffixDescJson(nlohmann::json *output_desc); | ||||
| static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | ||||
| size_t output_index, nlohmann::json *output_desc); | |||||
| size_t output_index, nlohmann::json *output_desc, const size_t out_size); | |||||
| static size_t GetIOSizeImpl(const nlohmann::json &desc); | static size_t GetIOSizeImpl(const nlohmann::json &desc); | ||||
| static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer, | static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer, | ||||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | ||||
| @@ -102,7 +102,9 @@ class TbeKernelJsonCreator { | |||||
| nlohmann::json *inputs_json); | nlohmann::json *inputs_json); | ||||
| bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | ||||
| nlohmann::json *outputs_json); | nlohmann::json *outputs_json); | ||||
| void GenSocInfo(nlohmann::json *soc_info_json); | |||||
| static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); | static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); | ||||
| static void ParseAttrDefaultValue(const std::string &type, const std::string &value, nlohmann::json *attr_obj); | |||||
| bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value, | bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value, | ||||
| const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i, | const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i, | ||||
| std::vector<nlohmann::json> *input_list); | std::vector<nlohmann::json> *input_list); | ||||
| @@ -37,6 +37,20 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | auto build_manger = std::make_shared<ParallelBuildManager>(); | ||||
| MS_EXCEPTION_IF_NULL(build_manger); | MS_EXCEPTION_IF_NULL(build_manger); | ||||
| set<std::string> processed_kernel; | set<std::string> processed_kernel; | ||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE); | |||||
| std::string offline_tune = common::GetEnv("ENABLE_TUNE_DUMP"); | |||||
| if (!offline_tune.empty()) { | |||||
| for (size_t j = 0; j < offline_tune.length(); j++) { | |||||
| offline_tune[j] = tolower(offline_tune[j]); | |||||
| } | |||||
| if (!(offline_tune == "true" || offline_tune == "false")) { | |||||
| MS_LOG(ERROR) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| for (const auto &anf_node : anf_nodes) { | for (const auto &anf_node : anf_nodes) { | ||||
| // gen kernel json | // gen kernel json | ||||
| if (AnfAlgo::GetKernelMod(anf_node) != nullptr) { | if (AnfAlgo::GetKernelMod(anf_node) != nullptr) { | ||||
| @@ -56,7 +70,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||||
| (void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, anf_node); | (void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, anf_node); | ||||
| // search cache | // search cache | ||||
| const std::string &json_name = creator.json_name(); | const std::string &json_name = creator.json_name(); | ||||
| if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get())) { | |||||
| if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get()) && | |||||
| ((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| // same op not need build, but need wait build finish to set kernel mode | // same op not need build, but need wait build finish to set kernel mode | ||||
| @@ -227,7 +242,8 @@ KernelModPtr ParallelBuildManager::GenKernelMod(const string &json_name, const s | |||||
| } | } | ||||
// Forward a compile request to the kernel build client. The tune mode is read
// back out of the json's SocInfo so the client can choose tune vs. plain build.
// NOTE(review): assumes kernel_json always carries SocInfo/autoTilingMode —
// confirm all callers populate it before StartCompileOp.
int ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json) {
  auto tune_mode = kernel_json["SocInfo"]["autoTilingMode"];
  return AscendKernelBuildClient::Instance().TbeStart(kernel_json.dump(), tune_mode);
}
| bool ParallelBuildManager::WaitOne(int *task_id, std::string *task_result, std::string *pre_build_result) { | bool ParallelBuildManager::WaitOne(int *task_id, std::string *task_result, std::string *pre_build_result) { | ||||
| @@ -19,6 +19,8 @@ | |||||
| #include <dirent.h> | #include <dirent.h> | ||||
| #include <string> | #include <string> | ||||
| #include <map> | #include <map> | ||||
| #include <set> | |||||
| #include <list> | |||||
| #include <functional> | #include <functional> | ||||
| #include <iostream> | #include <iostream> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -26,7 +28,9 @@ | |||||
| #include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| #include "utils/ms_utils.h" | #include "utils/ms_utils.h" | ||||
| #include "utils/ms_context.h" | |||||
| #include "ir/dtype/type.h" | #include "ir/dtype/type.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" | #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" | ||||
| #include "securec/include/securec.h" | #include "securec/include/securec.h" | ||||
| @@ -40,6 +44,19 @@ constexpr auto kInfoSuffix = ".info"; | |||||
| uintptr_t KernelManager::kernel_stub_gen_ = 0; | uintptr_t KernelManager::kernel_stub_gen_ = 0; | ||||
| std::unordered_map<string, KernelMetaPtr> KernelManager::info_table_ = {}; | std::unordered_map<string, KernelMetaPtr> KernelManager::info_table_ = {}; | ||||
// Fill *soc_info_json with the default/placeholder SocInfo fields; the
// concrete entries (autoTilingMode, socVersion) are set by the callers
// after this function returns.
void TbeUtils::GenSocInfo(nlohmann::json *soc_info_json) {
  MS_EXCEPTION_IF_NULL(soc_info_json);
  std::list<int64_t> list;  // empty list placeholder for op_impl_mode_list
  (*soc_info_json)["coreNum"] = "";
  (*soc_info_json)["coreType"] = "";
  (*soc_info_json)["l1Fusion"] = "false";
  (*soc_info_json)["l2Fusion"] = "false";
  (*soc_info_json)["l2Mode"] = "2";
  (*soc_info_json)["op_debug_level"] = "";
  (*soc_info_json)["op_impl_mode"] = "";
  (*soc_info_json)["op_impl_mode_list"] = list;
}
| void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &info) { | void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &info) { | ||||
| char real_path[PATH_MAX] = {0}; | char real_path[PATH_MAX] = {0}; | ||||
| std::string path = kCceKernelMeta + json_name + kInfoSuffix; | std::string path = kCceKernelMeta + json_name + kInfoSuffix; | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include <utility> | #include <utility> | ||||
| #include <map> | #include <map> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <nlohmann/json.hpp> | |||||
| #include "backend/session/kernel_graph.h" | #include "backend/session/kernel_graph.h" | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| @@ -43,6 +44,8 @@ class TbeUtils { | |||||
| static void LoadCache(); | static void LoadCache(); | ||||
| static void GenSocInfo(nlohmann::json *soc_info_json); | |||||
| static KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); | static KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); | ||||
| static KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); | static KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); | ||||
| @@ -43,6 +43,7 @@ const int8_t MULTI_ELTWISE_SIZE = 4; | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | ||||
| struct BufferFusionInfo_t { | struct BufferFusionInfo_t { | ||||
| uint32_t graph_id; | |||||
| std::vector<AnfNodePtr> anf_nodes; | std::vector<AnfNodePtr> anf_nodes; | ||||
| std::vector<AnfNodePtr> inputs_list; | std::vector<AnfNodePtr> inputs_list; | ||||
| std::vector<AnfNodePtr> outputs_list; | std::vector<AnfNodePtr> outputs_list; | ||||
| @@ -381,6 +381,7 @@ void RemoveCircle(const session::KernelGraph &kernel_graph, | |||||
| void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | ||||
| std::unordered_map<int64_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | std::unordered_map<int64_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | ||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | ||||
| auto graph_id = kernel_graph->graph_id(); | |||||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | ||||
| GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | ||||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | ||||
| @@ -390,6 +391,7 @@ void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | for (auto &buffer_fusion_info : *buffer_fusion_infos) { | ||||
| buffer_fusion_info.second.kernel_build_info = | buffer_fusion_info.second.kernel_build_info = | ||||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | ||||
| buffer_fusion_info.second.graph_id = graph_id; | |||||
| } | } | ||||
| } | } | ||||
| @@ -403,9 +405,9 @@ bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph | |||||
| std::transform( | std::transform( | ||||
| buffer_fusion_infos.begin(), buffer_fusion_infos.end(), std::back_inserter(fusion_scope_infos), | buffer_fusion_infos.begin(), buffer_fusion_infos.end(), std::back_inserter(fusion_scope_infos), | ||||
| [](const std::pair<int64_t, BufferFusionInfo_t> &buffer_fusion_info) -> mindspore::kernel::FusionScopeInfo { | [](const std::pair<int64_t, BufferFusionInfo_t> &buffer_fusion_info) -> mindspore::kernel::FusionScopeInfo { | ||||
| return mindspore::kernel::FusionScopeInfo(buffer_fusion_info.first, buffer_fusion_info.second.inputs_list, | |||||
| buffer_fusion_info.second.anf_nodes, | |||||
| buffer_fusion_info.second.outputs_list); | |||||
| return mindspore::kernel::FusionScopeInfo( | |||||
| buffer_fusion_info.first, buffer_fusion_info.second.graph_id, buffer_fusion_info.second.inputs_list, | |||||
| buffer_fusion_info.second.anf_nodes, buffer_fusion_info.second.outputs_list); | |||||
| }); | }); | ||||
| auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | ||||
| std::set<int64_t> fusion_ids; | std::set<int64_t> fusion_ids; | ||||
| @@ -28,18 +28,28 @@ void ReplaceStr(std::string *dest, const std::string &replace, char new_char) { | |||||
| } | } | ||||
| } | } | ||||
| bool AscendKernelBuildClient::TbePre() { | |||||
| bool AscendKernelBuildClient::TbePre(const std::string &mode) { | |||||
| auto res = SendRequest(kTbePre); | auto res = SendRequest(kTbePre); | ||||
| if (res.find(kSuccess) == res.npos) { | if (res.find(kSuccess) == res.npos) { | ||||
| MS_LOG(EXCEPTION) << "PRE failed, res: " << res; | MS_LOG(EXCEPTION) << "PRE failed, res: " << res; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Pre " << res; | MS_LOG(INFO) << "Pre " << res; | ||||
| // init env for auto tune | |||||
| res = SendRequest(kTbeTune); | |||||
| if (res != kAck) { | |||||
| MS_LOG(EXCEPTION) << "Send tune single failed, res: " << res; | |||||
| } | |||||
| res = SendRequest(mode); | |||||
| if (res != kSuccess) { | |||||
| MS_LOG(EXCEPTION) << "PRE failed, res: " << res; | |||||
| } | |||||
| return true; | return true; | ||||
| } | } | ||||
| int AscendKernelBuildClient::TbeStart(const std::string &json) { | |||||
| int AscendKernelBuildClient::TbeStart(const std::string &json, const std::string &mode) { | |||||
| if (!init_flag) { | if (!init_flag) { | ||||
| if (!TbePre()) { | |||||
| if (!TbePre(mode)) { | |||||
| MS_LOG(EXCEPTION) << "START failed"; | MS_LOG(EXCEPTION) << "START failed"; | ||||
| } | } | ||||
| init_flag = true; | init_flag = true; | ||||
| @@ -200,6 +200,7 @@ class AscendKernelBuildClient : public KernelBuildClient { | |||||
| constexpr inline static auto kAkgStart = "AKG/START"; | constexpr inline static auto kAkgStart = "AKG/START"; | ||||
| constexpr inline static auto kAkgData = "AKG/DATA"; | constexpr inline static auto kAkgData = "AKG/DATA"; | ||||
| constexpr inline static auto kAkgWait = "AKG/WAIT"; | constexpr inline static auto kAkgWait = "AKG/WAIT"; | ||||
| constexpr inline static auto kTbeTune = "TBE/TUNE"; | |||||
| // Send server info. query to server | // Send server info. query to server | ||||
| constexpr inline static auto kFormat = "FORMAT"; | constexpr inline static auto kFormat = "FORMAT"; | ||||
| @@ -222,7 +223,7 @@ class AscendKernelBuildClient : public KernelBuildClient { | |||||
| bool CheckSupported(const std::string &json); | bool CheckSupported(const std::string &json); | ||||
| // Run TBE building. | // Run TBE building. | ||||
| int TbeStart(const std::string &json); | |||||
| int TbeStart(const std::string &json, const std::string &mode); | |||||
| bool TbeWait(int *task_id, std::string *task_result, std::string *pre_build_result); | bool TbeWait(int *task_id, std::string *task_result, std::string *pre_build_result); | ||||
| void TbeReset(); | void TbeReset(); | ||||
| @@ -239,7 +240,7 @@ class AscendKernelBuildClient : public KernelBuildClient { | |||||
| AscendKernelBuildClient &operator=(AscendKernelBuildClient &&) = delete; | AscendKernelBuildClient &operator=(AscendKernelBuildClient &&) = delete; | ||||
| private: | private: | ||||
| bool TbePre(); | |||||
| bool TbePre(const std::string &mode); | |||||
| AscendKernelBuildClient() { Open(); } | AscendKernelBuildClient() { Open(); } | ||||
| ~AscendKernelBuildClient() override { Close(); } | ~AscendKernelBuildClient() override { Close(); } | ||||
| }; | }; | ||||
| @@ -94,6 +94,7 @@ REGISTER_PYBIND_DEFINE(MsContextPy, ([](const py::module *m) { | |||||
| .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH) | .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH) | ||||
| .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE) | .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE) | ||||
| .value("device_id", MsCtxParam::MS_CTX_DEVICE_ID) | .value("device_id", MsCtxParam::MS_CTX_DEVICE_ID) | ||||
| .value("tune_mode", MsCtxParam::MS_CTX_TUNE_MODE) | |||||
| .value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH) | .value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH) | ||||
| .value("env_config_path", MsCtxParam::MS_CTX_ENV_CONFIG_PATH) | .value("env_config_path", MsCtxParam::MS_CTX_ENV_CONFIG_PATH) | ||||
| .value("grad_for_scalar", MsCtxParam::MS_CTX_GRAD_FOR_SCALAR); | .value("grad_for_scalar", MsCtxParam::MS_CTX_GRAD_FOR_SCALAR); | ||||
| @@ -262,6 +262,13 @@ nlohmann::json ConstructTransDataKernelJson(const std::vector<size_t> &host_shap | |||||
| op_info[kernel_name_str] = ""; | op_info[kernel_name_str] = ""; | ||||
| op_info[name] = trans_data; | op_info[name] = trans_data; | ||||
| op_info[outputs_str] = ConstructOutputs(host_shape, type); | op_info[outputs_str] = ConstructOutputs(host_shape, type); | ||||
| // construct soc_info | |||||
| nlohmann::json soc_info; | |||||
| auto ms_context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(ms_context); | |||||
| auto tune_mode = ms_context->get_param<std::string>(MS_CTX_TUNE_MODE); | |||||
| soc_info["autoTilingMode"] = tune_mode; | |||||
| kernel_json["SocInfo"] = soc_info; | |||||
| kernel_json[op_info_str] = op_info; | kernel_json[op_info_str] = op_info; | ||||
| kernel_json[platform_str] = platform_tbe; | kernel_json[platform_str] = platform_tbe; | ||||
| std::string json_str = kernel_json[op_info_str].dump(); | std::string json_str = kernel_json[op_info_str].dump(); | ||||
| @@ -36,6 +36,9 @@ constexpr auto kComputeAccidentalHitsOpName = "ComputeAccidentalHits"; | |||||
| constexpr auto kCTCGreedyDecoderOpName = "CTCGreedyDecoder"; | constexpr auto kCTCGreedyDecoderOpName = "CTCGreedyDecoder"; | ||||
| constexpr auto kFour2FiveOpName = "Four2Five"; | constexpr auto kFour2FiveOpName = "Four2Five"; | ||||
| constexpr auto kFive2FourOpName = "Five2Four"; | constexpr auto kFive2FourOpName = "Five2Four"; | ||||
| constexpr auto kConv3DOpName = "Conv3D"; | |||||
| constexpr auto kConv3DBackpropFilterOpName = "Conv3DBackpropFilter"; | |||||
| constexpr auto kConv3DBackpropInputOpName = "Conv3DBackpropInput"; | |||||
| constexpr auto kConv2DOpName = "Conv2D"; | constexpr auto kConv2DOpName = "Conv2D"; | ||||
| constexpr auto kConvBN1OpName = "ConvBN1"; | constexpr auto kConvBN1OpName = "ConvBN1"; | ||||
| constexpr auto kBN2AddReluOpName = "BN2AddRelu"; | constexpr auto kBN2AddReluOpName = "BN2AddRelu"; | ||||
| @@ -204,6 +204,13 @@ class _Context: | |||||
| if self.enable_debug_runtime and target == "CPU": | if self.enable_debug_runtime and target == "CPU": | ||||
| self.set_backend_policy("vm") | self.set_backend_policy("vm") | ||||
| def set_auto_tune_mode(self, tune_mode): | |||||
| candidate = ["NO_TUNE", "RL", "GA", "RL,GA", "GA,RL"] | |||||
| if tune_mode in candidate: | |||||
| self.set_param(ms_ctx_param.tune_mode, tune_mode) | |||||
| else: | |||||
| raise ValueError(f"Tune mode must be in ['NO_TUNE', 'RL', 'GA', 'RL,GA', 'GA,RL'], but got {tune_mode}") | |||||
| def set_device_id(self, device_id): | def set_device_id(self, device_id): | ||||
| if device_id < 0 or device_id > 4095: | if device_id < 0 or device_id > 4095: | ||||
| raise ValueError(f"Device id must be in [0, 4095], but got {device_id}") | raise ValueError(f"Device id must be in [0, 4095], but got {device_id}") | ||||
| @@ -276,6 +283,7 @@ class _Context: | |||||
| 'save_graphs_path': set_save_graphs_path, | 'save_graphs_path': set_save_graphs_path, | ||||
| 'device_target': set_device_target, | 'device_target': set_device_target, | ||||
| 'device_id': set_device_id, | 'device_id': set_device_id, | ||||
| 'auto_tune_mode': set_auto_tune_mode, | |||||
| 'max_call_depth': set_max_call_depth, | 'max_call_depth': set_max_call_depth, | ||||
| 'profiling_options': set_profiling_options, | 'profiling_options': set_profiling_options, | ||||
| 'variable_memory_max_size': set_variable_memory_max_size, | 'variable_memory_max_size': set_variable_memory_max_size, | ||||
| @@ -480,6 +488,7 @@ def _check_target_specific_cfgs(device, arg_key): | |||||
| 'profiling_options': ['Ascend'], | 'profiling_options': ['Ascend'], | ||||
| 'print_file_path': ['Ascend'], | 'print_file_path': ['Ascend'], | ||||
| 'variable_memory_max_size': ['Ascend'], | 'variable_memory_max_size': ['Ascend'], | ||||
| 'auto_tune_mode': ['Ascend'], | |||||
| 'max_device_memory': ['GPU'] | 'max_device_memory': ['GPU'] | ||||
| } | } | ||||
| # configs not in map device_cfgs are supposed to be suitable for all devices | # configs not in map device_cfgs are supposed to be suitable for all devices | ||||
| @@ -494,7 +503,7 @@ def _check_target_specific_cfgs(device, arg_key): | |||||
| @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool, | @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool, | ||||
| save_graphs_path=str, enable_dump=bool, | |||||
| save_graphs_path=str, enable_dump=bool, auto_tune_mode=str, | |||||
| save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | ||||
| enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | ||||
| enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str, | enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str, | ||||
| @@ -531,7 +540,7 @@ def set_context(**kwargs): | |||||
| mode enable_profiling | mode enable_profiling | ||||
| reserve_class_name_in_scope profiling_options | reserve_class_name_in_scope profiling_options | ||||
| save_graphs variable_memory_max_size | save_graphs variable_memory_max_size | ||||
| save_graphs_path | |||||
| save_graphs_path auto_tune_mode | |||||
| env_config_path | env_config_path | ||||
| grad_for_scalar | grad_for_scalar | ||||
| =========================== =========================== ================= | =========================== =========================== ================= | ||||
| @@ -603,6 +612,13 @@ def set_context(**kwargs): | |||||
| enable_sparse (bool): Whether to enable sparsity feature. Default: False. | enable_sparse (bool): Whether to enable sparsity feature. Default: False. | ||||
| max_call_depth (int): Specify the maximum depth of function call. Default: 1000. | max_call_depth (int): Specify the maximum depth of function call. Default: 1000. | ||||
| env_config_path (str): Config path for DFX. | env_config_path (str): Config path for DFX. | ||||
| auto_tune_mode (str): The mode of auto tune when op building, get the best tiling performance, | |||||
| default: NO_TUNE. The value must be in ['RL', 'GA', 'RL,GA']. | |||||
| RL: rl_tune; | |||||
| GA: ga_tune; | |||||
| RL,GA: rl_tune/ga_tune(Automatic selection). | |||||
| - rl_tune: Reinforecement Learning tune. | |||||
| - ga_tune: Genetic Algorithm tune. | |||||
| grad_for_scalar (bool): Whether to get gradient for scalar. Default: False. | grad_for_scalar (bool): Whether to get gradient for scalar. Default: False. | ||||
| Raises: | Raises: | ||||
| @@ -38,8 +38,10 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { | |||||
| set_param<bool>(MS_CTX_ENABLE_DUMP, false); | set_param<bool>(MS_CTX_ENABLE_DUMP, false); | ||||
| set_param<std::string>(MS_CTX_SAVE_DUMP_PATH, "."); | set_param<std::string>(MS_CTX_SAVE_DUMP_PATH, "."); | ||||
| set_param<std::string>(MS_CTX_ENV_CONFIG_PATH, ""); | set_param<std::string>(MS_CTX_ENV_CONFIG_PATH, ""); | ||||
| set_param<std::string>(MS_CTX_TUNE_MODE, "NO_TUNE"); | |||||
| set_param<uint32_t>(MS_CTX_TSD_REF, 0); | set_param<uint32_t>(MS_CTX_TSD_REF, 0); | ||||
| set_param<uint32_t>(MS_CTX_GE_REF, 0); | set_param<uint32_t>(MS_CTX_GE_REF, 0); | ||||
| set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false); | set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false); | ||||
| set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false); | set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false); | ||||
| set_param<bool>(MS_CTX_ENABLE_REDUCE_PRECISION, true); | set_param<bool>(MS_CTX_ENABLE_REDUCE_PRECISION, true); | ||||
| @@ -108,6 +108,7 @@ enum MsCtxParam : unsigned { | |||||
| MS_CTX_VARIABLE_MEMORY_MAX_SIZE, | MS_CTX_VARIABLE_MEMORY_MAX_SIZE, | ||||
| MS_CTX_PYTHON_EXE_PATH, | MS_CTX_PYTHON_EXE_PATH, | ||||
| MS_CTX_ENV_CONFIG_PATH, | MS_CTX_ENV_CONFIG_PATH, | ||||
| MS_CTX_TUNE_MODE, | |||||
| MS_CTX_TYPE_STRING_END, | MS_CTX_TYPE_STRING_END, | ||||
| // parameter numbers of each type | // parameter numbers of each type | ||||
| @@ -29,6 +29,7 @@ conv2d_op_info = TBERegOp("Conv2D") \ | |||||
| .attr("dilation", "required", "listInt", "all") \ | .attr("dilation", "required", "listInt", "all") \ | ||||
| .attr("groups", "optional", "int", "all") \ | .attr("groups", "optional", "int", "all") \ | ||||
| .attr("format", "optional", "str", "all") \ | .attr("format", "optional", "str", "all") \ | ||||
| .attr("offset_x", "optional", "int", "all", "0") \ | |||||
| .input(0, "x", False, "required", "all") \ | .input(0, "x", False, "required", "all") \ | ||||
| .input(1, "filter", False, "required", "all") \ | .input(1, "filter", False, "required", "all") \ | ||||
| .input(2, "bias", False, "optional", "all") \ | .input(2, "bias", False, "optional", "all") \ | ||||
| @@ -28,7 +28,7 @@ conv3d_op_info = TBERegOp("Conv3D") \ | |||||
| .attr("dilations", "required", "listInt", "all") \ | .attr("dilations", "required", "listInt", "all") \ | ||||
| .attr("groups", "optional", "int", "all") \ | .attr("groups", "optional", "int", "all") \ | ||||
| .attr("format", "optional", "str", "all") \ | .attr("format", "optional", "str", "all") \ | ||||
| .attr("offset_x", "optional", "int", "all") \ | |||||
| .attr("offset_x", "optional", "int", "all", "0") \ | |||||
| .input(0, "x", False, "required", "all") \ | .input(0, "x", False, "required", "all") \ | ||||
| .input(1, "filter", False, "required", "all") \ | .input(1, "filter", False, "required", "all") \ | ||||
| .input(2, "bias", False, "optional", "all") \ | .input(2, "bias", False, "optional", "all") \ | ||||
| @@ -27,7 +27,7 @@ depthwise_conv2d_op_info = TBERegOp("DepthwiseConv2dNative") \ | |||||
| .attr("dilation", "required", "listInt", "all") \ | .attr("dilation", "required", "listInt", "all") \ | ||||
| .attr("pad_list", "required", "listInt", "all") \ | .attr("pad_list", "required", "listInt", "all") \ | ||||
| .attr("format", "required", "str", "all") \ | .attr("format", "required", "str", "all") \ | ||||
| .attr("offset_a", "optional", "int", "all") \ | |||||
| .attr("offset_a", "optional", "int", "all", "0") \ | |||||
| .input(0, "x", False, "required", "all") \ | .input(0, "x", False, "required", "all") \ | ||||
| .input(1, "filter", False, "required", "all") \ | .input(1, "filter", False, "required", "all") \ | ||||
| .input(2, "bias", False, "optional", "all") \ | .input(2, "bias", False, "optional", "all") \ | ||||