| @@ -20,6 +20,7 @@ | |||
| #include <unordered_map> | |||
| #include <set> | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include <fstream> | |||
| @@ -36,78 +37,81 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| std::vector<PrimitivePtr> GetClusterableOpList() { | |||
| std::vector<PrimitivePtr> clusterable_ops = { | |||
| prim::kPrimAbs, | |||
| prim::kPrimAdd, | |||
| prim::kPrimCast, | |||
| prim::kPrimEqual, | |||
| prim::kPrimExp, | |||
| prim::kPrimInplaceAssign, | |||
| prim::kPrimLog, | |||
| prim::kPrimMaximum, | |||
| prim::kPrimMinimum, | |||
| prim::kPrimMul, | |||
| prim::kPrimNeg, | |||
| prim::kPrimPow, | |||
| prim::kPrimRealDiv, | |||
| prim::kPrimReciprocal, | |||
| prim::kPrimReduceSum, | |||
| prim::kPrimReshape, | |||
| prim::kPrimRound, | |||
| prim::kPrimRsqrt, | |||
| prim::kPrimSqrt, | |||
| prim::kPrimSub, | |||
| prim::kPrimTanh, | |||
| prim::kPrimTranspose, | |||
| #if ENABLE_D | |||
| prim::kPrimMatMul, | |||
| prim::kPrimTransData, | |||
| prim::kPrimBatchMatMul, | |||
| #elif ENABLE_GPU | |||
| prim::kPrimACos, | |||
| prim::kPrimAcosh, | |||
| prim::kPrimArgMax, | |||
| prim::kPrimArgMin, | |||
| prim::kPrimAsin, | |||
| prim::kPrimAsinh, | |||
| prim::kPrimAssign, | |||
| prim::kPrimAtan, | |||
| prim::kPrimAtan2, | |||
| prim::kPrimCos, | |||
| prim::kPrimDiv, | |||
| prim::kPrimErf, | |||
| prim::kPrimExpm1, | |||
| prim::kPrimFloor, | |||
| prim::kPrimFloorDiv, | |||
| prim::kPrimFloorMod, | |||
| prim::kPrimGreater, | |||
| prim::kPrimGreaterEqual, | |||
| prim::kPrimIsFinite, | |||
| prim::kPrimIsInf, | |||
| prim::kPrimIsNan, | |||
| prim::kPrimLess, | |||
| prim::kPrimLessEqual, | |||
| prim::kPrimLogicalAnd, | |||
| prim::kPrimLogicalOr, | |||
| prim::kPrimLogicalNot, | |||
| prim::kPrimMod, | |||
| prim::kPrimNotEqual, | |||
| prim::kPrimReduceMax, | |||
| prim::kPrimReduceMin, | |||
| prim::kPrimSelect, | |||
| prim::kPrimSign, | |||
| prim::kPrimSin, | |||
| prim::kPrimStridedSlice, | |||
| prim::kPrimUserDefined, | |||
| #endif | |||
| using context::OpLevel_0; | |||
| using context::OpLevel_1; | |||
| std::vector<PrimitivePtr> GraphKernelCluster::GetClusterableOpList() { | |||
| std::vector<std::tuple<std::string, unsigned int, PrimitivePtr>> clusterable_ops_with_level = { | |||
| // all target | |||
| {kAllTarget, OpLevel_0, prim::kPrimAbs}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimAdd}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimCast}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimEqual}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimExp}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimInplaceAssign}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimLog}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimMaximum}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimMinimum}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimMul}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimNeg}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimPow}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimRealDiv}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimReciprocal}, | |||
| {kAllTarget, OpLevel_1, prim::kPrimReduceSum}, | |||
| {kAllTarget, OpLevel_1, prim::kPrimReshape}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimRound}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimRsqrt}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimSqrt}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimSub}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimTanh}, | |||
| {kAllTarget, OpLevel_1, prim::kPrimTranspose}, | |||
| // ascend | |||
| {kAscendDevice, OpLevel_1, prim::kPrimMatMul}, | |||
| {kAscendDevice, OpLevel_1, prim::kPrimTransData}, | |||
| {kAscendDevice, OpLevel_1, prim::kPrimBatchMatMul}, | |||
| // gpu | |||
| {kGPUDevice, OpLevel_0, prim::kPrimACos}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAcosh}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimArgMax}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimArgMin}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAsin}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAsinh}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAssign}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAtan}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimAtan2}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimCos}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimDiv}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimErf}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimExpm1}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimFloor}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimFloorDiv}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimFloorMod}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimGreater}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimGreaterEqual}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimIsFinite}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimIsInf}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimIsNan}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLess}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLessEqual}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLogicalAnd}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLogicalOr}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLogicalNot}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimMod}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimNotEqual}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimReduceMax}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimReduceMin}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSelect}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSign}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSin}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimStridedSlice}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimUserDefined}, | |||
| }; | |||
| const auto &flags = context::GraphKernelFlags::GetInstance(); | |||
| std::vector<PrimitivePtr> clusterable_ops = GetValidOps(clusterable_ops_with_level, flags.fusion_ops_level); | |||
| OpListFilter(&clusterable_ops, flags.enable_cluster_ops_only, flags.enable_cluster_ops, flags.disable_cluster_ops); | |||
| return clusterable_ops; | |||
| } | |||
| namespace { | |||
| size_t CountGraphKernelInnerNodes(const AnfNodePtr &node) { | |||
| AnfNodePtrList node_list; | |||
| kernel::GetValidKernelNodes(AnfAlgo::GetCNodeFuncGraphPtr(node), &node_list); | |||
| @@ -115,15 +119,14 @@ size_t CountGraphKernelInnerNodes(const AnfNodePtr &node) { | |||
| } | |||
| } // namespace | |||
| bool IsClusterableOp(const AnfNodePtr &node) { | |||
| bool GraphKernelCluster::IsClusterableOp(const AnfNodePtr &node) { | |||
| if (AnfAlgo::IsGraphKernel(node)) { | |||
| return true; | |||
| } | |||
| if (IsKeepBasicNode(node)) { | |||
| return false; | |||
| } | |||
| auto op_list = GetClusterableOpList(); | |||
| bool node_in_oplist = std::any_of(op_list.begin(), op_list.end(), | |||
| bool node_in_oplist = std::any_of(op_list_.begin(), op_list_.end(), | |||
| [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); | |||
| if (!node_in_oplist) { | |||
| return false; | |||
| @@ -496,6 +499,7 @@ void GraphKernelCluster::RemoveWildGetitem(std::vector<size_t> *candidates) { | |||
| } | |||
| void GraphKernelCluster::Init(const FuncGraphPtr &func_graph) { | |||
| op_list_ = GetClusterableOpList(); | |||
| // process cnode only | |||
| nodes_ = TopoSort(func_graph->get_return(), SuccIncoming, | |||
| [](const AnfNodePtr &node) { return node->isa<CNode>() ? FOLLOW : EXCLUDE; }); | |||
| @@ -40,6 +40,8 @@ class GraphKernelCluster : public Pass { | |||
| bool Run(const FuncGraphPtr &func_graph) override; | |||
| private: | |||
| std::vector<PrimitivePtr> GetClusterableOpList(); | |||
| bool IsClusterableOp(const AnfNodePtr &node); | |||
| void Init(const FuncGraphPtr &func_graph); | |||
| bool Process(const FuncGraphPtr &func_graph); | |||
| std::vector<size_t> FindCandidates(size_t basenode_id); | |||
| @@ -57,9 +59,8 @@ class GraphKernelCluster : public Pass { | |||
| std::vector<AnfNodePtr> nodes_; | |||
| std::unordered_map<AnfNodePtr, size_t> node_idx_map_; | |||
| std::stringstream dump_buf_; | |||
| std::vector<PrimitivePtr> op_list_; | |||
| }; | |||
| bool IsClusterableOp(const AnfNodePtr &node); | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_GRAPH_KERNEL_CLUSTER_H_ | |||
| @@ -20,6 +20,7 @@ | |||
| #include <set> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <algorithm> | |||
| #include "utils/context/graph_kernel_flags.h" | |||
| @@ -40,61 +41,61 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| using context::OpLevel_0; | |||
| using context::OpLevel_1; | |||
| constexpr size_t kAssignInputIdx = 1; | |||
| constexpr size_t kLambOptimizerInputIdx = 12; | |||
| constexpr size_t kLambWeightInputIdx = 4; | |||
| std::vector<PrimitivePtr> GetExpandOps() { | |||
| std::vector<PrimitivePtr> expand_ops = { | |||
| prim::kPrimAddN, | |||
| prim::kPrimAssignAdd, | |||
| prim::kPrimErfc, | |||
| prim::kPrimExpandDims, | |||
| prim::kPrimGeLU, | |||
| prim::kPrimGeLUGrad, | |||
| prim::kPrimSquare, | |||
| prim::kPrimTile, | |||
| #if ENABLE_D | |||
| prim::kLambApplyOptimizerAssign, | |||
| prim::kLambApplyWeightAssign, | |||
| prim::kPrimClipByNormNoDivSum, | |||
| prim::kPrimSqrtGrad, | |||
| prim::kSoftmaxGradExt, | |||
| prim::kFusedMulAdd, | |||
| #elif ENABLE_GPU | |||
| prim::kPrimBatchMatMul, | |||
| prim::kPrimBiasAdd, | |||
| prim::kPrimBiasAddGrad, | |||
| prim::kPrimDropout, | |||
| prim::kPrimDropoutGrad, | |||
| prim::kPrimFusedAdam, | |||
| prim::kPrimFusedAdamWeightDecay, | |||
| prim::kPrimMaximumGrad, | |||
| prim::kPrimMinimumGrad, | |||
| prim::kPrimLayerNorm, | |||
| prim::kPrimLayerNormGrad, | |||
| prim::kPrimLogSoftmax, | |||
| prim::kPrimLogSoftmaxGrad, | |||
| prim::kPrimMatMul, | |||
| prim::kPrimReduceMean, | |||
| prim::kPrimRelu, | |||
| prim::kPrimReluGrad, | |||
| prim::kPrimSigmoid, | |||
| prim::kPrimSigmoidGrad, | |||
| prim::kPrimSigmoidCrossEntropyWithLogits, | |||
| prim::kPrimSigmoidCrossEntropyWithLogitsGrad, | |||
| prim::kPrimSlice, | |||
| prim::kPrimSoftmax, | |||
| prim::kPrimSoftmaxCrossEntropyWithLogits, | |||
| prim::kPrimSquaredDifference, | |||
| prim::kPrimSqueeze, | |||
| prim::kPrimEqualCount, | |||
| prim::kPrimSquareSumAll, | |||
| prim::kPrimIdentityMath, | |||
| prim::kPrimOnesLike, | |||
| #endif | |||
| std::vector<std::tuple<std::string, unsigned int, PrimitivePtr>> expand_ops_with_level = { | |||
| {kAllTarget, OpLevel_0, prim::kPrimAddN}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimAssignAdd}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimErfc}, | |||
| {kAllTarget, OpLevel_1, prim::kPrimExpandDims}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimGeLU}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimGeLUGrad}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimSquare}, | |||
| {kAllTarget, OpLevel_0, prim::kPrimTile}, | |||
| {kAscendDevice, OpLevel_0, prim::kLambApplyOptimizerAssign}, | |||
| {kAscendDevice, OpLevel_0, prim::kLambApplyWeightAssign}, | |||
| {kAscendDevice, OpLevel_0, prim::kPrimClipByNormNoDivSum}, | |||
| {kAscendDevice, OpLevel_0, prim::kPrimSqrtGrad}, | |||
| {kAscendDevice, OpLevel_1, prim::kSoftmaxGradExt}, | |||
| {kAscendDevice, OpLevel_0, prim::kFusedMulAdd}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimBatchMatMul}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimBiasAdd}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimBiasAddGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimDropout}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimDropoutGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimFusedAdam}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimFusedAdamWeightDecay}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimMaximumGrad}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimMinimumGrad}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimLayerNorm}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimLayerNormGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLogSoftmax}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimLogSoftmaxGrad}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimMatMul}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimReduceMean}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimRelu}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimReluGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSigmoid}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSigmoidGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSigmoidCrossEntropyWithLogits}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSigmoidCrossEntropyWithLogitsGrad}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSlice}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimSoftmax}, | |||
| {kGPUDevice, OpLevel_1, prim::kPrimSoftmaxCrossEntropyWithLogits}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSquaredDifference}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSqueeze}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimEqualCount}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimSquareSumAll}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimIdentityMath}, | |||
| {kGPUDevice, OpLevel_0, prim::kPrimOnesLike}, | |||
| }; | |||
| const auto &flags = context::GraphKernelFlags::GetInstance(); | |||
| std::vector<PrimitivePtr> expand_ops = GetValidOps(expand_ops_with_level, flags.fusion_ops_level); | |||
| OpListFilter(&expand_ops, flags.enable_expand_ops_only, flags.enable_expand_ops, flags.disable_expand_ops); | |||
| return expand_ops; | |||
| } | |||
| @@ -951,5 +951,21 @@ void EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList | |||
| func_graph->set_parameters(new_parameter); | |||
| *inputs = std::move(new_inputs); | |||
| } | |||
| std::vector<PrimitivePtr> GetValidOps( | |||
| const std::vector<std::tuple<std::string, unsigned int, PrimitivePtr>> &ops_with_level, unsigned int level) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| std::string target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET); | |||
| std::vector<PrimitivePtr> valid_ops; | |||
| for (const auto &[op_target, op_level, op] : ops_with_level) { | |||
| if (op_target == kAllTarget || op_target == target) { | |||
| if (level >= op_level) { | |||
| valid_ops.emplace_back(op); | |||
| } | |||
| } | |||
| } | |||
| return valid_ops; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -47,6 +47,7 @@ constexpr auto kGetGraphKernelOpExpander = "get_op_expander"; | |||
| constexpr auto kJsonKeyMultiGraph = "multi_graph"; | |||
| constexpr auto kJsonKeyGraphDesc = "graph_desc"; | |||
| constexpr auto kJsonKeyGraphMode = "graph_mode"; | |||
| constexpr auto kAllTarget = "ALL"; | |||
| constexpr auto kGraphKernelDumpPath = "graph_kernel_dump"; | |||
| inline const PrimitivePtr kPrimUnPadAkg = std::make_shared<Primitive>("UnPadAkg"); | |||
| @@ -141,6 +142,9 @@ FuncGraphPtr LiteGraph2AnfGraph(const graphkernel::LiteGraphPtr &lite_graph, Anf | |||
| // remove parameter which is not used | |||
| void EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList *inputs); | |||
| std::vector<PrimitivePtr> GetValidOps( | |||
| const std::vector<std::tuple<std::string, unsigned int, PrimitivePtr>> &ops_with_level, unsigned int level); | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_GRAPH_KERNEL_HELPER_H_ | |||
| @@ -170,6 +170,9 @@ void GraphKernelFlags::Refresh() { | |||
| void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_map) { | |||
| FlagRegister reg(flag_map); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| bool is_gpu = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice); | |||
| // Set opt_level first, some flags' default value depends on it. | |||
| // The default optimization level is 2 when graph kernel is enabled. | |||
| @@ -185,6 +188,7 @@ void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_ma | |||
| reg.AddFlag("enable_recompute_fusion", &enable_recompute_fusion, opt_level >= OptLevel_2); | |||
| reg.AddFlag("enable_parallel_fusion", &enable_parallel_fusion, opt_level == OptLevel_3); | |||
| reg.AddFlag("enable_low_precision", &enable_low_precision); | |||
| reg.AddFlag("fusion_ops_level", &fusion_ops_level, is_gpu ? OpLevel_MAX : OpLevel_0); | |||
| // Integer flags | |||
| reg.AddFlag("online_tuning", &online_tuning); | |||
| @@ -215,6 +219,7 @@ std::string GraphKernelFlags::DumpAllFlags() const { | |||
| json["enable_low_precision"] = enable_low_precision; | |||
| json["opt_level"] = opt_level; | |||
| json["fusion_ops_level"] = fusion_ops_level; | |||
| json["online_tuning"] = online_tuning; | |||
| json["repository_path"] = repository_path; | |||
| @@ -32,6 +32,10 @@ constexpr unsigned int OptLevel_2 = 2; // Default functions | |||
| constexpr unsigned int OptLevel_3 = 3; // Experimental functions | |||
| constexpr unsigned int OptLevel_MAX = 4; | |||
| constexpr unsigned int OpLevel_0 = 0; | |||
| constexpr unsigned int OpLevel_1 = 1; | |||
| constexpr unsigned int OpLevel_MAX = 2; | |||
| class GraphKernelFlags { | |||
| public: | |||
| static const GraphKernelFlags &GetInstance() { | |||
| @@ -65,26 +69,31 @@ class GraphKernelFlags { | |||
| * | |||
| * Experimental feature, enabled by default when opt_level=3 | |||
| */ | |||
| bool enable_stitch_fusion; | |||
| bool enable_stitch_fusion{false}; | |||
| /** | |||
| * Enable recompute fusion in graph kernel fusion strategy, enabled when opt_level>=2. | |||
| */ | |||
| bool enable_recompute_fusion; | |||
| bool enable_recompute_fusion{false}; | |||
| /** | |||
| * Enable parallel fusion in graph kernel fusion strategy. | |||
| * | |||
| * Experimental feature, enabled by default when opt_level=3 | |||
| */ | |||
| bool enable_parallel_fusion; | |||
| bool enable_parallel_fusion{false}; | |||
| /** | |||
| * Enable low precision for data transfer between graph kernels and for computation | |||
| * inside graph kernels. | |||
| * Experimental feature, enabled by the enable_low_precision flag | |||
| */ | |||
| bool enable_low_precision; | |||
| bool enable_low_precision{false}; | |||
| /** | |||
| * Expand and cluster AKG's operators by level. | |||
| */ | |||
| unsigned int fusion_ops_level{OpLevel_0}; | |||
| /** | |||
| * Optimization level, value from 0 to 3. | |||
| @@ -95,7 +104,7 @@ class GraphKernelFlags { | |||
| * The default value is OptLevel_2 when the context "enable_graph_kernel" is set, | |||
| * but if it's also changed in "graph_kernel_flags", then the "graph_kernel_flags" will prevail. | |||
| */ | |||
| unsigned int opt_level; // defaults 0 or 2 | |||
| unsigned int opt_level{0}; // defaults 0 or 2 | |||
| /** | |||
| * Online tuning level, value from 0 to 3. | |||