Merge pull request !31413 from DeshiChen/0316_pclintr1.7
| @@ -545,8 +545,9 @@ std::tuple<std::vector<bool>, std::vector<ParallelInfo>> ParallelOpFusion::DoSea | |||
| std::vector<ParallelInfo> parallel_infos; | |||
| std::vector<bool> origin_candidates_used(origin_size, false); | |||
| std::vector<bool> sorted_candidates_used(candidates.size(), false); | |||
| size_t i = 0; | |||
| while (i < candidates.size()) { | |||
| size_t offset; | |||
| for (size_t i = 0; i < candidates.size(); i += offset + 1) { | |||
| offset = 0; | |||
| if (sorted_candidates_used[i]) { | |||
| continue; | |||
| } | |||
| @@ -599,7 +600,7 @@ std::tuple<std::vector<bool>, std::vector<ParallelInfo>> ParallelOpFusion::DoSea | |||
| } | |||
| max_benefit = benefit; | |||
| best_parallel_info = ParallelInfo(other_candidates, dim_infos, fusion_info); | |||
| i += begin - 1; | |||
| offset = begin - 1; | |||
| } | |||
| if (max_benefit > 0) { | |||
| @@ -609,7 +610,6 @@ std::tuple<std::vector<bool>, std::vector<ParallelInfo>> ParallelOpFusion::DoSea | |||
| origin_candidates_used[IntToSize(get_index(origin_indices, node))] = true; | |||
| } | |||
| } | |||
| i++; | |||
| } | |||
| // The current nodes are not suitable to fuse, so pop the first node to try other fusion possibilities. | |||
| @@ -65,7 +65,7 @@ AnfNodePtr ProcessNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, s | |||
| return new_cnode; | |||
| } | |||
| const AnfNodePtr SplitAssign::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | |||
| const AnfNodePtr SplitAssign::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!CanSplit(node)) return node; | |||
| return ProcessNode(node->func_graph(), node, 1); | |||
| @@ -30,7 +30,7 @@ class SplitAssign : public opt::PatternProcessPass { | |||
| class OpUMonadExpander : public DefaultExpander { | |||
| public: | |||
| explicit OpUMonadExpander(size_t input_idx) : input_idx_(input_idx) {} | |||
| ~OpUMonadExpander() = default; | |||
| virtual ~OpUMonadExpander() = default; | |||
| AnfNodePtr Run(const AnfNodePtr &node) override; | |||
| private: | |||
| @@ -47,7 +47,7 @@ AnfNodePtr DropoutExpander::PreProcess(const FuncGraphPtr &func_graph, const Anf | |||
| CheckCNodeInputSize(cnode, kDropoutInputTensorNum); | |||
| auto shape = AnfAlgo::GetInputDeviceShape(cnode, 0); | |||
| ShapeVector shape_i64; | |||
| std::transform(shape.begin(), shape.end(), std::back_inserter(shape_i64), [](size_t x) { return SizeToLong(x); }); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(shape_i64), SizeToLong); | |||
| // Get the seed from the original dropout's attrs, rather than setting the seed from the time. | |||
| // Only when seed0 and seed1 are both equal to 0 is the seed set from the time. | |||
| auto node_prim = GetCNodePrimitive(node); | |||
| @@ -21,6 +21,8 @@ | |||
| namespace mindspore::graphkernel { | |||
| class DropoutExpander : public PyExpander { | |||
| public: | |||
| DropoutExpander() = default; | |||
| virtual ~DropoutExpander() = default; | |||
| AnfNodePtr Run(const AnfNodePtr &node) override; | |||
| private: | |||
| @@ -157,10 +157,11 @@ class MinCut { | |||
| std::vector<std::pair<size_t, TransOpType>> GetOneNodeOps() const { | |||
| std::vector<std::pair<size_t, TransOpType>> one_node_ops; | |||
| for (size_t i = 1; i <= origin_nodes_num_; ++i) { | |||
| auto tmpi = i; // to evade pclint warning "for statement index variable modified in body." | |||
| if (nodes_[i].format == kFormatA && nodes_[i + origin_nodes_num_].format != kFormatA) { | |||
| (void)one_node_ops.emplace_back(i, kTransAB); | |||
| (void)one_node_ops.emplace_back(tmpi, kTransAB); | |||
| } else if (nodes_[i].format != kFormatA && nodes_[i + origin_nodes_num_].format == kFormatA) { | |||
| (void)one_node_ops.emplace_back(i, kTransBA); | |||
| (void)one_node_ops.emplace_back(tmpi, kTransBA); | |||
| } | |||
| } | |||
| return one_node_ops; | |||
| @@ -32,6 +32,7 @@ | |||
| #include "common/graph_kernel/core/graph_kernel_utils.h" | |||
| namespace mindspore::graphkernel { | |||
| constexpr auto kTsaInputIndex = 2; | |||
| class TsaChecker : public AtomicAddChecker { | |||
| public: | |||
| explicit TsaChecker(const PrimitivePtr &target) { target_type_ = target; } | |||
| @@ -146,7 +147,7 @@ std::pair<AnfNodePtr, size_t> TsaAtomicAddToFirstTensor::GetOrCreateNewTsaFirstN | |||
| return {new_copy_composite_node, tsa_first_input.second}; | |||
| } | |||
| void TsaAtomicAddToFirstTensor::CorrectKernelBuildInfo( | |||
| void TsaAtomicAddToFirstTensor::ChangeKernelBuildInfo( | |||
| const AnfNodePtr &composite_node, const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_infos) { | |||
| // Change the kernel build info according to the modified inputs. | |||
| auto kernel_info = static_cast<device::KernelInfo *>(composite_node->kernel_info()); | |||
| @@ -178,7 +179,7 @@ void TsaAtomicAddToFirstTensor::CorrectKernelBuildInfo( | |||
| for (const auto &outer_info : outer_infos) { | |||
| auto &modified_input = std::get<1>(outer_info); | |||
| auto tsa_first_input_index = std::get<2>(outer_info); | |||
| auto tsa_first_input_index = std::get<kTsaInputIndex>(outer_info); | |||
| auto kernel_with_index = common::AnfAlgo::VisitKernel(modified_input, 0); | |||
| modified_inputs_format[tsa_first_input_index] = | |||
| AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second); | |||
| @@ -198,7 +199,7 @@ void TsaAtomicAddToFirstTensor::CorrectKernelBuildInfo( | |||
| AnfAlgo::SetSelectKernelBuildInfo(new_selected_info, composite_node.get()); | |||
| } | |||
| void TsaAtomicAddToFirstTensor::ProcessOriginCNode( | |||
| void TsaAtomicAddToFirstTensor::ProcessOriginalCNode( | |||
| const AnfNodePtr &composite_node, const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_nodes) { | |||
| auto sub_graph = common::AnfAlgo::GetCNodeFuncGraphPtr(composite_node); | |||
| auto mng_sub = sub_graph->manager(); | |||
| @@ -220,7 +221,7 @@ void TsaAtomicAddToFirstTensor::ProcessOriginCNode( | |||
| CreateInplaceAssignNodeAndCorrectReturn(sub_graph, parameters_infos); | |||
| CorrectAbstract(composite_node, info_and_tsa_outers); | |||
| CorrectKernelBuildInfo(composite_node, outer_nodes); | |||
| ChangeKernelBuildInfo(composite_node, outer_nodes); | |||
| auto old_graph_name = GetValue<std::string>(sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); | |||
| auto new_graph_name = | |||
| @@ -246,7 +247,7 @@ void TsaAtomicAddToFirstTensor::ProcessTsa(const KernelGraphPtr &main_graph, con | |||
| // Insert an extra input (the broadcast node's output) into the composite node, and make the original TensorScatterAdd InplaceAssign to it. | |||
| // Note: if there is only a single output, this will increase total memory because of a fake output. | |||
| ProcessOriginCNode(origin_composite_node, info_and_outer_nodes_with_index); | |||
| ProcessOriginalCNode(origin_composite_node, info_and_outer_nodes_with_index); | |||
| // Insert update_state_node to keep execution order. | |||
| auto update_state_node = InsertUpdateState(main_graph, origin_composite_node); | |||
| @@ -48,10 +48,10 @@ class TsaAtomicAddToFirstTensor : public AtomicCleanInsertter { | |||
| bool Run(const FuncGraphPtr &func_graph) override; | |||
| private: | |||
| void ProcessOriginCNode(const AnfNodePtr &composite_node, | |||
| const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_nodes); | |||
| void CorrectKernelBuildInfo(const AnfNodePtr &composite_node, | |||
| const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_infos); | |||
| void ProcessOriginalCNode(const AnfNodePtr &composite_node, | |||
| const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_nodes); | |||
| void ChangeKernelBuildInfo(const AnfNodePtr &composite_node, | |||
| const std::vector<std::tuple<AtomicAddInfo, AnfNodePtr, size_t>> &outer_infos); | |||
| void ProcessTsa(const KernelGraphPtr &main_graph, const AnfNodePtr &anf_node, | |||
| const std::vector<AtomicAddInfo> &atomic_add_infos, const FuncGraphManagerPtr &mng); | |||
| std::pair<AnfNodePtr, size_t> GetOrCreateNewTsaFirstNode(const KernelGraphPtr &main_graph, | |||
| @@ -44,19 +44,18 @@ bool UssAtomicAdd::Run(const FuncGraphPtr &func_graph) { | |||
| } | |||
| bool changed = false; | |||
| std::shared_ptr<AtomicAddChecker> atomic_add_checker = | |||
| std::shared_ptr<AtomicAddChecker> checker = | |||
| std::make_shared<UssChecker>(std::make_shared<Primitive>("UnsortedSegmentSum")); | |||
| if (atomic_add_checker == nullptr) { | |||
| if (checker == nullptr) { | |||
| return changed; | |||
| } | |||
| auto topo_nodes = TopoSort(kernel_graph->get_return()); | |||
| for (const auto &node : topo_nodes) { | |||
| if (!atomic_add_checker->Check(node)) { | |||
| if (!checker->Check(node)) { | |||
| continue; | |||
| } | |||
| auto atomic_add_infos = atomic_add_checker->GetAtomicAddInfo(); | |||
| auto atomic_add_infos = checker->GetAtomicAddInfo(); | |||
| InsertAtomicClean(kernel_graph, node, atomic_add_infos, mng); | |||
| changed = true; | |||
| } | |||
| @@ -37,7 +37,7 @@ bool BindValueToGraph::Run(const FuncGraphPtr &func_graph) { | |||
| } | |||
| if (auto vptr = node->cast<ValueNodePtr>(); value_nodes.count(vptr) == 0) { | |||
| auto new_node = kernel_graph->NewValueNode(vptr); | |||
| mng->Replace(vptr, new_node); | |||
| (void)mng->Replace(vptr, new_node); | |||
| kernel_graph->AddValueNodeToGraph(new_node); | |||
| changed = true; | |||
| } | |||
| @@ -67,7 +67,7 @@ bool AkgKernelPool::LockMng::TryLock() const { | |||
| uint32_t trial = 2000; | |||
| const uint32_t sleep_time_us = 5000; | |||
| int32_t ret = -1; | |||
| int32_t ret; | |||
| while (trial > 0) { | |||
| ret = lockf(fd_, F_TLOCK, 0); | |||
| if (ret == 0 || (errno != EACCES && errno != EAGAIN)) { | |||
| @@ -87,7 +87,7 @@ bool AkgKernelPool::LockMng::TryLock() const { | |||
| return true; | |||
| } | |||
| void AkgKernelPool::LockMng::Unlock() const { | |||
| void AkgKernelPool::LockMng::Unlock() const noexcept { | |||
| auto ret = lockf(fd_, F_ULOCK, 0); | |||
| if (ret == -1) { | |||
| MS_LOG(ERROR) << "Failed to release the lock, error msg:" << GetErrorInfo(); | |||
| @@ -118,7 +118,8 @@ void *AkgKernelPool::CreateSharedMem(const std::string &path) { | |||
| auto hash_id = std::hash<std::string>()(path); | |||
| auto key_id = static_cast<key_t>(hash_id); | |||
| auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512; | |||
| const size_t min_mem_size = 512; | |||
| auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + min_mem_size; | |||
| { | |||
| ACQUIRE_LOCK; | |||
| @@ -418,7 +419,7 @@ std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vecto | |||
| (void)repeat_nodes_.emplace_back(json_generator, anf_node); | |||
| continue; | |||
| } | |||
| kernel_name_set.insert(kernel_name); | |||
| (void)kernel_name_set.insert(kernel_name); | |||
| (void)new_build_args.emplace_back(json_generator, anf_node); | |||
| } | |||
| return new_build_args; | |||
| @@ -616,7 +617,7 @@ bool AkgKernelBuilder::AkgKernelParallelBuild(const std::vector<AnfNodePtr> &anf | |||
| MS_EXCEPTION(UnknownError) << "Collect op info failed. op[" << anf_node->fullname_with_scope() << "]."; | |||
| } | |||
| } | |||
| json_and_node.push_back({akg_kernel_json_generator, anf_node}); | |||
| (void)json_and_node.emplace_back(std::move(akg_kernel_json_generator), anf_node); | |||
| } | |||
| if (json_and_node.empty()) { | |||
| @@ -77,13 +77,12 @@ class AkgKernelPool { | |||
| private: | |||
| bool TryLock() const; | |||
| void Unlock() const; | |||
| void Unlock() const noexcept; | |||
| int32_t fd_{-1}; | |||
| std::string calling_position_; | |||
| }; | |||
| public: | |||
| AkgKernelPool() = default; | |||
| virtual ~AkgKernelPool() { | |||
| // Close key file | |||
| @@ -109,9 +108,11 @@ class AkgKernelPool { | |||
| constexpr inline static int32_t kDoneIdx_ = 2; | |||
| private: | |||
| inline size_t *ListBegin(int32_t list_idx) const { return kernel_lists_[list_idx]; } | |||
| inline size_t *ListBegin(int32_t list_idx) { return kernel_lists_[list_idx]; } | |||
| inline const size_t *ListBegin(int32_t list_idx) const { return kernel_lists_[list_idx]; } | |||
| inline size_t *ListEnd(int32_t list_idx) const { | |||
| inline size_t *ListEnd(int32_t list_idx) { return kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]; } | |||
| inline const size_t *ListEnd(int32_t list_idx) const { | |||
| return kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]; | |||
| } | |||
| @@ -236,26 +236,23 @@ class CNodeDecoder { | |||
| PrimitivePtr CreatePrimitiveWithAttrs(const std::string &op_name) const { | |||
| auto primitive = std::make_shared<Primitive>(op_name); | |||
| for (const auto &attr : cnode_attrs_) { | |||
| primitive->AddAttr(attr.first, attr.second); | |||
| (void)primitive->AddAttr(attr.first, attr.second); | |||
| } | |||
| return primitive; | |||
| } | |||
| tensor::TensorPtr DecodeScalar(const nlohmann::json &scalar_json) const { | |||
| auto type_id = StringToTypeId(scalar_json[kJsonKeyDataType]); | |||
| switch (type_id) { | |||
| case kNumberTypeFloat16: | |||
| return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat16); | |||
| case kNumberTypeFloat32: | |||
| return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat32); | |||
| case kNumberTypeInt32: | |||
| return std::make_shared<tensor::Tensor>(static_cast<int64_t>(scalar_json[kJsonKeyValue]), kInt32); | |||
| default: | |||
| MS_LOG(ERROR) << "Fail to parse scalar " << scalar_json[kJsonKeyValue] | |||
| << " in json, because its type: " << scalar_json[kJsonKeyDataType] | |||
| << " is not in supported list: [float16, float32, int32]. json is: " << scalar_json; | |||
| break; | |||
| if (type_id == TypeId::kNumberTypeFloat16) { | |||
| return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat16); | |||
| } else if (type_id == TypeId::kNumberTypeFloat32) { | |||
| return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat32); | |||
| } else if (type_id == TypeId::kNumberTypeInt32) { | |||
| return std::make_shared<tensor::Tensor>(static_cast<int64_t>(scalar_json[kJsonKeyValue]), kInt32); | |||
| } | |||
| MS_LOG(ERROR) << "Fail to parse scalar " << scalar_json[kJsonKeyValue] | |||
| << " in json, because its type: " << scalar_json[kJsonKeyDataType] | |||
| << " is not in supported list: [float16, float32, int32]. json is: " << scalar_json; | |||
| return nullptr; | |||
| } | |||
| @@ -35,6 +35,7 @@ using kernel::OpInfo; | |||
| using kernel::OpIOInfo; | |||
| namespace { | |||
| constexpr auto kAttrParallelDimInfoSize = 2; | |||
| constexpr auto kDebugStrDepth = 2; | |||
| std::vector<int64_t> GetDynInputSizes(const AnfNodePtr &anf_node) { | |||
| std::vector<int64_t> dyn_input_sizes; | |||
| @@ -73,7 +74,7 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons | |||
| const NodeUsersMap &users = mng->node_users(); | |||
| auto input_users = users.find(input); | |||
| if (input_users == users.end() || input_users->second.empty()) { | |||
| MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" | |||
| MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(kDebugStrDepth) << "] of [" | |||
| << input->func_graph()->ToString() << "] has no users."; | |||
| } | |||
| bool found = false; | |||
| @@ -88,14 +89,14 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons | |||
| found = true; | |||
| break; | |||
| } | |||
| int used_as_idx = input_user.second - 1; | |||
| int accum_idx = 0; | |||
| int64_t used_as_idx = IntToLong(input_user.second - 1); | |||
| int64_t accum_idx = 0; | |||
| for (size_t dyn_i = 0; dyn_i < dyn_input_sizes.size(); ++dyn_i) { | |||
| accum_idx += LongToInt(dyn_input_sizes[dyn_i]); | |||
| accum_idx += dyn_input_sizes[dyn_i]; | |||
| if (used_as_idx < accum_idx) { | |||
| auto tmp_dyn_i = dyn_i; // to evade pclint warning "for statement index variable modified in body." | |||
| input_index.push_back(std::make_pair( | |||
| anf_node, | |||
| std::make_pair(dyn_i, IntToSize(used_as_idx - (accum_idx - LongToInt(dyn_input_sizes[dyn_i])))))); | |||
| anf_node, std::make_pair(tmp_dyn_i, LongToSize(used_as_idx - (accum_idx - dyn_input_sizes[dyn_i]))))); | |||
| found = true; | |||
| break; | |||
| } | |||
| @@ -105,7 +106,7 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons | |||
| if (found) break; | |||
| } | |||
| if (found) continue; | |||
| MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" | |||
| MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(kDebugStrDepth) << "] of [" | |||
| << input->func_graph()->ToString() << "] found no related kernel info."; | |||
| } | |||
| return input_index; | |||
| @@ -131,7 +132,7 @@ std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfN | |||
| found = true; | |||
| } | |||
| if (!found) { | |||
| MS_EXCEPTION(ArgumentError) << "Output [" << i << "][" << output->DebugString(2) << "] of [" | |||
| MS_EXCEPTION(ArgumentError) << "Output [" << i << "][" << output->DebugString(kDebugStrDepth) << "] of [" | |||
| << output->func_graph()->ToString() << "] found no related kernel info."; | |||
| } | |||
| } | |||
| @@ -258,7 +259,8 @@ bool AkgKernelJsonGenerator::GetInputTensorValue(const AnfNodePtr &anf_node, siz | |||
| MS_EXCEPTION_IF_NULL(data); | |||
| if (tensor->DataSize() > 1) { | |||
| // not const tensor. | |||
| MS_LOG(WARNING) << "Not take value of tensor whose datasize greater than 1, [" << input_node->DebugString(2) << "]"; | |||
| MS_LOG(WARNING) << "Not take value of tensor whose datasize greater than 1, [" | |||
| << input_node->DebugString(kDebugStrDepth) << "]"; | |||
| return false; | |||
| } | |||
| @@ -333,14 +335,14 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con | |||
| auto input_shape = this->cb_->GetInputShape(anf_node, real_input_index); | |||
| if (!is_basic_op_ && GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) { | |||
| MS_LOG(DEBUG) << "Pick single value [" << input_desc_json[kJsonKeyValue] << "] from input[" << real_input_index | |||
| << "] of node [" << anf_node->DebugString(2); | |||
| << "] of node [" << anf_node->DebugString(kDebugStrDepth); | |||
| input_shape.clear(); | |||
| } | |||
| if (input_shape.empty()) { | |||
| input_shape.push_back(1); | |||
| } | |||
| input_desc_json[kJsonKeyShape] = input_shape; | |||
| input_list.emplace_back(input_desc_json); | |||
| (void)input_list.emplace_back(input_desc_json); | |||
| real_input_index++; | |||
| } | |||
| (void)inputs_json->emplace_back(input_list); | |||
| @@ -587,7 +589,8 @@ void AkgKernelJsonGenerator::SaveNodeAddress(const AnfNodePtr &anf_node, nlohman | |||
| OpInfoPtr AkgKernelJsonGenerator::ExtractOpInfo(const AnfNodePtr &anf_node) const { | |||
| if (dump_option_.extract_opinfo_from_anfnode) { | |||
| return OpInfoExtractor().Run(anf_node); | |||
| OpInfoExtractor e; | |||
| return e.Run(anf_node); | |||
| } else { | |||
| #ifdef MSLITE_ENABLE_GRAPH_KERNEL | |||
| MS_LOG(EXCEPTION) << "OpLib is not supported."; | |||
| @@ -671,26 +674,18 @@ size_t AkgKernelJsonGenerator::GetTensorSize(const nlohmann::json &node_json) co | |||
| return std::accumulate(shape.begin(), shape.end(), nbyte, std::multiplies<size_t>()); | |||
| } | |||
| bool AkgKernelJsonGenerator::GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *input_size, | |||
| void AkgKernelJsonGenerator::GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *input_size, | |||
| std::vector<size_t> *output_size) const { | |||
| if (input_size == nullptr || output_size == nullptr) { | |||
| MS_LOG(ERROR) << "input size or output size is nullptr when parsing IO size in json: " << node_json; | |||
| return false; | |||
| } | |||
| input_size->clear(); | |||
| output_size->clear(); | |||
| for (size_t i = 0; i < node_json[kJsonKeyInputDesc].size(); i++) { | |||
| for (size_t m = 0; m < node_json[kJsonKeyInputDesc][i].size(); m++) { | |||
| input_size->push_back(GetTensorSize(node_json[kJsonKeyInputDesc][i][m])); | |||
| } | |||
| } | |||
| for (size_t i = 0; i < node_json[kJsonKeyOutputDesc].size(); i++) { | |||
| output_size->push_back(GetTensorSize(node_json[kJsonKeyOutputDesc][i])); | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::json *kernel_json) { | |||
| @@ -716,10 +711,7 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j | |||
| (*kernel_json)[kJsonKeyComputeCapability] = ComputeCapability::Get(); | |||
| } | |||
| if (!GetIOSize(*kernel_json, &input_size_list_, &output_size_list_)) { | |||
| MS_LOG(ERROR) << "Fail to get input and output size of json: " << *kernel_json; | |||
| return false; | |||
| } | |||
| GetIOSize(*kernel_json, &input_size_list_, &output_size_list_); | |||
| MS_LOG(DEBUG) << "Akg create kernel json desc success, full scope name is : " << anf_node->fullname_with_scope() | |||
| << ", json info name is : " << kernel_name_; | |||
| @@ -737,16 +729,15 @@ void AkgKernelJsonGenerator::GenStitchJson(const std::vector<AnfNodePtr> &anf_no | |||
| if (stitch_attr != nullptr && GetValue<std::string>(stitch_attr) == "common") { | |||
| auto name = GetTensorName((*node_json_map)[anf_node], kJsonKeyOutputDesc, {0, 0}); | |||
| if (std::find(stitchs.begin(), stitchs.end(), name) == stitchs.end()) { | |||
| stitchs.emplace_back(name); | |||
| (void)stitchs.emplace_back(name); | |||
| } | |||
| } | |||
| } | |||
| if (!stitchs.empty()) { | |||
| std::vector<nlohmann::json> v; | |||
| for (auto &s : stitchs) { | |||
| std::vector<std::string> t; | |||
| t.emplace_back(s); | |||
| v.emplace_back(t); | |||
| std::vector<std::string> t(1, s); | |||
| (void)v.emplace_back(std::move(t)); | |||
| } | |||
| nlohmann::json stitch_json; | |||
| stitch_json[kJsonKeyStitchOp] = v; | |||
| @@ -771,8 +762,8 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf | |||
| UpdateTensorName(anf_nodes, &node_json_map); | |||
| std::vector<nlohmann::json> node_json_desc; | |||
| std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), | |||
| [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); | |||
| (void)std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), | |||
| [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); | |||
| (*kernel_json)[kJsonKeyOpDesc] = node_json_desc; | |||
| auto inputs_json = CreateInputsJson(anf_nodes, input_list, node_json_map); | |||
| @@ -816,11 +807,7 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf | |||
| } | |||
| GenStitchJson(anf_nodes, &node_json_map, kernel_json); | |||
| if (!GetIOSize(*kernel_json, &input_size_list_, &output_size_list_)) { | |||
| MS_LOG(ERROR) << "Fail to get input and output size of json: " << *kernel_json; | |||
| return false; | |||
| } | |||
| GetIOSize(*kernel_json, &input_size_list_, &output_size_list_); | |||
| return true; | |||
| } | |||
| @@ -853,11 +840,14 @@ void AkgKernelJsonGenerator::UpdateTensorName(const std::vector<AnfNodePtr> &anf | |||
| size_t input_tensor_num = is_dynamic_input ? LongToSize(dyn_input_sizes[i]) : 1; | |||
| for (size_t j = 0; j < input_tensor_num; ++j) { | |||
| auto tmp_input = GetKernelInput(anf_node, real_input_index); | |||
| std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kJsonKeyInputDesc, std::make_pair(i, j)); | |||
| auto tmpi = i; | |||
| auto tmpj = j; // use tmpi and tmpj to evade pclint warning "for statement index variable modified in body." | |||
| std::string tensor_name = | |||
| GetTensorName((*node_json_map)[anf_node], kJsonKeyInputDesc, std::make_pair(tmpi, tmpj)); | |||
| if (node_json_map->find(tmp_input.first) != node_json_map->end()) { | |||
| std::string new_tensor_name = | |||
| GetTensorName((*node_json_map)[tmp_input.first], kJsonKeyOutputDesc, std::make_pair(0, tmp_input.second)); | |||
| SetTensorName(kJsonKeyInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node])); | |||
| SetTensorName(kJsonKeyInputDesc, new_tensor_name, std::make_pair(tmpi, tmpj), &((*node_json_map)[anf_node])); | |||
| MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" | |||
| << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" | |||
| << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; | |||
| @@ -947,11 +937,12 @@ void AkgKernelJsonGenerator::GenParallelJson(const std::vector<AnfNodePtr> &anf_ | |||
| parallel_fusion_json[kJsonKeyTypeInfo] = type_info; | |||
| std::vector<std::vector<std::string>> sgraphs; | |||
| std::vector<size_t> cnums; | |||
| std::for_each(sub_graphs_info.cbegin(), sub_graphs_info.cend(), | |||
| [&sgraphs, &cnums](const std::pair<size_t, std::pair<size_t, std::vector<std::string>>> &sg_info) { | |||
| sgraphs.push_back(sg_info.second.second); | |||
| cnums.push_back(sg_info.second.first); | |||
| }); | |||
| (void)std::for_each( | |||
| sub_graphs_info.cbegin(), sub_graphs_info.cend(), | |||
| [&sgraphs, &cnums](const std::pair<size_t, std::pair<size_t, std::vector<std::string>>> &sg_info) { | |||
| sgraphs.push_back(sg_info.second.second); | |||
| cnums.push_back(sg_info.second.first); | |||
| }); | |||
| parallel_fusion_json[kJsonKeySubGraph] = sgraphs; | |||
| parallel_fusion_json[kJsonKeyCoreNum] = cnums; | |||
| @@ -93,7 +93,7 @@ class AkgKernelJsonGenerator { | |||
| public: | |||
| explicit AkgKernelJsonGenerator(DumpOption dump_option) | |||
| : dump_option_(std::move(dump_option)), cb_(Callback::Instance()) {} | |||
| ~AkgKernelJsonGenerator() = default; | |||
| ~AkgKernelJsonGenerator() { cb_ = nullptr; } | |||
| bool CollectJson(const AnfNodePtr &anf_node, nlohmann::json *kernel_json); | |||
| bool CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||
| @@ -119,7 +119,7 @@ class AkgKernelJsonGenerator { | |||
| bool CreateAttrDescJson(const AnfNodePtr &anf_node, const OpInfoPtr &op_info, nlohmann::json *attrs_json); | |||
| void GenStitchJson(const std::vector<AnfNodePtr> &anf_nodes, std::map<AnfNodePtr, nlohmann::json> *node_json_map, | |||
| nlohmann::json *kernel_json); | |||
| bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *input_size, | |||
| void GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *input_size, | |||
| std::vector<size_t> *output_size) const; | |||
| bool GenSingleJsons(const std::vector<AnfNodePtr> &anf_nodes, std::map<AnfNodePtr, nlohmann::json> *node_json_map); | |||
| void UpdateTensorName(const std::vector<AnfNodePtr> &anf_nodes, std::map<AnfNodePtr, nlohmann::json> *node_json_map); | |||
| @@ -22,8 +22,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void AkgMetadataInfo(const CNodePtr &kernel_node, | |||
| std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) { | |||
| void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list); | |||
| @@ -34,7 +34,7 @@ std::vector<std::string> GetTokens(const std::string &str, const std::string &de | |||
| char *saveptr = nullptr; | |||
| char *pch = strtok_r(&c_str[0], delim.c_str(), &saveptr); | |||
| while (pch != nullptr) { | |||
| tokens.emplace_back(pch); | |||
| (void)tokens.emplace_back(pch); | |||
| pch = strtok_r(nullptr, delim.c_str(), &saveptr); | |||
| } | |||
| return tokens; | |||
| @@ -85,7 +85,7 @@ class FlagRegister { | |||
| ~FlagRegister() = default; | |||
| template <typename T> | |||
| void AddFlag(const std::string &flag_name, T *const flag_var, T default_value = T()) const { | |||
| void AddFlag(const std::string &flag_name, T *flag_var, T default_value = T()) const { | |||
| auto iter = flag_map_.find(flag_name); | |||
| if (iter != flag_map_.end()) { | |||
| T var; | |||
| @@ -100,7 +100,7 @@ class FlagRegister { | |||
| MS_LOG(WARNING) << "Invalid GraphKernel flag: --" << iter->first << "=" << iter->second; | |||
| } | |||
| } | |||
| flag_map_.erase(iter); | |||
| (void)flag_map_.erase(iter); | |||
| } else { | |||
| *flag_var = std::move(default_value); | |||
| } | |||
| @@ -162,8 +162,9 @@ const GraphKernelFlags &GraphKernelFlags::GetInstance() { | |||
| std::pair<std::string, bool> GraphKernelFlags::GetGraphKernelContext() { | |||
| // This environment variable is deprecated. | |||
| auto flags = common::GetEnv("MS_GRAPH_KERNEL_FLAGS"); | |||
| #ifdef MSLITE_ENABLE_GRAPH_KERNEL | |||
| bool enable_context{false}; | |||
| #ifndef MSLITE_ENABLE_GRAPH_KERNEL | |||
| #else | |||
| static bool print_warning = true; | |||
| if ((!flags.empty()) && print_warning) { | |||
| print_warning = false; | |||
| @@ -176,7 +177,7 @@ std::pair<std::string, bool> GraphKernelFlags::GetGraphKernelContext() { | |||
| if (flags.empty()) { | |||
| flags = context->get_param<std::string>(MS_CTX_GRAPH_KERNEL_FLAGS); | |||
| } | |||
| enable_context = context->get_param<bool>(MS_CTX_ENABLE_GRAPH_KERNEL); | |||
| bool enable_context = context->get_param<bool>(MS_CTX_ENABLE_GRAPH_KERNEL); | |||
| #endif | |||
| return std::make_pair(flags, enable_context); | |||
| } | |||
| @@ -231,11 +232,10 @@ void GraphKernelFlags::Refresh() { | |||
| void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_map) { | |||
| FlagRegister reg(flag_map); | |||
| bool is_ascend{false}; | |||
| #ifndef MSLITE_ENABLE_GRAPH_KERNEL | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| is_ascend = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice); | |||
| #endif | |||
| if (context_ptr != nullptr) { | |||
| is_ascend = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice); | |||
| } | |||
| // Set opt_level first, because some flags' default values depend on it. | |||
| // The default optimization level is level 2 when graph kernel is enabled. | |||