@@ -47,7 +47,6 @@ endif()
 if (DEBUG_MODE)
     set(CMAKE_BUILD_TYPE "Debug")
-    add_compile_definitions(MEM_REUSE_DEBUG)
 else()
     set(CMAKE_BUILD_TYPE "Release")
 endif()

@@ -2,6 +2,7 @@ file(GLOB_RECURSE _PREACTIVATE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "common/*.cc"
     "mem_reuse/*.cc"
     "pass/*.cc"
+    "somas/*.cc"
     )
 if (ENABLE_D)

@@ -0,0 +1,132 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/optimizer/somas/somas_node.h"
#include "backend/optimizer/somas/somas_solver_pre.h"
#include "backend/optimizer/somas/somas_stream.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_graph.h"

namespace mindspore {
namespace somas {
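
// SOMAS (Safe Optimized Memory Allocation Solver): models the kernel graph as
// tensors with lifetimes on streams, derives reuse constraints from them, and asks
// the solver for one static offset per tensor inside a single memory block.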
class Somas {
 public:
  // Constructors/Destructors
  Somas() = default;
  Somas(const Somas &) = delete;
  Somas &operator=(const Somas &) = delete;
  ~Somas() = default;

  bool Allocate(const session::KernelGraph *graph);
  size_t GetTotalMemSize() { return mem_offset_; }
  void set_mem_base_addr(uint8_t *mem_base_addr) { mem_base_addr_ = mem_base_addr; }
  uint8_t *GetNodeOutputPtr(const AnfNodePtr &node, size_t index) const;
  uint8_t *GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const;

  void DumpSomasBasicIR(const string filename);
  void DumpSomasMemoryIR(const string filename);

 private:
  // Maps
  std::unordered_map<size_t, SomasTensorPtr> tensors_map_;
  std::map<void *, SomasNodePtr> nodes_map_;

  // Vectors
  std::vector<SomasNodePtr> nodes_list_;
  std::vector<SomasStreamPtr> streams_list_;
  std::vector<SomasTensorPtr> tensors_list_;

  // Stream groups
  std::vector<vector<uint32_t>> streams_groups_;

  // Solver
  std::unordered_map<size_t, SomasSolverTensorDescPtr> solver_tensor_desc_list_;
  SomasSolverPrePtr somas_solver_;

  // Constraints
  std::shared_ptr<Array> cannot_reuse_;

  // Contiguous list
  std::vector<vector<size_t>> contiguous_tensors_list_;

  // Ref lists
  std::vector<vector<size_t>> ref_node_constraints_;
  std::vector<vector<size_t>> ref_overlap_constraints_;

  // Total offset
  size_t mem_offset_;

  // GetNext op output size
  size_t get_next_size_;

  // Memory base address
  uint8_t *mem_base_addr_{nullptr};

  // Save debug info
  bool save_graphs_{false};
  std::string save_graphs_path_;

  // Statistics
  size_t upper_bound_{0};
  size_t lower_bound_{0};
  size_t workspace_total_size_{0};
  size_t comm_input_total_size_{0};
  size_t comm_output_total_size_{0};
  size_t lifelong_all_total_size_{0};
  size_t lifelong_start_total_size_{0};
  size_t lifelong_end_total_size_{0};

  bool InitSomasTensors(const session::KernelGraph *graph);
  void InitBasicInfo(const session::KernelGraph *graph);
  void InitSomasStreamAndNode(const session::KernelGraph *graph);
  void InitSomasOutputAndWorkspaceTensors(const session::KernelGraph *graph);
  void InitSomasInputTensors(const session::KernelGraph *graph);
  void GetNextOutputProcess(const session::KernelGraph *graph);
  void IndependentNodeOutputProcess(const session::KernelGraph *graph);
  void SummaryInputProcess(const session::KernelGraph *graph);
  void RefNodeProcess(const session::KernelGraph *graph);
  void UnReuseNodeProcess(const session::KernelGraph *graph);
  SomasTensorPtr CreateGapTensor(size_t gap_tensor_id);
  void GenContiguousList(const session::KernelGraph *graph);
  void PreprocessingConflicts();
  void ComputeConflictPairs();

  bool Assign(const session::KernelGraph *graph);

  void DumpOfflineIR(const string filename);
  void DumpSomasMemoryPoolInfoIR(const string filename);
  std::string GetSplitName(const string &scope_name) const;
  size_t CalcLowerBound() const;
  void GenStatisticInfo();
};

using SomasPtr = std::shared_ptr<Somas>;
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_

@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_node.h"

#include <algorithm>

namespace mindspore {
namespace somas {
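
// For each ancestor stream, anc_stream_max_order_ records the highest node id seen
// on that stream among this node's (transitive) ancestors. Because nodes run this in
// topological order, one pass over the distance-one ancestors suffices: their own
// anc_stream_max_order_ maps are already complete.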
void SomasNode::ComputeAncestorNodes() {
  // Fast algorithm: assumes nodes execute this function in the received topological order
  int64_t thisId = this->GetStream()->GetId();
  for (SomasNodePtr node : ancestor_nodes_) {
    int64_t ancestorId = node->GetStream()->GetId();
    // Direct ancestor on a different stream: record its node id for that stream
    if (thisId != ancestorId) {
      this->anc_stream_max_order_[ancestorId] = std::max(this->anc_stream_max_order_[ancestorId], node->GetId());
    }
    // Inherit the per-stream maxima already computed for the ancestor
    for (SomasStreamPtr stream : node->GetStream()->ancestor_streams_) {
      int64_t streamId = stream->GetId();
      this->anc_stream_max_order_[streamId] =
        std::max(this->anc_stream_max_order_[streamId], node->anc_stream_max_order_[streamId]);
    }
  }
}

void SomasNode::PresetAncestorStreams(const std::vector<SomasStreamPtr> stream_vector) {
  for (SomasStreamPtr stream : stream_vector) {
    anc_stream_max_order_[stream->GetId()] = 0;
  }
}
}  // namespace somas
}  // namespace mindspore

@@ -0,0 +1,78 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_

#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_stream.h"
#include "backend/optimizer/somas/somas_tensor.h"

namespace mindspore {
namespace somas {
class SomasStream;
class SomasTensor;

enum NodeType { kCommonNode, kCommunicationNode };

using SomasStreamPtr = std::shared_ptr<SomasStream>;
using SomasTensorPtr = std::shared_ptr<SomasTensor>;
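
// A SomasNode wraps one kernel of the graph: the stream it executes on, its input/
// output/workspace tensors, and the bookkeeping used to compute ancestor relations
// between streams for conflict analysis.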
class SomasNode {
 public:
  using SomasNodePtr = std::shared_ptr<SomasNode>;

  // Public attributes (mutated in code)
  std::string scope_full_name_;
  // Keeping only distance-one ancestor nodes; enough for ComputeAncestorNodes()
  std::set<SomasNodePtr> ancestor_nodes_;
  std::set<SomasTensorPtr> tensors_;

  std::vector<SomasTensorPtr> input_tensors_;
  std::vector<SomasTensorPtr> output_tensors_;
  std::vector<SomasTensorPtr> workspace_tensors_;

  std::unordered_map<int64_t, size_t> anc_stream_max_order_;

  // Constructors/Destructors
  SomasNode(size_t id, NodeType type, SomasStreamPtr stream) : id_(id), stream_(stream), type_(type) {}
  SomasNode(const SomasNode &) = delete;
  SomasNode &operator=(const SomasNode &) = delete;
  ~SomasNode() = default;

  // Accessors
  const size_t &GetId() { return id_; }
  SomasStreamPtr GetStream() { return stream_; }
  const NodeType &GetType() { return type_; }

  // Computing ancestors
  void PresetAncestorStreams(const std::vector<SomasStreamPtr> stream_vector);
  void ComputeAncestorNodes();

 private:
  const size_t id_{0};
  SomasStreamPtr const stream_;
  const NodeType type_;
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_

@@ -0,0 +1,260 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_solver_alg.h"

#include <algorithm>
#include <stack>
#include <utility>

namespace mindspore {
namespace somas {
// Offset-picking heuristics: order candidate (offset, gap) pairs
bool SmallestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.first < b.first || (a.first == b.first && a.second < b.second);
}
bool LargestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.first > b.first || (a.first == b.first && a.second < b.second);
}
bool BestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.second < b.second || (a.second == b.second && a.first < b.first);
}
bool WorstFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.second > b.second || (a.second == b.second && a.first < b.first);
}
size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
size_t SingleObject(FootPrint *p) { return SIZE_MAX; }

bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};
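
// Walks the footprint chain and returns the offset reached by the last footprint,
// i.e. the memory upper bound of the current solution.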
size_t FootPrint::Result() {
  std::shared_ptr<FootPrint> foot_print = shared_from_this();
  size_t upperbound = 0;
  uint32_t total_footprints = 0;
  while (NULL != foot_print) {
    foot_print->printStats();
    upperbound = foot_print->getOffset();
    foot_print = foot_print->Next();
    total_footprints++;
  }
  MS_LOG(DEBUG) << total_footprints << " footprints allocated";
  return upperbound;
}
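
// Scans the gaps between the merged forbidden intervals, from the top of the
// footprint down to its base offset, and collects every gap large enough for the
// block; the branching-strategy comparator then picks the winning (offset, gap)
// candidate.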
bool FootPrint::findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(merged);
  MS_EXCEPTION_IF_NULL(offset);
  bool bfound = false;
  std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b)>
    offsetcandidates(g_pBranching[m_branching_strategy_]);
  size_t gap = 0;
  Interval a;
  Interval it;
  a.ub() = algorithm[m_algorithm_](this);
  while (!(*merged).empty()) {
    it = (*merged).top();
    (*merged).pop();
    a.lb() = it.ub();
    if (a.contains(block.m_size_)) {
      gap = a.ub() - a.lb() - block.m_size_;
      offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
    }
    a.ub() = it.lb();
  }
  a.lb() = m_offset_;
  if (a.contains(block.m_size_)) {
    gap = a.ub() - a.lb() - block.m_size_;
    offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
  }
  if (offsetcandidates.size() > 0) {
    *offset = (*offsetcandidates.begin()).first;
    m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
    offsetcandidates.erase(offsetcandidates.begin());
    bfound = true;
  }
  return bfound;
}
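
// Classic interval merging: sort by lower bound, then push onto a stack, extending
// the top interval whenever the next one overlaps it. The stack ends up holding the
// disjoint union of all forbidden intervals.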
void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) {
  MS_EXCEPTION_IF_NULL(s);
  sort((*interval_v).begin(), (*interval_v).end(),
       [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });
  (*s).push((*interval_v)[0]);
  for (size_t i = 1; i < (*interval_v).size(); i++) {
    Interval &top = (*s).top();
    Interval &b = (*interval_v)[i];
    if (top.ub() < b.lb())
      (*s).push(b);
    else if (top.ub() < b.ub())
      top.ub() = b.ub();
  }
  return;
}
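
// For two multi-tensor blocks, walks both tensor chains and, wherever a pair of
// tensors conflicts in the constraint matrix, records the occupied interval of the
// already-placed tensor as non-eligible for the block being placed.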
void FootPrint::ConstrainedBLocks(const std::shared_ptr<Array> &constraints, const BlockTensor &b1,
                                  const BlockTensor &b2, vector<Interval> *oInterval) {
  MS_EXCEPTION_IF_NULL(oInterval);
  // Propagate per-tensor conflicts through both chains
  size_t acum = m_offset_;
  for (SomasSolverTensorDescPtr p1 = b1.m_start_tensor_; NULL != p1; p1 = p1->right_) {
    for (SomasSolverTensorDescPtr p2 = b2.m_start_tensor_; NULL != p2; p2 = p2->right_) {
      if ((*constraints)(p1->index_, p2->index_) == 1) {
        Interval a = Interval(acum, acum + p1->size_);
        Interval b = Interval(p2);
        if (a.lb() < b.ub()) {
          (*oInterval).emplace_back(b);
        }
      }
    }
    acum += p1->size_;
  }
}
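
// Collects the intervals the block must not overlap (whole single-tensor blocks, or
// per-tensor conflicts via ConstrainedBLocks), merges them, and searches for an
// eligible offset inside this footprint.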
bool FootPrint::findOffset(const std::shared_ptr<Array> &constraints, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(offset);
  bool bretval = true;
  vector<Interval> l_interval;

  const size_t intervals_estimation = 1000;
  l_interval.reserve(intervals_estimation);  // reserve() takes an element count, not a byte count

  *offset = m_offset_;
  bretval = true;

  // Transform constrained tensors into non-eligible intervals
  for (size_t i = 0; i < m_starts_.size(); i++) {
    if (block.Alone() && m_starts_[i]->Alone() &&
        (*constraints)(block.m_start_tensor_->index_, m_starts_[i]->m_start_tensor_->index_)) {
      if (m_algorithm_ != 1 && i == 0) return false;
      Interval It = Interval(m_starts_[i]->m_start_tensor_);
      l_interval.emplace_back(It);
    } else {
      ConstrainedBLocks(constraints, block, *m_starts_[i], &l_interval);  // solve multiple tensor blocks
    }
  }

  // Merge non-eligible intervals and find a slot to allocate the tensor block
  if (!l_interval.empty()) {
    stack<Interval> l_mergedIntervals;
    Merge(&l_interval, &l_mergedIntervals);
    bretval = findFirst(&l_mergedIntervals, block, offset);
  }
  return bretval;
}
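
// Places a block at the chosen offset: records the offset for the current solution,
// lays the chained tensors out back to back, and lazily creates the next footprint
// the first time this one receives a block.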
void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
  if (m_foot_print_next_ == NULL) {
    m_foot_print_next_ = std::make_shared<FootPrint>();
    size_t newoffset = m_offset_ + block->m_size_;
    m_foot_print_next_->setOffset(newoffset);
    m_foot_print_next_->setAlignment(m_alignment_);
    m_foot_print_next_->m_solId_ = m_solId_;
    m_starts_.clear();
    MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
  }
  addStart(block);
  size_t offset1 = offset;
  SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
  MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
  pair<uint32_t, size_t> sol_offset;
  sol_offset.first = block->m_current_sol_;
  sol_offset.second = offset;
  if (block->offsets_.count(sol_offset.first))
    MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << block->m_current_sol_ << " for block "
                    << block->m_start_tensor_->index_;
  block->offsets_.insert(sol_offset);
  while (tensor) {
    tensor->offset_ = offset1;
    offset1 += tensor->size_;
    MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
    tensor = tensor->right_;
  }
}

void FootPrint::printStats() {
  MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
}
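
// Fast heuristic driver: walks the sorted blocks, re-uses saved offsets for blocks
// that need no re-allocation, and otherwise tries each footprint in the chain until
// one yields an eligible offset for the block.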
bool FastHeuristic::Eval(  // unordered_map<size_t, SomasSolverTensorDescPtr> &tensors_m,
  vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print,
  const std::shared_ptr<Array> &pConstraints) {
  MS_EXCEPTION_IF_NULL(foot_print);
  auto start = std::chrono::system_clock::now();
  std::shared_ptr<FootPrint> p = foot_print;
  bool bpushed = false;
  uint32_t startscount = 0;
  size_t offset = foot_print->getOffset();
  m_tensors_allocated_ = 0;
  SomasSolverTensorDescPtr tensor = NULL;

  for (size_t i = 0; i < (*block_tensors_v).size(); i++) {
    BlockTensor &block = (*block_tensors_v)[i];
    if (block.m_bre_allocate_ == false) {
      // The block keeps its previous offset; just record it for the current solution
      offset = block.m_start_tensor_->offset_;
      pair<uint32_t, size_t> aux;
      aux.first = foot_print->m_solId_;
      aux.second = block.m_start_tensor_->offset_;
      if (block.offsets_.count(aux.first)) {
        MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux.first << " for block "
                        << block.m_start_tensor_->index_;
      }
      block.offsets_.insert(aux);
      continue;
    }
    bpushed = false;
    p = foot_print;
    block.m_current_sol_ = foot_print->m_solId_;
    while (!bpushed) {
      if (p->findOffset(pConstraints, block, &offset)) {
        p->addElem(&block, offset);
        startscount++;
        tensor = block.m_start_tensor_;
        while (tensor) {
          m_tensors_allocated_++;
          tensor = tensor->right_;
        }
        bpushed = true;
        break;
      }
      // Go to the next footprint slot
      if (NULL != p->Next()) {
        p = p->Next();
      } else if (bpushed == false) {  // something went wrong
        MS_LOG(WARNING) << "Could not allocate memory for tensor: " << block.m_start_tensor_->index_;
        return false;
      }
    }
  }
  MS_LOG(DEBUG)
    << "\nElapsed time of Fast Heuristic search: "
    << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
    << " ms";
  return true;
}
}  // namespace somas
}  // namespace mindspore

@@ -0,0 +1,177 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstring>
#include <list>
#include <memory>
#include <numeric>
#include <set>
#include <stack>
#include <unordered_map>
#include <utility>
#include <vector>

#include "backend/optimizer/somas/somas_solver_pre.h"
#include "utils/ms_context.h"

using std::pair;
using std::set;
using std::stack;
using std::unordered_map;
using std::vector;

namespace mindspore {
namespace somas {
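
// Memory range helper [lb, ub): a tensor occupies [offset_, offset_ + size_), and
// gaps between forbidden ranges are tested for fit with contains().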
class Interval {
 public:
  Interval() { m_a_ = m_b_ = 0; }
  explicit Interval(SomasSolverTensorDescPtr t) {
    m_a_ = t->offset_;
    m_b_ = m_a_ + t->size_;
  }
  Interval(const size_t &a, const size_t &b) {
    m_a_ = a;
    m_b_ = b;
  }
  bool intersect(const Interval &i) { return (in(i.m_a_) || in(i.m_b_)); }
  bool in(const size_t &a) { return ((a > m_a_) && (a < m_b_)); }
  Interval intersection(const Interval &i) {
    if (m_a_ < i.m_a_)
      return Interval(m_a_, i.m_b_);
    else
      return Interval(i.m_a_, m_b_);
  }
  void merge(const Interval &i) {
    m_a_ = std::min(m_a_, i.m_a_);
    m_b_ = std::max(m_b_, i.m_b_);
  }
  size_t &lb() { return m_a_; }
  size_t &ub() { return m_b_; }
  bool contains(size_t width) { return (m_b_ - m_a_) >= width; }
  bool contains(const Interval &a) { return ((a.m_a_ >= m_a_) && (a.m_b_ <= m_b_)); }
  Interval &operator=(const Interval &in) {
    m_a_ = in.m_a_;
    m_b_ = in.m_b_;
    return *this;
  }

 private:
  size_t m_a_;
  size_t m_b_;
};
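
// A BlockTensor is a maximal chain of tensors linked by continuous constraints; the
// solver places whole blocks, so chained tensors keep adjacent offsets. offsets_
// remembers the offset chosen for the block in each explored solution.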
class BlockTensor {
 public:
  SomasSolverTensorDescPtr m_start_tensor_;
  unordered_map<uint32_t,
                std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &, const pair<size_t, size_t> &)>>
    offsets_candidates_;
  uint32_t m_current_sol_;
  bool m_bre_allocate_;
  unordered_map<uint32_t, size_t> offsets_;
  size_t m_size_;

  BlockTensor()
      : m_start_tensor_(NULL),
        offsets_candidates_(),
        m_current_sol_(0),
        m_bre_allocate_(true),
        offsets_(),
        m_size_(0) {}

  BlockTensor &operator=(const BlockTensor &bt) {
    m_bre_allocate_ = bt.m_bre_allocate_;
    m_current_sol_ = 0;
    m_start_tensor_ = bt.m_start_tensor_;
    offsets_candidates_ = bt.offsets_candidates_;
    offsets_ = bt.offsets_;
    m_size_ = bt.m_size_;
    return *this;
  }

  void log() {
    SomasSolverTensorDescPtr p = m_start_tensor_;
    MS_LOG(DEBUG) << "Block of Tensors [" << m_start_tensor_->index_ << "]\nsize: " << m_size_ << " Tensors:";
    while (p) {
      MS_LOG(DEBUG) << "[" << p->index_ << "," << p->size_ << "]";
      p = p->right_;
    }
  }

  bool Alone() const { return ((NULL == m_start_tensor_->right_) && (NULL == m_start_tensor_->left_)); }
};
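
// A FootPrint is one segment of the solver's address space. Blocks that cannot be
// placed in this segment spill into m_foot_print_next_; the chain's last offset is
// the total memory bound (see Result()).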
class FootPrint : public std::enable_shared_from_this<FootPrint> {
 public:
  uint32_t m_solId_;

  FootPrint()
      : m_offset_(0),
        m_starts_(),
        m_foot_print_next_(NULL),
        m_alignment_(0),
        m_branching_strategy_(0),
        m_algorithm_(0) {}
  void setAlignment(const size_t a) { m_alignment_ = a; }
  void setBranchingStrategy(uint32_t bs) { m_branching_strategy_ = bs; }
  void setCurrentSol(uint32_t solId) { m_solId_ = solId; }
  void setAlgorithm(uint32_t algorithm) { m_algorithm_ = algorithm; }
  void addStart(BlockTensor *elemIndex) { m_starts_.push_back(elemIndex); }
  void addElem(BlockTensor *block, const size_t &offset);
  std::shared_ptr<FootPrint> &Next() { return m_foot_print_next_; }
  vector<BlockTensor *> &getStarts() { return m_starts_; }
  void Destroy();
  const size_t getOffset() { return m_offset_; }
  void setOffset(const size_t &offset) { m_offset_ = offset; }
  bool findOffset(const std::shared_ptr<Array> &constraints, const BlockTensor &block, size_t *offset);
  void ConstrainedBLocks(const std::shared_ptr<Array> &constraints, const BlockTensor &b1, const BlockTensor &b2,
                         vector<Interval> *oInterval_l);
  void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged);
  bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset);
  size_t Result();
  void printStats();

 private:
  size_t m_offset_;
  vector<BlockTensor *> m_starts_;
  std::shared_ptr<FootPrint> m_foot_print_next_;
  size_t m_alignment_;
  uint32_t m_branching_strategy_;
  uint32_t m_algorithm_;
};
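
// Drives the placement of all blocks across the footprint chain; see
// FastHeuristic::Eval in somas_solver_alg.cc.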
class FastHeuristic {
 public:
  FastHeuristic() : m_alignment_(512), m_tensors_allocated_(0) {}

  void setAlignment(const size_t &a) { m_alignment_ = a; }
  void Destroy();
  bool Eval(  // unordered_map<size_t, SomasSolverTensorDescPtr> &tensors_m,
    vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print,
    const std::shared_ptr<Array> &pConstraints);

 private:
  size_t m_alignment_;
  size_t m_tensors_allocated_;
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_

@@ -0,0 +1,405 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <ctime>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_solver_alg.h"
#include "backend/optimizer/somas/somas_solver_core.h"
#include "backend/optimizer/somas/somas_solver_pre.h"

using std::sort;
using std::unordered_map;
using std::vector;

namespace mindspore {
namespace somas {
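
// When all_ is set, exhaustively tries every combination of algorithm, tensor
// sorting, and offset (branching) strategy, keeping the solution with the smallest
// upper bound; otherwise runs the single configured strategy.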
Status SomasSolverCore::MemoryAllocationSolver() {
  auto start = std::chrono::system_clock::now();
  Status retval = SUCCESS;
  size_t best = SIZE_MAX;
  size_t best_timing = SIZE_MAX;
  if (all_) {  // loop over all heuristics
    FittingType best_branching = kBest;
    SortingType best_sorting = kGreaterSizeSmallerIndex;
    AlgorithmType best_algorithm = kManyObjects;
    uint32_t best_sol = 0;
    size_t worst = 0;
    BuildBlocks();
    Clean();
    MS_LOG(INFO) << "time\tSol#\tResult\t\t\t\tAlgorithm\tSorting Strategy\tOffset Strategy";
    for (size_t algorithm = 0; algorithm < kNumAlgorithmTypes; algorithm++) {
      algorithm_ = static_cast<AlgorithmType>(algorithm);
      for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
        sort_strategy_ = static_cast<SortingType>(sort_strategy);
        SortTensors();
        for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
          branching_strategy_ = static_cast<FittingType>(branching_strategy);
          Clean();
          MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors";
          start = std::chrono::system_clock::now();
          upperbound_ = FindSolutions();
          MS_LOG(DEBUG)
            << "\nElapsed time of upper bound testing: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
            << " ms";
          start = std::chrono::system_clock::now();
          if (upperbound_ > worst) {
            worst = upperbound_;
          }
          if (upperbound_ <= best) {
            best = upperbound_;
            best_algorithm = algorithm_;
            best_branching = branching_strategy_;
            best_sorting = sort_strategy_;
            best_sol = sol_count_;
            best_timing = timing_;
          }
          Verify();
          sol_count_++;
        }
      }
    }
    upperbound_ = best;
    auto end = std::chrono::system_clock::now();
    size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
    const double giga = 1024. * 1024. * 1024.;
    const double cent = 100.;
    MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
    MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << sol_count_ << "] ";
    MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
                 << (best - lifelongmemory_) / (giga) << " GB + " << lifelongmemory_ / (giga)
                 << " GB from lifelong tensors)";
    MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
    MS_LOG(INFO) << "Best algorithm: " << algorithm_type_[best_algorithm].c_str();
    MS_LOG(INFO) << "Best sorting strategy: " << sorting_[best_sorting].c_str();
    MS_LOG(INFO) << "Best offset strategy: " << branching_[best_branching].c_str();
    MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
    MS_LOG(INFO) << "Spread:" << static_cast<double>(worst - best) * cent / static_cast<double>(best) << " %";
    best_sol_ = best_sol;
    SetBestSolution();
  } else {
    MS_LOG(INFO) << "Algorithm strategy: " << algorithm_type_[algorithm_].c_str();
    MS_LOG(INFO) << "Sorting strategy: " << sorting_[sort_strategy_].c_str();
    MS_LOG(INFO) << "Offset strategy: " << branching_[branching_strategy_].c_str();
    BuildBlocks();
    SortTensors();
    upperbound_ = FindSolutions();
    Verify();
  }
  return retval;
}

Status SomasSolverCore::Verify() {
  Status retval = SUCCESS;
  if (verify_) {
    MS_LOG(INFO) << "Verifying solution..";
    if (!Verify(upperbound_)) {
      MS_LOG(WARNING) << "Solver Allocation Memory Check FAILS";
      retval = FAILED;
    } else {
      const double giga = 1024. * 1024. * 1024.;
      MS_LOG(INFO) << "Solver Allocation Memory Check SUCCESS!";
      MS_LOG(INFO) << "Result: " << upperbound_ << " (" << (upperbound_) / (giga) << " GB)";
      retval = SUCCESS;
    }
  }
  return retval;
}

Status SomasSolverCore::Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *pTensor_map) {
  Status retval = SUCCESS;
  if (NULL == pTensor_map) return retval;
  MS_LOG(INFO) << "Verifying HQ Solution..";
  MS_LOG(INFO) << "Checking tensor ids and sizes..";
  for (auto ptensor : *pTensor_map) {
    if (tensors_.count(ptensor.first) == 0) {
      MS_LOG(WARNING) << "HQ Tensor id " << ptensor.first << " does not exist";
    } else if (tensors_[ptensor.first]->size_ != ptensor.second->size_) {
      size_t HQ_index = ptensor.first;
      size_t HQ_size = ptensor.second->size_;
      size_t index = ptensor.first;
      size_t size = tensors_[ptensor.first]->size_;
      MS_LOG(WARNING) << "HQ Tensor Id: " << HQ_index << " with size: " << HQ_size
                      << " is different from Tensor Id: " << index << " size: " << size;
    }
  }
  MS_LOG(INFO) << "Checking HQ Solution..";
  tensors_ = *pTensor_map;
  retval = Verify(upperbound_) ? SUCCESS : FAILED;
  return retval;
}
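
// Checks the computed offsets directly against the constraint matrix: continuous
// tensors must be laid out back to back, and conflicting (or lifelong) tensors must
// not overlap. Also re-derives the memory bound from the offsets.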
bool SomasSolverCore::Verify(const size_t &upperbound) {
  auto start = std::chrono::system_clock::now();
  bool retval = true;
  size_t result = 0;
  SomasSolverTensorDescPtr t1;
  SomasSolverTensorDescPtr t2;
  for (auto t1_ : tensors_) {
    // check alignment
    result = std::max(result, t1_.second->size_ + t1_.second->offset_);
    for (auto t2_ : tensors_) {
      t1 = t1_.second;
      t2 = t2_.second;
      if (t1->index_ == t2->index_) continue;
      bool blifelong = (t1->lifelong_ || t2->lifelong_) && (t1->index_ != t2->index_);
      const size_t continuous = 2;
      const size_t conflict = 1;
      if ((*constraints_)(t1->index_, t2->index_) == continuous) {  // continuous constraint
        // t1 must be contiguous to t2
        bool bcontinuous = t1->offset_ == (t2->offset_ + t2->size_);
        if (!bcontinuous) {
          MS_LOG(WARNING) << "Continuous constraint violation in tensors " << t1->index_ << " and " << t2->index_;
          retval = false;
        }
      } else if (blifelong || (*constraints_)(t1->index_, t2->index_) == conflict) {  // conflict constraint
        size_t t1_ub = t1->offset_ + t1->size_;
        size_t t2_ub = t2->offset_ + t2->size_;
        bool b_overlap_lb = ((t2->offset_ >= t1->offset_) && (t2->offset_ < t1_ub));
        bool b_overlap_ub = ((t2_ub > t1->offset_) && (t2_ub < t1_ub));
        bool b_overlap = b_overlap_lb || b_overlap_ub;
        bool biszerosized = t1->size_ == 0 || t2->size_ == 0;
        if (b_overlap && !biszerosized) {
          MS_LOG(WARNING) << "Non-overlap constraint violation in tensors " << t1->index_ << " and " << t2->index_;
          retval = false;
        }
      }
    }
  }
  if (upperbound != result) {
    MS_LOG(WARNING) << "ERROR Invalid upperbound result --> Footprint Result: " << upperbound_
                    << " Tensor Result: " << result + lifelongmemory_;
    retval = false;
  }
  MS_LOG(DEBUG)
    << "\nElapsed time of Fast Heuristic Check: "
    << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
    << " ms";
  return retval;
}
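
// Groups tensors connected by left_/right_ links into BlockTensors; lifelong
// tensors are excluded here and appended after the main solve.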
void SomasSolverCore::BuildBlocks() {
  MS_LOG(DEBUG) << "Building blocks of tensors";
  lifelongmemory_ = 0;
  uint64_t tensors_block_count = 0;
  for (auto tensor : tensors_) {
    SomasSolverTensorDescPtr pTensor = tensor.second;
    if (pTensor->blocked_) continue;
    if (pTensor->lifelong_) {
      lifelongmemory_ += pTensor->size_;
      continue;
    }
    // move to the leftmost tensor of the chain
    while (pTensor->left_) pTensor = pTensor->left_;
    // set start tensor
    BlockTensor bTensor;
    bTensor.m_bre_allocate_ = true;
    bTensor.m_start_tensor_ = pTensor;
    // find total size
    bTensor.m_size_ = 0;
    do {
      bTensor.m_size_ += pTensor->size_;
      pTensor->blocked_ = true;
      pTensor = pTensor->right_;
      tensors_block_count++;
    } while (NULL != pTensor);
    // add to the list
    this->block_tensors_.emplace_back(bTensor);
  }
  if (tensors_block_count != tensors_.size())
    MS_LOG(INFO) << static_cast<int>(tensors_.size() - tensors_block_count) << " lifelong tensors found";
  // for debug
  for (auto &b : block_tensors_) b.log();
}

void SomasSolverCore::Clean() {
  for (auto &block : block_tensors_) {
    block.m_bre_allocate_ = true;
    auto pTensor = block.m_start_tensor_;
    while (pTensor) {
      pTensor->offset_ = 0;
      pTensor = pTensor->right_;
    }
  }
  upperbound_ = SIZE_MAX;
}

void SomasSolverCore::SortTensors() {  // the Fast Heuristic requires sorted blocks
  MS_LOG(DEBUG) << "Sorting blocks of tensors, strategy: " << sorting_[sort_strategy_].c_str();
  switch (sort_strategy_) {
    case kGreaterSizeSmallerIndex: {  // size(>), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
#ifdef SOMAS_DEBUG
    case kGreaterSizeGreaterIndex: {  // size(>), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeSmallerConstraintsSmallerIndex: {  // size(>), constraints(<), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeSmallerConstraintsGreaterIndex: {  // size(>), constraints(<), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeGreaterConstraintsSmallerIndex: {  // size(>), constraints(>), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeGreaterConstraintsGreaterIndex: {  // size(>), constraints(>), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
#endif
    case kNumSortingTypes: {  // no sorting case
      break;
    }
  }
  // log for debug purposes
  for (auto &block : block_tensors_) block.log();
}
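
// Re-applies the offsets that a given solution id stored in each block, propagating
// them through the chained tensors.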
void SomasSolverCore::RestoreSolution(uint32_t sol_id) {
  for (auto block : block_tensors_) {
    if (block.offsets_.count(sol_id) == 0) assert(0);
    size_t bestOffset = block.offsets_[sol_id];
    size_t offset = bestOffset;
    SomasSolverTensorDescPtr pTensor = block.m_start_tensor_;
    while (pTensor) {
      pTensor->offset_ = offset;
      offset += pTensor->size_;
      pTensor = pTensor->right_;
    }
  }
}

size_t SomasSolverCore::Search(const std::shared_ptr<FootPrint> &pFootprint) {
  size_t result = 0;
  FastHeuristic fh;
  MS_LOG(INFO) << "Calling FastSolver Search for " << block_tensors_.size() << " tensors ";
  auto start = std::chrono::system_clock::now();
  if (fh.Eval(&block_tensors_, pFootprint, constraints_)) {
    result = pFootprint->Result();
    auto end = std::chrono::system_clock::now();
    timing_ = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
    if (all_) {
      const double giga = 1073741824.;
      MS_LOG(INFO) << timing_ << " ms\t" << sol_count_ + 1 << "/"
                   << kNumFittingTypes * kNumAlgorithmTypes * kNumSortingTypes << "\t" << result << " Bytes ("
                   << result / giga << " GB)\t" << algorithm_type_[algorithm_].c_str() << "\t"
                   << sorting_[sort_strategy_].c_str() << "\t" << branching_[branching_strategy_].c_str();
    }
  } else {
    MS_LOG(INFO) << "FastSolver could not find a solution";
  }
  if (result < upperbound_) {
    upperbound_ = result;
    best_sol_ = pFootprint->m_solId_;
    best_branching_ = branching_strategy_;
    best_sort_ = sort_strategy_;
  }
  return upperbound_;
}

void SomasSolverCore::AppendLifelongTensors() {
  MS_LOG(DEBUG) << "Appending lifelong tensors to the solution";
  size_t offset = upperbound_;
  for (auto t_ : tensors_) {
    SomasSolverTensorDescPtr pTensor = t_.second;
    if (pTensor->lifelong_) {
      pTensor->offset_ = offset;
      offset += pTensor->size_;
    }
  }
  upperbound_ += lifelongmemory_;
  MS_LOG(DEBUG) << lifelongmemory_ << " bytes from lifelong tensors added to the solution";
}
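
// One full solve pass: builds a fresh footprint chain for the current strategy
// combination, runs the fast heuristic over it, and stacks the lifelong tensors on
// top of the resulting bound.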
size_t SomasSolverCore::FindSolutions() {
  MS_LOG(DEBUG) << "Start allocating blocks, offset strategy: " << branching_[branching_strategy_].c_str();
  std::shared_ptr<FootPrint> pFootprint = std::make_shared<FootPrint>();
  pFootprint->setBranchingStrategy(branching_strategy_);
  pFootprint->setCurrentSol(sol_count_);
  pFootprint->setAlgorithm(algorithm_);
  Search(pFootprint);
  AppendLifelongTensors();
  Destroy(pFootprint);
  return upperbound_;
}

void SomasSolverCore::Destroy(std::shared_ptr<FootPrint> &pFootprint) {
  // Release the footprint chain; the shared_ptrs free the nodes
  while (NULL != pFootprint) {
    if (NULL != pFootprint->Next()) {
      std::shared_ptr<FootPrint> &p = pFootprint;
      pFootprint = pFootprint->Next();
      p = NULL;
    } else {
      pFootprint = NULL;
    }
  }
}
}  // namespace somas
}  // namespace mindspore

@@ -0,0 +1,101 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_

#include <algorithm>
#include <chrono>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_solver_alg.h"
#include "backend/optimizer/somas/somas_solver_pre.h"

namespace mindspore {
namespace somas {
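
// Core solver: owns a copy of the tensor descriptors and the constraint matrix,
// explores the sorting/fitting/algorithm strategy space, and exposes the best
// memory upper bound found.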
class SomasSolverCore {
 public:
  /// Interface function: receives the parameters, builds the model to solve, and later saves the result
  SomasSolverCore(const std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors,
                  const std::shared_ptr<Array> &constraints)
      : tensors_(tensors),
        constraints_(constraints),
        upperbound_(SIZE_MAX),
        timing_(0),
        lifelongmemory_(0),
        verify_(false),
        all_(true),
        best_sol_(0),
        sort_strategy_(kGreaterSizeSmallerIndex),
        branching_strategy_(kBest),
        sol_count_(0),
        algorithm_(kManyObjects) {}
  ~SomasSolverCore() = default;

  Status MemoryAllocationSolver();
  Status Verify();
  bool Verify(const size_t &);
  Status Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *);
  void VerifySolution(const bool verify) { verify_ = verify; }
  void SortTensors();
  void BuildBlocks();
  void Clean();
  void SetBestSolution() { RestoreSolution(best_sol_); }
  void RestoreSolution(uint32_t sol_id);
  void SetSortingStrategy(SortingType sort_strategy) { sort_strategy_ = sort_strategy; }
  void SetFittingStrategy(FittingType branching_strategy) { branching_strategy_ = branching_strategy; }
  void SetAlgorithmStrategy(AlgorithmType algorithm_strategy) { algorithm_ = algorithm_strategy; }
  void SetAllStrategies(bool all) { all_ = all; }
  const size_t &GetUpperbound() const { return upperbound_; }

 private:
  std::unordered_map<size_t, SomasSolverTensorDescPtr> tensors_;
  vector<BlockTensor> block_tensors_;
  std::shared_ptr<Array> constraints_;
  size_t upperbound_{0};
  size_t timing_{0};
  size_t lifelongmemory_{0};
  bool verify_{false};
  bool all_{false};
  uint32_t best_sol_{0};
  SortingType best_sort_;
  FittingType best_branching_;
  SortingType sort_strategy_;
  FittingType branching_strategy_;
  uint32_t sol_count_{0};
  AlgorithmType algorithm_;

  size_t FindSolutions();
  size_t Search(const std::shared_ptr<FootPrint> &pFootprint);
  void AppendLifelongTensors();
  void Destroy(std::shared_ptr<FootPrint> &);

  const std::string sorting_[6] = {"size(>), index(<)",
                                   "size(>), index(>)",
                                   "size(>), constraints(<), index(<)",
                                   "size(>), constraints(<), index(>)",
                                   "size(>), constraints(>), index(<)",
                                   "size(>), constraints(>), index(>)"};
  const std::string branching_[4] = {"bestfit", "smallest", "largest", "worstfit"};
  const std::string algorithm_type_[2] = {"Shared Objects", "Single Object"};
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_

@@ -0,0 +1,209 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cstdio>
#include <fstream>
#include <memory>
#include <string>
#include <utility>

#include "backend/optimizer/somas/somas_solver_core.h"
#include "backend/optimizer/somas/somas_solver_pre.h"

namespace mindspore {
namespace somas {
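
// Entry point used by Somas: validates tensor indices against the constraint
// matrix, wires up the continuous-tensor chains from continuous_v, then delegates
// to SomasSolverCore and records the resulting memory bound.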
Status SomasSolverPre::Solving(const session::KernelGraph *graph,
                               std::unordered_map<size_t, SomasSolverTensorDescPtr> *ptensors,
                               std::shared_ptr<Array> pConstraints, const vector<vector<size_t>> &continuous_v,
                               bool bVerifySolution, bool ball, SortingType sorting, FittingType fitting,
                               AlgorithmType algorithm) {
  Status retval = SUCCESS;
  try {
    size_t maxIndex = 0;
    std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors = *ptensors;
    std::unordered_map<size_t, SomasSolverTensorDescPtr>::iterator max =
      std::max_element(tensors.begin(), tensors.end(),
                       [](const std::pair<size_t, SomasSolverTensorDescPtr> &a,
                          const std::pair<size_t, SomasSolverTensorDescPtr> &b) { return a.first < b.first; });
    maxIndex = max->first;
    if (maxIndex > pConstraints->Rows() - 1) {
      MS_LOG(WARNING) << "ERROR: MaxIndex invalid, MaxIndex " << maxIndex << ", Rows " << pConstraints->Rows();
      return FAILED;
    }
    MS_LOG(INFO) << "Filling in constraints matrix..";
    uint32_t continuous_cnt = 0;

    // creating S lists
    for (auto &aux : continuous_v) {
      for (uint32_t i = 0; i < aux.size() - 1; i++) {
        uint32_t index1 = aux[i];
        uint32_t index2 = aux[i + 1];
        if (NULL == tensors[index1]) {
          MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index1 << ")";
          return FAILED;
        }
        if (NULL == tensors[index2]) {
          MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index2 << ")";
          return FAILED;
        }
        const size_t continuous = 2;
        (*pConstraints)(index2, index1) = continuous;
        if (tensors[index1]->right_)
          MS_LOG(WARNING) << "Warning: tensor " << index1
                          << " already has a right tensor (id: " << tensors[index1]->right_->index_ << ")";
        if (tensors[index2]->left_)
          MS_LOG(WARNING) << "Warning: tensor " << index2
                          << " already has a left tensor (id: " << tensors[index2]->left_->index_ << ")";
        tensors[index1]->right_ = tensors[index2];
        tensors[index2]->left_ = tensors[index1];
        continuous_cnt++;
      }
    }
    continuous_cnt++;

    std::shared_ptr<SomasSolverCore> pSolver = std::make_shared<SomasSolverCore>(tensors, pConstraints);
    pSolver->SetAlgorithmStrategy(algorithm);
    pSolver->SetSortingStrategy(sorting);
    pSolver->SetFittingStrategy(fitting);
    pSolver->SetAllStrategies(ball);
    pSolver->VerifySolution(bVerifySolution);
    if (SUCCESS == (pSolver->MemoryAllocationSolver())) {
      max_offset_ = pSolver->GetUpperbound();
      const double giga = 1024. * 1024. * 1024.;
      MS_LOG(INFO) << "SomasSolver::Solving SUCCESS";
      MS_LOG(INFO) << "SomasSolver::Solving RESULT: " << max_offset_ << " (" << max_offset_ / (giga) << " GB)";
    }
    auto context_ptr = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(context_ptr);
    bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
    if (save_graphs) {
      Log(graph, tensors, pConstraints, continuous_v);
    }
  } catch (const std::exception &e) {
    MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
    retval = FAILED;
  }
  return retval;
}
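
// Dumps the solver input (tensors, conflict pairs, continuous lists) and the solved
// offsets to .ir files next to the saved graphs, for offline debugging.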
| void SomasSolverPre::Log(const session::KernelGraph *graph, | |||||
| const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | |||||
| const std::shared_ptr<Array> &pConstraints, const vector<vector<size_t>> &continuous_v) { | |||||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-input.txt.."; | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH); | |||||
| std::string filename = save_graphs_path + "/" + "somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir"; | |||||
| if (filename.size() > PATH_MAX) { | |||||
| MS_LOG(ERROR) << "File path " << filename << " is too long."; | |||||
| return; | |||||
| } | |||||
| char real_path[PATH_MAX] = {0}; | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| if (_fullpath(real_path, filename.c_str(), PATH_MAX) == nullptr) { | |||||
| MS_LOG(DEBUG) << "dir " << filename << " does not exit."; | |||||
| } | |||||
| #else | |||||
| if (realpath(filename.c_str(), real_path) == nullptr) { | |||||
| MS_LOG(DEBUG) << "Dir " << filename << " does not exit."; | |||||
| } | |||||
| #endif | |||||
| std::string path_string = real_path; | |||||
| ChangeFileMode(path_string, S_IRWXU); | |||||
| std::ofstream ofs_1(real_path); | |||||
| if (!ofs_1.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| for (auto &t : tensors) { | |||||
| ofs_1 << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; | |||||
| } | |||||
| for (auto &t1 : tensors) { | |||||
| for (auto &t2 : tensors) { | |||||
| size_t idx1 = t1.first; | |||||
| size_t idx2 = t2.first; | |||||
| if ((idx1 != idx2) && (*pConstraints)(idx1, idx2) == 1) { | |||||
| ofs_1 << "C " << idx1 << " " << idx2 << std::endl; | |||||
| } | |||||
| } | |||||
| } | |||||
| for (auto &s : continuous_v) { | |||||
| ofs_1 << "S"; | |||||
| for (auto idx : s) { | |||||
| ofs_1 << " " << idx; | |||||
| } | |||||
| ofs_1 << std::endl; | |||||
| } | |||||
| ofs_1.close(); | |||||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-output.txt.."; | |||||
| std::string out_filename = | |||||
| save_graphs_path + "/" + "somas_solver_output_" + std::to_string(graph->graph_id()) + ".ir"; | |||||
| if (out_filename.size() > PATH_MAX) { | |||||
| MS_LOG(ERROR) << "File path " << out_filename << " is too long."; | |||||
| return; | |||||
| } | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| if (_fullpath(real_path, out_filename.c_str(), PATH_MAX) == nullptr) { | |||||
| MS_LOG(DEBUG) << "dir " << out_filename << " does not exit."; | |||||
| } | |||||
| #else | |||||
| if (realpath(out_filename.c_str(), real_path) == nullptr) { | |||||
| MS_LOG(DEBUG) << "Dir " << out_filename << " does not exit."; | |||||
| } | |||||
| #endif | |||||
| path_string = real_path; | |||||
| ChangeFileMode(path_string, S_IRWXU); | |||||
| std::ofstream ofs_2(real_path); | |||||
| if (!ofs_2.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| for (auto &t : tensors) { | |||||
| SomasSolverTensorDescPtr tensor = t.second; | |||||
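| // Encode the tensor's position in a contiguous block: 0 = standalone, 1 = head, 2 = middle, 3 = tail. | |||||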
| int continuous = 0; | |||||
| if (tensor->left_ == nullptr && tensor->right_ != nullptr) | |||||
| continuous = 1; | |||||
| else if (tensor->left_ != nullptr && tensor->right_ != nullptr) | |||||
| continuous = 2; | |||||
| else if (tensor->left_ != nullptr && tensor->right_ == nullptr) | |||||
| continuous = 3; | |||||
| const size_t alignment = 512; | |||||
| bool size_aligned = tensor->size_ % alignment == 0; | |||||
| bool offset_aligned = tensor->offset_ % alignment == 0; | |||||
| ofs_2 << std::endl | |||||
| << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_ | |||||
| << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned | |||||
| << "\toffset_aligned=" << offset_aligned; | |||||
| } | |||||
| ofs_2.close(); | |||||
| MS_LOG(INFO) << "SomasSolver::Log done"; | |||||
| } | |||||
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,159 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_ | |||||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_ | |||||
| #include <algorithm> | |||||
| #include <cassert> | |||||
| #include <cstddef> | |||||
| #include <cstring> | |||||
| #include <iostream> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <stack> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "backend/session/kernel_graph.h" | |||||
| using std::unordered_map; | |||||
| using std::vector; | |||||
| namespace mindspore { | |||||
| namespace somas { | |||||
| enum Status { FAILED, SUCCESS }; | |||||
| enum AlgorithmType { kManyObjects = 0, kSingleObject, kNumAlgorithmTypes }; | |||||
| enum SortingType { | |||||
| kGreaterSizeSmallerIndex = 0, | |||||
| #ifdef SOMAS_DEBUG | |||||
| kGreaterSizeGreaterIndex, | |||||
| kGreaterSizeSmallerConstraintsSmallerIndex, | |||||
| kGreaterSizeSmallerConstraintsGreaterIndex, | |||||
| kGreaterSizeGreaterConstraintsSmallerIndex, | |||||
| kGreaterSizeGreaterConstraintsGreaterIndex, | |||||
| #endif | |||||
| kNumSortingTypes | |||||
| }; | |||||
| enum FittingType { | |||||
| kBest = 0, | |||||
| kSmallest, | |||||
| #ifdef SOMAS_DEBUG | |||||
| kLargest, | |||||
| kWorst, | |||||
| #endif | |||||
| kNumFittingTypes | |||||
| }; | |||||
| class Array { | |||||
| public: | |||||
| Array(const size_t &rows, const size_t &cols) : rows_(rows), cols_(cols) { | |||||
| conflicts_array_ = std::make_unique<int[]>(rows * cols); | |||||
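| // Start fully conflicting: by the convention used in Solving/Log, 1 marks a pair that must not share memory. | |||||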
| for (size_t i = 0; i < rows * cols; i++) { | |||||
| conflicts_array_[i] = 1; | |||||
| } | |||||
| } | |||||
| Array(const Array &array) : rows_(array.rows_), cols_(array.cols_) { | |||||
| conflicts_array_ = std::make_unique<int[]>(array.rows_ * array.cols_); | |||||
| for (size_t i = 0; i < array.rows_ * array.cols_; i++) { | |||||
| conflicts_array_[i] = array.conflicts_array_[i]; | |||||
| } | |||||
| } | |||||
| Array &operator=(const Array &array) { | |||||
| // Dimensions are const, so they must already match; copy the conflict entries. | |||||
| assert(rows_ == array.rows_ && cols_ == array.cols_); | |||||
| for (size_t i = 0; i < rows_ * cols_; i++) { | |||||
| conflicts_array_[i] = array.conflicts_array_[i]; | |||||
| } | |||||
| return *this; | |||||
| } | |||||
| int &operator()(const size_t &i, const size_t &j) { | |||||
| assert((i * cols_ + j) < (rows_ * cols_)); | |||||
| return conflicts_array_[i * cols_ + j]; | |||||
| } | |||||
| const size_t &Rows() const { return rows_; } | |||||
| const size_t &Cols() const { return cols_; } | |||||
| private: | |||||
| const size_t rows_; | |||||
| const size_t cols_; | |||||
| std::unique_ptr<int[]> conflicts_array_; | |||||
| }; | |||||
| struct SomasSolverTensorDesc { | |||||
| size_t index_; | |||||
| size_t size_; | |||||
| size_t offset_; | |||||
| bool lifelong_; | |||||
| size_t constraints_; | |||||
| using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>; | |||||
| SomasSolverTensorDescPtr right_; | |||||
| SomasSolverTensorDescPtr left_; | |||||
| bool blocked_; | |||||
| SomasSolverTensorDesc() = default; | |||||
| SomasSolverTensorDesc(size_t index, size_t size, size_t offset, bool blifelong) | |||||
| : index_(index), size_(size), offset_(offset), lifelong_(blifelong) { | |||||
| constraints_ = 0; | |||||
| right_ = nullptr; | |||||
| left_ = nullptr; | |||||
| blocked_ = false; | |||||
| } | |||||
| void Update(size_t index, size_t size, size_t offset, bool blifelong, size_t constraints) { | |||||
| index_ = index; | |||||
| size_ = size; | |||||
| offset_ = offset; | |||||
| lifelong_ = blifelong; | |||||
| constraints_ = constraints; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &out, const SomasSolverTensorDescPtr n) { | |||||
| out << n->index_ << " " << n->size_ << " " << n->offset_ << "\n"; | |||||
| return out; | |||||
| } | |||||
| friend std::istream &operator>>(std::istream &in, SomasSolverTensorDescPtr n) { | |||||
| in >> n->index_ >> n->size_ >> n->offset_; | |||||
| return in; | |||||
| } | |||||
| }; | |||||
| using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>; | |||||
| class SomasSolverPre { | |||||
| public: | |||||
| SomasSolverPre() = default; | |||||
| ~SomasSolverPre() = default; | |||||
| SomasSolverPre(const SomasSolverPre &) = delete; | |||||
| SomasSolverPre &operator=(const SomasSolverPre &) = delete; | |||||
| size_t GetMaxOffset() { return max_offset_; } | |||||
| Status Solving(const session::KernelGraph *graph, std::unordered_map<size_t, SomasSolverTensorDescPtr> *tensors, | |||||
| std::shared_ptr<Array> pConstraints, const vector<vector<size_t>> &continuous_v, | |||||
| bool bVerifySolution, // true -> Check continuous and non overlapping constraints solution | |||||
| bool ball = true, // true -> run full set of heuristics, false -> run single heuristic specified | |||||
| SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest, | |||||
| AlgorithmType algorithm = kManyObjects); | |||||
| void Log(const session::KernelGraph *graph, const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | |||||
| const std::shared_ptr<Array> &pConstraints, const vector<vector<size_t>> &continuous_v); | |||||
| private: | |||||
| size_t max_offset_{0}; | |||||
| }; | |||||
| using SomasSolverPrePtr = std::shared_ptr<SomasSolverPre>; | |||||
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_ | |||||
| @@ -0,0 +1,53 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/optimizer/somas/somas_stream.h" | |||||
| namespace mindspore { | |||||
| namespace somas { | |||||
| void SomasStream::ComputeAncestorStreams() { | |||||
| // (Naive) algorithm: for a given stream, compute its ancestors assuming only distance 1 ancestors are known (handles | |||||
| // cycles between streams) | |||||
| std::set<SomasStreamPtr> current_level, temp_level; | |||||
| // Track visited streams by id: a stream holds no shared_ptr to itself, and a freshly | |||||
| // make_shared'd copy would never compare equal to the pointers stored by its ancestors. | |||||
| std::set<int64_t> already_visited; | |||||
| already_visited.insert(id_); | |||||
| // Initialize current level to distance-2 ancestors | |||||
| for (auto stream1 : ancestor_streams_) { | |||||
| already_visited.insert(stream1->GetId()); | |||||
| for (auto stream2 : stream1->ancestor_streams_) { | |||||
| if (already_visited.count(stream2->GetId()) == 0) current_level.insert(stream2); | |||||
| } | |||||
| } | |||||
| while (!current_level.empty()) { | |||||
| // Push current level into ancestors | |||||
| for (auto stream1 : current_level) { | |||||
| ancestor_streams_.insert(stream1); | |||||
| already_visited.insert(stream1->GetId()); | |||||
| // Keep next level of this ancestor | |||||
| for (auto stream2 : stream1->ancestor_streams_) { | |||||
| if (already_visited.count(stream2->GetId()) == 0) temp_level.insert(stream2); | |||||
| } | |||||
| } | |||||
| current_level = temp_level; | |||||
| temp_level.clear(); | |||||
| } | |||||
| } | |||||
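| // Usage note: this assumes every stream's distance-1 ancestors were registered beforehand; | |||||
| // running ComputeAncestorStreams() on each stream then closes the ancestor relation transitively. | |||||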
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_ | |||||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_ | |||||
| #include "backend/optimizer/somas/somas_node.h" | |||||
| #include "backend/optimizer/somas/somas_tensor.h" | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <vector> | |||||
| namespace mindspore { | |||||
| namespace somas { | |||||
| class SomasNode; | |||||
| class SomasTensor; | |||||
| using SomasTensorPtr = std::shared_ptr<SomasTensor>; | |||||
| class SomasStream { | |||||
| public: | |||||
| using SomasStreamPtr = std::shared_ptr<SomasStream>; | |||||
| // Attributes mutated in code | |||||
| std::vector<SomasTensorPtr> tensors_; // vector needed for same-stream loop in ConflictComputing() | |||||
| std::set<SomasStreamPtr> ancestor_streams_; | |||||
| std::set<SomasStreamPtr> ancestor_streams_group_; | |||||
| // Constructors/Destructors | |||||
| explicit SomasStream(int64_t id) : id_(id) {} | |||||
| SomasStream(const SomasStream &) = delete; | |||||
| SomasStream &operator=(const SomasStream &) = delete; | |||||
| ~SomasStream() = default; | |||||
| // Accessors | |||||
| const int64_t &GetId() const { return id_; } | |||||
| // Ancestor Computing | |||||
| void ComputeAncestorStreams(); // Given "ancestors at distance one" information, compute "ancestors at any distance" | |||||
| private: | |||||
| const int64_t id_{0}; | |||||
| }; | |||||
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_ | |||||
| @@ -0,0 +1,64 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/optimizer/somas/somas_tensor.h" | |||||
| #include "backend/optimizer/somas/somas_node.h" | |||||
| #include "backend/optimizer/somas/somas_stream.h" | |||||
| #include "backend/optimizer/somas/somas.h" | |||||
| namespace mindspore { | |||||
| namespace somas { | |||||
| SomasTensor::SomasTensor(size_t id, SomasNodePtr source_node, SomasStreamPtr source_stream, size_t real_size, | |||||
| LifeLongType lifelong_value) | |||||
| : lifelong_value_(lifelong_value), | |||||
| type_(kUnknown), | |||||
| offset_(0), | |||||
| id_(id), | |||||
| source_node_(source_node), | |||||
| source_stream_(source_stream), | |||||
| original_size_(real_size) { | |||||
| const size_t alignment = 512; | |||||
| const size_t alignment_complement = 31; | |||||
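| // Pad and round up to the 512-byte boundary the memory manager also uses (kMemAlignSize), | |||||
| // e.g. a 481-byte tensor becomes (481 + 512 + 31) / 512 * 512 = 1024 bytes. | |||||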
| aligned_size_ = (real_size > 0) ? (real_size + alignment + alignment_complement) / alignment * alignment : 0; | |||||
| solver_tensor_desc_ = std::make_shared<SomasSolverTensorDesc>(id_, aligned_size_, offset_, false); | |||||
| ref_overlap_ = false; | |||||
| between_streams_ = false; | |||||
| num_constraints_ = 0; | |||||
| } | |||||
| SomasSolverTensorDescPtr SomasTensor::GetSolverTensorDesc() { | |||||
| if (type_ == kGap) { // ignore lifelong_ value for gaps given to solver, and pass with original_size_ | |||||
| solver_tensor_desc_->Update(id_, original_size_, offset_, false, num_constraints_); | |||||
| } else { | |||||
| solver_tensor_desc_->Update(id_, aligned_size_, offset_, lifelong_value_ == kLifeLongGraphAll, num_constraints_); | |||||
| } | |||||
| if (aligned_size_ == 0) { // ignore zero-size tensors for solver | |||||
| return nullptr; | |||||
| } else { | |||||
| return solver_tensor_desc_; | |||||
| } | |||||
| } | |||||
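| // Record, per destination stream, the highest node id among this tensor's consumers | |||||
| // (presumably its last use in execution order on that stream). | |||||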
| void SomasTensor::ComputeMaxDestinationId() { | |||||
| for (SomasStreamPtr stream : destinationStreams_) max_destination_id_[stream] = 0; | |||||
| for (SomasNodePtr node : destinations_) | |||||
| if (node->GetId() > max_destination_id_[node->GetStream()]) max_destination_id_[node->GetStream()] = node->GetId(); | |||||
| } | |||||
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,129 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_ | |||||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_ | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "backend/optimizer/somas/somas_node.h" | |||||
| #include "backend/optimizer/somas/somas_solver_pre.h" | |||||
| #include "backend/optimizer/somas/somas_stream.h" | |||||
| namespace mindspore { | |||||
| namespace somas { | |||||
| class SomasNode; | |||||
| class SomasStream; | |||||
| // Lifetime type | |||||
| struct Lifetime { | |||||
| size_t start_; | |||||
| size_t end_; | |||||
| explicit Lifetime(size_t start = 0, size_t end = 0) : start_(start), end_(end) {} | |||||
| }; | |||||
| using lifetime_t = struct Lifetime; | |||||
| // Tensor type | |||||
| enum TensorType { | |||||
| kCommon, | |||||
| kOutputOnly, | |||||
| kWorkspace, | |||||
| kGetNextOutput, | |||||
| kSummaryInput, | |||||
| kRefNodeInput, | |||||
| kRefNodeOutput, | |||||
| kGap, | |||||
| kUnknown | |||||
| }; | |||||
| enum LifeLongType { | |||||
| kLifeLongNone, // life time is from tensor start to tensor end | |||||
| kLifeLongGraphAll, // life time is from graph start to graph end | |||||
| kLifeLongGraphStart, // life time is from graph start to tensor end | |||||
| kLifeLongGraphEnd // life time is from tensor start to graph end | |||||
| }; | |||||
| using SomasNodePtr = std::shared_ptr<SomasNode>; | |||||
| using SomasStreamPtr = std::shared_ptr<SomasStream>; | |||||
| class SomasTensor { | |||||
| public: | |||||
| using SomasTensorPtr = std::shared_ptr<SomasTensor>; | |||||
| size_t aligned_size_{0}; | |||||
| LifeLongType lifelong_value_; | |||||
| bool ref_overlap_; | |||||
| bool between_streams_; | |||||
| lifetime_t lifetime_; | |||||
| TensorType type_; | |||||
| size_t offset_{0}; | |||||
| size_t num_constraints_{0}; | |||||
| std::set<SomasNodePtr> destinations_; | |||||
| std::set<SomasStreamPtr> destinationStreams_; | |||||
| unordered_map<SomasStreamPtr, size_t> max_destination_id_; | |||||
| // Constructors/Destructors | |||||
| explicit SomasTensor(size_t id, SomasNodePtr source_node, SomasStreamPtr source_stream, size_t real_size, | |||||
| LifeLongType lifelong_value = kLifeLongNone); | |||||
| SomasTensor(const SomasTensor &) = delete; | |||||
| SomasTensor &operator=(const SomasTensor &) = delete; | |||||
| ~SomasTensor() = default; | |||||
| // Accessors | |||||
| const size_t &GetId() { return id_; } | |||||
| SomasNodePtr GetSourceNode() const { return source_node_; } | |||||
| SomasStreamPtr GetSourceStream() const { return source_stream_; } | |||||
| const size_t &GetOriginalSize() { return original_size_; } | |||||
| const size_t &GetAlignedSize() { return aligned_size_; } | |||||
| bool IsLifelong() { return lifelong_value_ == kLifeLongGraphAll; } | |||||
| bool IsWorkspace() { return type_ == kWorkspace; } | |||||
| bool IsOutputOnly() { return type_ == kOutputOnly; } | |||||
| size_t GetOffset() { return offset_; } | |||||
| bool IsBetweenStreams() { return between_streams_; } | |||||
| bool IsSemiLifelongStart() { return lifelong_value_ == kLifeLongGraphStart; } | |||||
| bool IsSemiLifelongEnd() { return lifelong_value_ == kLifeLongGraphEnd; } | |||||
| bool IsRefOverlap() { return ref_overlap_; } | |||||
| bool IsGap() { return type_ == kGap; } | |||||
| // Computing functions | |||||
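| // Tensors the solver does not place (zero-size tensors and GetNext outputs) keep their default offset. | |||||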
| void SetOffset(size_t start_offset = 0) { | |||||
| if (aligned_size_ != 0 && type_ != kGetNextOutput) { | |||||
| offset_ = start_offset + solver_tensor_desc_->offset_; | |||||
| } | |||||
| } | |||||
| SomasSolverTensorDescPtr GetSolverTensorDesc(); | |||||
| void ComputeMaxDestinationId(); | |||||
| private: | |||||
| const size_t id_{0}; | |||||
| const SomasNodePtr source_node_; | |||||
| const SomasStreamPtr source_stream_; | |||||
| const size_t original_size_{0}; | |||||
| SomasSolverTensorDescPtr solver_tensor_desc_; | |||||
| }; | |||||
| } // namespace somas | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_ | |||||
| @@ -1,3 +1,4 @@ | |||||
| /** | /** | ||||
| * Copyright 2019 Huawei Technologies Co., Ltd | * Copyright 2019 Huawei Technologies Co., Ltd | ||||
| * | * | ||||
| @@ -15,21 +16,21 @@ | |||||
| */ | */ | ||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| #include <vector> | |||||
| #include <utility> | |||||
| #include <numeric> | |||||
| #include <functional> | #include <functional> | ||||
| #include "utils/ms_utils.h" | |||||
| #include "common/trans.h" | |||||
| #include "utils/utils.h" | |||||
| #include "utils/ms_context.h" | |||||
| #include "frontend/operator/ops.h" | |||||
| #include "backend/session/kernel_graph.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include <numeric> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "backend/optimizer/common/helper.h" | #include "backend/optimizer/common/helper.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "backend/session/kernel_graph.h" | |||||
| #include "common/trans.h" | |||||
| #include "debug/data_dump/dump_json_parser.h" | #include "debug/data_dump/dump_json_parser.h" | ||||
| #include "frontend/operator/ops.h" | |||||
| #include "ir/value.h" | #include "ir/value.h" | ||||
| #include "utils/ms_context.h" | |||||
| #include "utils/ms_utils.h" | |||||
| #include "utils/shape_utils.h" | #include "utils/shape_utils.h" | ||||
| #include "utils/utils.h" | |||||
| using mindspore::kernel::Address; | using mindspore::kernel::Address; | ||||
| using mindspore::kernel::AddressPtr; | using mindspore::kernel::AddressPtr; | ||||
| @@ -440,6 +441,9 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode | |||||
| if (type == kReuseDynamicMem) { | if (type == kReuseDynamicMem) { | ||||
| // reuse communication op's all outputs' memory | // reuse communication op's all outputs' memory | ||||
| type = kReuseDynamicCommMem; | type = kReuseDynamicCommMem; | ||||
| } | |||||
| if (type == kReuseDynamicCommMem || type == kSomasReuseDynamicMem) { | |||||
| bool not_reuse = KernelMemNotReuse(node); | bool not_reuse = KernelMemNotReuse(node); | ||||
| if (not_reuse) { | if (not_reuse) { | ||||
| type = kDynamicMem; | type = kDynamicMem; | ||||
| @@ -504,7 +508,7 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP | |||||
| return; | return; | ||||
| } | } | ||||
| if (type == kReuseDynamicMem) { | |||||
| if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) { | |||||
| bool not_reuse = KernelMemNotReuse(node); | bool not_reuse = KernelMemNotReuse(node); | ||||
| if (not_reuse) { | if (not_reuse) { | ||||
| type = kDynamicMem; | type = kDynamicMem; | ||||
| @@ -530,13 +534,13 @@ void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, in | |||||
| if (node->isa<CNode>()) { | if (node->isa<CNode>()) { | ||||
| bool independent = AnfAlgo::IsIndependentNode(node->cast<CNodePtr>()); | bool independent = AnfAlgo::IsIndependentNode(node->cast<CNodePtr>()); | ||||
| if (independent && type == kReuseDynamicMem) { | |||||
| MS_LOG(INFO) << "Independent disable mem_reuse"; | |||||
| if (independent && (type == kReuseDynamicMem || type == kSomasReuseDynamicMem)) { | |||||
| MS_LOG(INFO) << "Independent node " << node->fullname_with_scope() << " disable memory reuse"; | |||||
| type = kDynamicMem; | type = kDynamicMem; | ||||
| } | } | ||||
| } | } | ||||
| if (type == kReuseDynamicMem) { | |||||
| if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) { | |||||
| bool not_reuse = KernelMemNotReuse(node); | bool not_reuse = KernelMemNotReuse(node); | ||||
| if (not_reuse) { | if (not_reuse) { | ||||
| type = kDynamicMem; | type = kDynamicMem; | ||||
| @@ -671,8 +675,13 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { | |||||
| if (is_enable_mem_reuse) { | if (is_enable_mem_reuse) { | ||||
| MS_LOG(INFO) << "Memory Reuse is enable..."; | MS_LOG(INFO) << "Memory Reuse is enable..."; | ||||
| #ifdef MEM_REUSE_DEBUG | |||||
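| // MEM_REUSE_DEBUG builds keep the legacy best-fit reuse path; otherwise SOMAS takes over below. | |||||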
| mem_manager_->MallocReusedDynamicMem(graph); | mem_manager_->MallocReusedDynamicMem(graph); | ||||
| mem_type = kReuseDynamicMem; | mem_type = kReuseDynamicMem; | ||||
| #else | |||||
| mem_manager_->MallocSomasDynamicMem(graph); | |||||
| mem_type = kSomasReuseDynamicMem; | |||||
| #endif | |||||
| } else { | } else { | ||||
| MS_LOG(INFO) << "Memory Reuse is disable..."; | MS_LOG(INFO) << "Memory Reuse is disable..."; | ||||
| } | } | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "runtime/device/memory_manager.h" | #include "runtime/device/memory_manager.h" | ||||
| #include <string> | |||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| using mindspore::memreuse::BestFitMemReuse; | using mindspore::memreuse::BestFitMemReuse; | ||||
| @@ -47,6 +48,40 @@ void MemoryManager::MallocReusedDynamicMem(const session::KernelGraph *graph) { | |||||
| mem_reuse_util_ptr_->set_mem_base(base_ptr); | mem_reuse_util_ptr_->set_mem_base(base_ptr); | ||||
| } | } | ||||
| void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) { | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| SomasPtr somas_reuse_util_ptr = std::make_shared<somas::Somas>(); | |||||
| MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr); | |||||
| somas_reuse_util_ptr_ = somas_reuse_util_ptr; | |||||
| if (!(somas_reuse_util_ptr->Allocate(graph))) { | |||||
| MS_LOG(EXCEPTION) << "Somas Allocate Failed."; | |||||
| } | |||||
| size_t total_allocated_size = somas_reuse_util_ptr->GetTotalMemSize(); | |||||
| MS_LOG(INFO) << "Graph " << graph->graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]"; | |||||
| auto base_ptr = MallocDynamicMem(total_allocated_size, false); | |||||
| MS_LOG(INFO) << "Somas Reuse Memory Base Address [" << static_cast<void *>(base_ptr) << "], End Address [" | |||||
| << static_cast<void *>(base_ptr + total_allocated_size) << "]"; | |||||
| somas_reuse_util_ptr->set_mem_base_addr(base_ptr); | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG); | |||||
| auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH); | |||||
| if (save_graphs_path.empty()) { | |||||
| save_graphs_path = "."; | |||||
| } | |||||
| if (save_graphs) { | |||||
| std::string file_path = | |||||
| save_graphs_path + "/" + "somas_after_allocate_" + std::to_string(graph->graph_id()) + ".ir"; | |||||
| somas_reuse_util_ptr_->DumpSomasBasicIR(file_path); | |||||
| std::string mem_file_path = save_graphs_path + "/" + "somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir"; | |||||
| somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path); | |||||
| } | |||||
| } | |||||
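| // Later, MallocOutputMem/MallocWorkSpaceMem resolve kSomasReuseDynamicMem requests against this plan | |||||
| // via GetNodeOutputPtr/GetNodeWorkSpacePtr. | |||||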
| uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | ||||
| const DeviceAddressPtr &address) { | const DeviceAddressPtr &address) { | ||||
| MS_EXCEPTION_IF_NULL(node); | MS_EXCEPTION_IF_NULL(node); | ||||
| @@ -68,6 +103,9 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me | |||||
| } else if (type == kReuseDynamicCommMem) { | } else if (type == kReuseDynamicCommMem) { | ||||
| MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | ||||
| ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | ||||
| } else if (type == kSomasReuseDynamicMem) { | |||||
| MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_); | |||||
| ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index); | |||||
| } else { | } else { | ||||
| ptr = MallocDynamicMem(size, communication_mem); | ptr = MallocDynamicMem(size, communication_mem); | ||||
| } | } | ||||
| @@ -83,6 +121,9 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me | |||||
| } else if (type == kReuseDynamicMem) { | } else if (type == kReuseDynamicMem) { | ||||
| MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | ||||
| ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | ||||
| } else if (type == kSomasReuseDynamicMem) { | |||||
| MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_); | |||||
| ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index); | |||||
| } | } | ||||
| address->ptr_ = ptr; | address->ptr_ = ptr; | ||||
| return ptr; | return ptr; | ||||
| @@ -92,6 +133,9 @@ uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, | |||||
| if (type == kReuseDynamicMem) { | if (type == kReuseDynamicMem) { | ||||
| MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); | ||||
| return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); | return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); | ||||
| } else if (type == kSomasReuseDynamicMem) { | |||||
| MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_); | |||||
| return somas_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); | |||||
| } | } | ||||
| return MallocDynamicMem(size, false); | return MallocDynamicMem(size, false); | ||||
| } | } | ||||
| @@ -17,16 +17,18 @@ | |||||
| #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_ | #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_ | ||||
| #define MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_ | #define MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_ | ||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | |||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | |||||
| #include "backend/optimizer/mem_reuse/mem_reuse.h" | #include "backend/optimizer/mem_reuse/mem_reuse.h" | ||||
| #include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" | #include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" | ||||
| #include "backend/optimizer/somas/somas.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| enum MemType { kStaticMem, kDynamicMem, kReuseDynamicMem, kReuseDynamicCommMem }; | |||||
| enum MemType { kStaticMem, kDynamicMem, kReuseDynamicMem, kSomasReuseDynamicMem, kReuseDynamicCommMem }; | |||||
| const int kGetAllOuts = -1; | const int kGetAllOuts = -1; | ||||
| const uint64_t kMemAlignSize = 512; | const uint64_t kMemAlignSize = 512; | ||||
| using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; | using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; | ||||
| using SomasPtr = mindspore::somas::SomasPtr; | |||||
| class MemoryManager { | class MemoryManager { | ||||
| public: | public: | ||||
| @@ -42,6 +44,7 @@ class MemoryManager { | |||||
| virtual void ClearGlobalIdleMem() {} | virtual void ClearGlobalIdleMem() {} | ||||
| void MallocReusedDynamicMem(const session::KernelGraph *graph); | void MallocReusedDynamicMem(const session::KernelGraph *graph); | ||||
| void MallocSomasDynamicMem(const session::KernelGraph *graph); | |||||
| uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | ||||
| const DeviceAddressPtr &address); | const DeviceAddressPtr &address); | ||||
| uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size); | uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size); | ||||
| @@ -68,6 +71,7 @@ class MemoryManager { | |||||
| size_t total_static_size_ = 0; | size_t total_static_size_ = 0; | ||||
| size_t total_dynamic_size_ = 0; | size_t total_dynamic_size_ = 0; | ||||
| MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; | MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; | ||||
| SomasPtr somas_reuse_util_ptr_{nullptr}; | |||||
| }; | }; | ||||
| } // namespace device | } // namespace device | ||||
| } // namespace mindspore | } // namespace mindspore | ||||