| @@ -117,8 +117,8 @@ void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) { | |||||
| return; | return; | ||||
| } | } | ||||
| void FootPrint::ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||||
| vector<Interval> *oInterval) { | |||||
| void FootPrint::ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, | |||||
| const BlockTensor &b2, vector<Interval> *oInterval) { | |||||
| MS_EXCEPTION_IF_NULL(oInterval); | MS_EXCEPTION_IF_NULL(oInterval); | ||||
| // propagate | // propagate | ||||
| size_t acum = m_offset_; | size_t acum = m_offset_; | ||||
| @@ -136,7 +136,7 @@ void FootPrint::ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const | |||||
| acum += p1->size_; | acum += p1->size_; | ||||
| } | } | ||||
| } | } | ||||
| bool FootPrint::findOffset(std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) { | |||||
| bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) { | |||||
| MS_EXCEPTION_IF_NULL(offset); | MS_EXCEPTION_IF_NULL(offset); | ||||
| bool bretval = true; | bool bretval = true; | ||||
| vector<Interval> l_interval; | vector<Interval> l_interval; | ||||
| @@ -201,7 +201,7 @@ void FootPrint::printStats() { | |||||
| MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_; | MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_; | ||||
| } | } | ||||
| bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | ||||
| std::vector<DynamicBitSet> *pConstraints) { | |||||
| const std::vector<DynamicBitSet> *pConstraints) { | |||||
| MS_EXCEPTION_IF_NULL(foot_print); | MS_EXCEPTION_IF_NULL(foot_print); | ||||
| auto start = std::chrono::system_clock::now(); | auto start = std::chrono::system_clock::now(); | ||||
| @@ -125,7 +125,8 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> { | |||||
| std::shared_ptr<FootPrint> m_foot_print_next_; | std::shared_ptr<FootPrint> m_foot_print_next_; | ||||
| FootPrint() | FootPrint() | ||||
| : m_foot_print_next_(NULL), | |||||
| : m_solId_(0), | |||||
| m_foot_print_next_(NULL), | |||||
| m_offset_(0), | m_offset_(0), | ||||
| m_starts_({}), | m_starts_({}), | ||||
| m_alignment_(0), | m_alignment_(0), | ||||
| @@ -143,8 +144,8 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> { | |||||
| void Destroy(); | void Destroy(); | ||||
| const size_t getOffset() { return m_offset_; } | const size_t getOffset() { return m_offset_; } | ||||
| void setOffset(const size_t &offset) { m_offset_ = offset; } | void setOffset(const size_t &offset) { m_offset_ = offset; } | ||||
| bool findOffset(std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset); | |||||
| void ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||||
| bool findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset); | |||||
| void ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||||
| vector<Interval> *oInterval_l); | vector<Interval> *oInterval_l); | ||||
| void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged); | void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged); | ||||
| bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset); | bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset); | ||||
| @@ -167,7 +168,7 @@ class FastHeuristic { | |||||
| void setAlignment(const size_t &a) { m_alignment_ = a; } | void setAlignment(const size_t &a) { m_alignment_ = a; } | ||||
| void Destroy(); | void Destroy(); | ||||
| bool Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | bool Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | ||||
| std::vector<DynamicBitSet> *pConstraints); | |||||
| const std::vector<DynamicBitSet> *pConstraints); | |||||
| private: | private: | ||||
| size_t m_alignment_; | size_t m_alignment_; | ||||
| @@ -317,7 +317,7 @@ void SomasSolverCore::SortTensors() { // need to sort the tensors for Fast Heur | |||||
| void SomasSolverCore::RestoreSolution(uint32_t sol_id) { | void SomasSolverCore::RestoreSolution(uint32_t sol_id) { | ||||
| for (auto block : block_tensors_) { | for (auto block : block_tensors_) { | ||||
| if (block.offsets_.count(sol_id) == 0) assert(0); | |||||
| if (block.offsets_.count(sol_id) == 0) MS_ASSERT(0); | |||||
| size_t bestOffset = block.offsets_[sol_id]; | size_t bestOffset = block.offsets_[sol_id]; | ||||
| size_t offset = bestOffset; | size_t offset = bestOffset; | ||||
| SomasSolverTensorDescPtr pTensor = block.m_start_tensor_; | SomasSolverTensorDescPtr pTensor = block.m_start_tensor_; | ||||
| @@ -33,7 +33,7 @@ class SomasSolverCore { | |||||
| public: | public: | ||||
| /// Interface Function: receive parameters, creates the model to solve and then save the result | /// Interface Function: receive parameters, creates the model to solve and then save the result | ||||
| SomasSolverCore(const std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | SomasSolverCore(const std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | ||||
| std::vector<DynamicBitSet> *constraints) | |||||
| const std::vector<DynamicBitSet> *constraints) | |||||
| : tensors_(tensors), | : tensors_(tensors), | ||||
| constraints_(*constraints), | constraints_(*constraints), | ||||
| upperbound_(SIZE_MAX), | upperbound_(SIZE_MAX), | ||||
| @@ -22,14 +22,15 @@ | |||||
| #include "backend/optimizer/somas/somas_solver_core.h" | #include "backend/optimizer/somas/somas_solver_core.h" | ||||
| #include "backend/optimizer/somas/somas_solver_pre.h" | #include "backend/optimizer/somas/somas_solver_pre.h" | ||||
| #include "debug/common.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace somas { | namespace somas { | ||||
| Status SomasSolverPre::Solving(const session::KernelGraph *graph, | Status SomasSolverPre::Solving(const session::KernelGraph *graph, | ||||
| std::unordered_map<size_t, SomasSolverTensorDescPtr> *ptensors, | std::unordered_map<size_t, SomasSolverTensorDescPtr> *ptensors, | ||||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||||
| bool bVerifySolution, bool ball, SortingType sorting, FittingType fitting, | |||||
| AlgorithmType algorithm) { | |||||
| const std::vector<DynamicBitSet> *pConstraints, | |||||
| const vector<vector<size_t>> &continuous_v, bool bVerifySolution, bool ball, | |||||
| SortingType sorting, FittingType fitting, AlgorithmType algorithm) { | |||||
| Status retval = SUCCESS; | Status retval = SUCCESS; | ||||
| try { | try { | ||||
| @@ -92,7 +93,7 @@ Status SomasSolverPre::Solving(const session::KernelGraph *graph, | |||||
| void SomasSolverPre::Log(const session::KernelGraph *graph, | void SomasSolverPre::Log(const session::KernelGraph *graph, | ||||
| const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | ||||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) { | |||||
| const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) { | |||||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-input.txt.."; | MS_LOG(INFO) << "SomasSolver::Log Writing somas-input.txt.."; | ||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| @@ -103,28 +104,22 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||||
| MS_LOG(ERROR) << "File path " << filename << " is too long."; | MS_LOG(ERROR) << "File path " << filename << " is too long."; | ||||
| return; | return; | ||||
| } | } | ||||
| char real_path[PATH_MAX] = {0}; | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| if (_fullpath(real_path, filename.c_str(), PATH_MAX) == nullptr) { | |||||
| MS_LOG(DEBUG) << "dir " << filename << " does not exit."; | |||||
| } | |||||
| #else | |||||
| if (realpath(filename.c_str(), real_path) == nullptr) { | |||||
| MS_LOG(DEBUG) << "Dir " << filename << " does not exit."; | |||||
| auto real_path = Common::GetRealPath(filename); | |||||
| if (!real_path.has_value()) { | |||||
| MS_LOG(ERROR) << "Get real path failed. path=" << filename; | |||||
| return; | |||||
| } | } | ||||
| #endif | |||||
| std::string path_string = real_path; | |||||
| ChangeFileMode(path_string, S_IRWXU); | |||||
| std::ofstream ofs_1(real_path); | |||||
| ChangeFileMode(real_path.value(), S_IRWXU); | |||||
| std::ofstream ofs(real_path.value()); | |||||
| if (!ofs_1.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << real_path.value() << "' failed!"; | |||||
| return; | return; | ||||
| } | } | ||||
| for (auto &t : tensors) { | for (auto &t : tensors) { | ||||
| ofs_1 << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; | |||||
| ofs << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; | |||||
| } | } | ||||
| for (auto &t1 : tensors) { | for (auto &t1 : tensors) { | ||||
| @@ -132,18 +127,18 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||||
| size_t idx1 = t1.first; | size_t idx1 = t1.first; | ||||
| size_t idx2 = t2.first; | size_t idx2 = t2.first; | ||||
| if ((idx1 != idx2) && (*pConstraints)[idx1].IsBitTrue(idx2) == false) { | if ((idx1 != idx2) && (*pConstraints)[idx1].IsBitTrue(idx2) == false) { | ||||
| ofs_1 << "C " << idx1 << " " << idx2 << std::endl; | |||||
| ofs << "C " << idx1 << " " << idx2 << std::endl; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| for (auto &s : continuous_v) { | for (auto &s : continuous_v) { | ||||
| ofs_1 << "S"; | |||||
| ofs << "S"; | |||||
| for (auto idx : s) { | for (auto idx : s) { | ||||
| ofs_1 << " " << idx; | |||||
| ofs << " " << idx; | |||||
| } | } | ||||
| ofs_1 << std::endl; | |||||
| ofs << std::endl; | |||||
| } | } | ||||
| ofs_1.close(); | |||||
| ofs.close(); | |||||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-output.txt.."; | MS_LOG(INFO) << "SomasSolver::Log Writing somas-output.txt.."; | ||||
| std::string out_filename = | std::string out_filename = | ||||
| @@ -152,21 +147,17 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||||
| MS_LOG(ERROR) << "File path " << out_filename << " is too long."; | MS_LOG(ERROR) << "File path " << out_filename << " is too long."; | ||||
| return; | return; | ||||
| } | } | ||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| if (_fullpath(real_path, out_filename.c_str(), PATH_MAX) == nullptr) { | |||||
| MS_LOG(DEBUG) << "dir " << out_filename << " does not exit."; | |||||
| } | |||||
| #else | |||||
| if (realpath(out_filename.c_str(), real_path) == nullptr) { | |||||
| MS_LOG(DEBUG) << "Dir " << out_filename << " does not exit."; | |||||
| // Resolve the real path of the output log (out_filename), not the input log (filename), | |||||
| // so that the solver results are not written over the input dump produced above. | |||||
| auto out_real_path = Common::GetRealPath(out_filename); | |||||
| if (!out_real_path.has_value()) { | |||||
| MS_LOG(ERROR) << "Get real path failed. path=" << out_filename; | |||||
| return; | |||||
| } | } | ||||
| #endif | |||||
| path_string = real_path; | |||||
| ChangeFileMode(path_string, S_IRWXU); | |||||
| std::ofstream ofs_2(real_path); | |||||
| if (!ofs_2.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||||
| ChangeFileMode(out_real_path.value(), S_IRWXU); | |||||
| std::ofstream ofs_out(out_real_path.value()); | |||||
| if (!ofs_out.is_open()) { | |||||
| MS_LOG(ERROR) << "Open log file '" << out_real_path.value() << "' failed!"; | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -183,12 +174,12 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||||
| bool size_aligned = tensor->size_ % alignment == 0; | bool size_aligned = tensor->size_ % alignment == 0; | ||||
| bool offset_aligned = tensor->offset_ % alignment == 0; | bool offset_aligned = tensor->offset_ % alignment == 0; | ||||
| ofs_2 << std::endl | |||||
| << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_ | |||||
| << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned | |||||
| << "\toffset_aligned=" << offset_aligned; | |||||
| ofs_out << std::endl | |||||
| << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_ | |||||
| << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned | |||||
| << "\toffset_aligned=" << offset_aligned; | |||||
| } | } | ||||
| ofs_2.close(); | |||||
| ofs_out.close(); | |||||
| MS_LOG(INFO) << "SomasSolver::Log done"; | MS_LOG(INFO) << "SomasSolver::Log done"; | ||||
| } | } | ||||
| @@ -62,9 +62,11 @@ class DynamicBitSet { | |||||
| size_t bit_size_; | size_t bit_size_; | ||||
| std::vector<uint64_t> bit_; | std::vector<uint64_t> bit_; | ||||
| inline size_t GetIndex(size_t index) { return index / bit_width_; } | |||||
| inline size_t GetIndex(size_t index) const { return index / bit_width_; } | |||||
| inline uint64_t GetBitMask(size_t index) { return (((uint64_t)0x1) << (bit_width_ - 1 - (index % bit_width_))); } | |||||
| inline uint64_t GetBitMask(size_t index) const { | |||||
| return (((uint64_t)0x1) << (bit_width_ - 1 - (index % bit_width_))); | |||||
| } | |||||
| inline void Reset(uint64_t val) { | inline void Reset(uint64_t val) { | ||||
| bit_.clear(); | bit_.clear(); | ||||
| @@ -88,7 +90,7 @@ class DynamicBitSet { | |||||
| void SetBitFalse(size_t index) { bit_[GetIndex(index)] &= (~GetBitMask(index)); } | void SetBitFalse(size_t index) { bit_[GetIndex(index)] &= (~GetBitMask(index)); } | ||||
| bool IsBitTrue(size_t index) { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } | |||||
| bool IsBitTrue(size_t index) const { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } | |||||
| void Log() { | void Log() { | ||||
| std::cout << "Start Print Bitset "; | std::cout << "Start Print Bitset "; | ||||
| @@ -156,14 +158,14 @@ class SomasSolverPre { | |||||
| size_t GetMaxOffset() { return max_offset_; } | size_t GetMaxOffset() { return max_offset_; } | ||||
| Status Solving(const session::KernelGraph *graph, std::unordered_map<size_t, SomasSolverTensorDescPtr> *tensors, | Status Solving(const session::KernelGraph *graph, std::unordered_map<size_t, SomasSolverTensorDescPtr> *tensors, | ||||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||||
| const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||||
| bool bVerifySolution, // true -> Check continuous and non overlapping constraints solution | bool bVerifySolution, // true -> Check continuous and non overlapping constraints solution | ||||
| bool ball = true, // true -> run full set of heuristics, false -> run single heuristic specified | bool ball = true, // true -> run full set of heuristics, false -> run single heuristic specified | ||||
| SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest, | SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest, | ||||
| AlgorithmType algorithm = kManyObjects); | AlgorithmType algorithm = kManyObjects); | ||||
| void Log(const session::KernelGraph *graph, const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | void Log(const session::KernelGraph *graph, const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | ||||
| std::vector<DynamicBitSet> *pConstraints_v, const vector<vector<size_t>> &continuous_v); | |||||
| const std::vector<DynamicBitSet> *pConstraints_v, const vector<vector<size_t>> &continuous_v); | |||||
| private: | private: | ||||
| size_t max_offset_; | size_t max_offset_; | ||||
| @@ -646,8 +646,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||||
| output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); | output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); | ||||
| } | } | ||||
| auto output_format = AnfAlgo::GetOutputFormat(value_node, output_idx); | auto output_format = AnfAlgo::GetOutputFormat(value_node, output_idx); | ||||
| DeviceAddressPtr address = nullptr; | |||||
| address = CreateDeviceAddress(nullptr, node_size, output_format, output_type_id); | |||||
| DeviceAddressPtr address = CreateDeviceAddress(nullptr, node_size, output_format, output_type_id); | |||||
| MS_EXCEPTION_IF_NULL(address); | MS_EXCEPTION_IF_NULL(address); | ||||
| if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | ||||
| !mem_manager_->MallocMemFromMemPool(address, node_size)) { | !mem_manager_->MallocMemFromMemPool(address, node_size)) { | ||||