| @@ -117,8 +117,8 @@ void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) { | |||
| return; | |||
| } | |||
| void FootPrint::ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||
| vector<Interval> *oInterval) { | |||
| void FootPrint::ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, | |||
| const BlockTensor &b2, vector<Interval> *oInterval) { | |||
| MS_EXCEPTION_IF_NULL(oInterval); | |||
| // propagate | |||
| size_t acum = m_offset_; | |||
| @@ -136,7 +136,7 @@ void FootPrint::ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const | |||
| acum += p1->size_; | |||
| } | |||
| } | |||
| bool FootPrint::findOffset(std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) { | |||
| bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) { | |||
| MS_EXCEPTION_IF_NULL(offset); | |||
| bool bretval = true; | |||
| vector<Interval> l_interval; | |||
| @@ -201,7 +201,7 @@ void FootPrint::printStats() { | |||
| MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_; | |||
| } | |||
| bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | |||
| std::vector<DynamicBitSet> *pConstraints) { | |||
| const std::vector<DynamicBitSet> *pConstraints) { | |||
| MS_EXCEPTION_IF_NULL(foot_print); | |||
| auto start = std::chrono::system_clock::now(); | |||
| @@ -125,7 +125,8 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> { | |||
| std::shared_ptr<FootPrint> m_foot_print_next_; | |||
| FootPrint() | |||
| : m_foot_print_next_(NULL), | |||
| : m_solId_(0), | |||
| m_foot_print_next_(NULL), | |||
| m_offset_(0), | |||
| m_starts_({}), | |||
| m_alignment_(0), | |||
| @@ -143,8 +144,8 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> { | |||
| void Destroy(); | |||
| // Returns the current value of m_offset_. Const-qualified so it can be called on const FootPrint | |||
| // objects; the top-level const on the by-value return type had no effect on callers and is dropped. | |||
| size_t getOffset() const { return m_offset_; } | |||
| /* Overwrites m_offset_ with the supplied offset. */ void setOffset(const size_t &offset) { m_offset_ = offset; } | |||
| bool findOffset(std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset); | |||
| void ConstrainedBLocks(std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||
| bool findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset); | |||
| void ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2, | |||
| vector<Interval> *oInterval_l); | |||
| void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged); | |||
| bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset); | |||
| @@ -167,7 +168,7 @@ class FastHeuristic { | |||
| /* Stores the supplied value a in m_alignment_. */ void setAlignment(const size_t &a) { m_alignment_ = a; } | |||
| void Destroy(); | |||
| bool Eval(vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print, | |||
| std::vector<DynamicBitSet> *pConstraints); | |||
| const std::vector<DynamicBitSet> *pConstraints); | |||
| private: | |||
| size_t m_alignment_; | |||
| @@ -317,7 +317,7 @@ void SomasSolverCore::SortTensors() { // need to sort the tensors for Fast Heur | |||
| void SomasSolverCore::RestoreSolution(uint32_t sol_id) { | |||
| for (auto block : block_tensors_) { | |||
| if (block.offsets_.count(sol_id) == 0) assert(0); | |||
| if (block.offsets_.count(sol_id) == 0) MS_ASSERT(0); | |||
| size_t bestOffset = block.offsets_[sol_id]; | |||
| size_t offset = bestOffset; | |||
| SomasSolverTensorDescPtr pTensor = block.m_start_tensor_; | |||
| @@ -33,7 +33,7 @@ class SomasSolverCore { | |||
| public: | |||
| /// Interface Function: receive parameters, creates the model to solve and then save the result | |||
| SomasSolverCore(const std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | |||
| std::vector<DynamicBitSet> *constraints) | |||
| const std::vector<DynamicBitSet> *constraints) | |||
| : tensors_(tensors), | |||
| constraints_(*constraints), | |||
| upperbound_(SIZE_MAX), | |||
| @@ -22,14 +22,15 @@ | |||
| #include "backend/optimizer/somas/somas_solver_core.h" | |||
| #include "backend/optimizer/somas/somas_solver_pre.h" | |||
| #include "debug/common.h" | |||
| namespace mindspore { | |||
| namespace somas { | |||
| Status SomasSolverPre::Solving(const session::KernelGraph *graph, | |||
| std::unordered_map<size_t, SomasSolverTensorDescPtr> *ptensors, | |||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||
| bool bVerifySolution, bool ball, SortingType sorting, FittingType fitting, | |||
| AlgorithmType algorithm) { | |||
| const std::vector<DynamicBitSet> *pConstraints, | |||
| const vector<vector<size_t>> &continuous_v, bool bVerifySolution, bool ball, | |||
| SortingType sorting, FittingType fitting, AlgorithmType algorithm) { | |||
| Status retval = SUCCESS; | |||
| try { | |||
| @@ -92,7 +93,7 @@ Status SomasSolverPre::Solving(const session::KernelGraph *graph, | |||
| void SomasSolverPre::Log(const session::KernelGraph *graph, | |||
| const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | |||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) { | |||
| const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) { | |||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-input.txt.."; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| @@ -103,28 +104,22 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||
| MS_LOG(ERROR) << "File path " << filename << " is too long."; | |||
| return; | |||
| } | |||
| char real_path[PATH_MAX] = {0}; | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| if (_fullpath(real_path, filename.c_str(), PATH_MAX) == nullptr) { | |||
| MS_LOG(DEBUG) << "dir " << filename << " does not exit."; | |||
| } | |||
| #else | |||
| if (realpath(filename.c_str(), real_path) == nullptr) { | |||
| MS_LOG(DEBUG) << "Dir " << filename << " does not exit."; | |||
| auto real_path = Common::GetRealPath(filename); | |||
| if (!real_path.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed. path=" << filename; | |||
| return; | |||
| } | |||
| #endif | |||
| std::string path_string = real_path; | |||
| ChangeFileMode(path_string, S_IRWXU); | |||
| std::ofstream ofs_1(real_path); | |||
| ChangeFileMode(real_path.value(), S_IRWXU); | |||
| std::ofstream ofs(real_path.value()); | |||
| if (!ofs_1.is_open()) { | |||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(ERROR) << "Open log file '" << real_path.value() << "' failed!"; | |||
| return; | |||
| } | |||
| for (auto &t : tensors) { | |||
| ofs_1 << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; | |||
| ofs << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; | |||
| } | |||
| for (auto &t1 : tensors) { | |||
| @@ -132,18 +127,18 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||
| size_t idx1 = t1.first; | |||
| size_t idx2 = t2.first; | |||
| if ((idx1 != idx2) && (*pConstraints)[idx1].IsBitTrue(idx2) == false) { | |||
| ofs_1 << "C " << idx1 << " " << idx2 << std::endl; | |||
| ofs << "C " << idx1 << " " << idx2 << std::endl; | |||
| } | |||
| } | |||
| } | |||
| for (auto &s : continuous_v) { | |||
| ofs_1 << "S"; | |||
| ofs << "S"; | |||
| for (auto idx : s) { | |||
| ofs_1 << " " << idx; | |||
| ofs << " " << idx; | |||
| } | |||
| ofs_1 << std::endl; | |||
| ofs << std::endl; | |||
| } | |||
| ofs_1.close(); | |||
| ofs.close(); | |||
| MS_LOG(INFO) << "SomasSolver::Log Writing somas-output.txt.."; | |||
| std::string out_filename = | |||
| @@ -152,21 +147,17 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||
| MS_LOG(ERROR) << "File path " << out_filename << " is too long."; | |||
| return; | |||
| } | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| if (_fullpath(real_path, out_filename.c_str(), PATH_MAX) == nullptr) { | |||
| MS_LOG(DEBUG) << "dir " << out_filename << " does not exit."; | |||
| } | |||
| #else | |||
| if (realpath(out_filename.c_str(), real_path) == nullptr) { | |||
| MS_LOG(DEBUG) << "Dir " << out_filename << " does not exit."; | |||
| // Resolve the path of the output log. This must use out_filename: resolving `filename` again would | |||
| // make ofs_out reopen the path already used for the input log instead of the output log. | |||
| auto out_real_path = Common::GetRealPath(out_filename); | |||
| if (!out_real_path.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed. path=" << out_filename; | |||
| return; | |||
| } | |||
| #endif | |||
| path_string = real_path; | |||
| ChangeFileMode(path_string, S_IRWXU); | |||
| std::ofstream ofs_2(real_path); | |||
| if (!ofs_2.is_open()) { | |||
| MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!"; | |||
| ChangeFileMode(out_real_path.value(), S_IRWXU); | |||
| std::ofstream ofs_out(out_real_path.value()); | |||
| if (!ofs_out.is_open()) { | |||
| MS_LOG(ERROR) << "Open log file '" << out_real_path.value() << "' failed!"; | |||
| return; | |||
| } | |||
| @@ -183,12 +174,12 @@ void SomasSolverPre::Log(const session::KernelGraph *graph, | |||
| bool size_aligned = tensor->size_ % alignment == 0; | |||
| bool offset_aligned = tensor->offset_ % alignment == 0; | |||
| ofs_2 << std::endl | |||
| << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_ | |||
| << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned | |||
| << "\toffset_aligned=" << offset_aligned; | |||
| ofs_out << std::endl | |||
| << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_ | |||
| << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned | |||
| << "\toffset_aligned=" << offset_aligned; | |||
| } | |||
| ofs_2.close(); | |||
| ofs_out.close(); | |||
| MS_LOG(INFO) << "SomasSolver::Log done"; | |||
| } | |||
| @@ -62,9 +62,11 @@ class DynamicBitSet { | |||
| size_t bit_size_; | |||
| std::vector<uint64_t> bit_; | |||
| /* Position within bit_ of the element that holds bit `index` (index / bit_width_); non-const form. */ inline size_t GetIndex(size_t index) { return index / bit_width_; } | |||
| /* Const-qualified form of the same computation. */ inline size_t GetIndex(size_t index) const { return index / bit_width_; } | |||
| /* Mask with a single bit set for `index` inside its bit_ element; index % bit_width_ == 0 maps to bit position bit_width_ - 1, so bits are numbered from the high end. Non-const form. */ inline uint64_t GetBitMask(size_t index) { return (((uint64_t)0x1) << (bit_width_ - 1 - (index % bit_width_))); } | |||
| /* Const-qualified form of the same mask computation. */ inline uint64_t GetBitMask(size_t index) const { | |||
| return (((uint64_t)0x1) << (bit_width_ - 1 - (index % bit_width_))); | |||
| } | |||
| inline void Reset(uint64_t val) { | |||
| bit_.clear(); | |||
| @@ -88,7 +90,7 @@ class DynamicBitSet { | |||
| /* Clears bit `index` by AND-ing its bit_ element with the inverted mask; the bit_ access is not bounds-checked here. */ void SetBitFalse(size_t index) { bit_[GetIndex(index)] &= (~GetBitMask(index)); } | |||
| /* Returns true when bit `index` is set in bit_; the bit_ access is not bounds-checked here. Non-const form. */ bool IsBitTrue(size_t index) { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } | |||
| /* Const-qualified form of the same test. */ bool IsBitTrue(size_t index) const { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } | |||
| void Log() { | |||
| std::cout << "Start Print Bitset "; | |||
| @@ -156,14 +158,14 @@ class SomasSolverPre { | |||
| // Returns max_offset_. Const-qualified because it only reads member state, so it can be called | |||
| // through const references to SomasSolverPre. | |||
| size_t GetMaxOffset() const { return max_offset_; } | |||
| Status Solving(const session::KernelGraph *graph, std::unordered_map<size_t, SomasSolverTensorDescPtr> *tensors, | |||
| std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||
| const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, | |||
| bool bVerifySolution, // true -> Check continuous and non overlapping constraints solution | |||
| bool ball = true, // true -> run full set of heuristics, false -> run single heuristic specified | |||
| SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest, | |||
| AlgorithmType algorithm = kManyObjects); | |||
| void Log(const session::KernelGraph *graph, const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors, | |||
| std::vector<DynamicBitSet> *pConstraints_v, const vector<vector<size_t>> &continuous_v); | |||
| const std::vector<DynamicBitSet> *pConstraints_v, const vector<vector<size_t>> &continuous_v); | |||
| private: | |||
| size_t max_offset_; | |||
| @@ -646,8 +646,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||
| output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); | |||
| } | |||
| auto output_format = AnfAlgo::GetOutputFormat(value_node, output_idx); | |||
| DeviceAddressPtr address = nullptr; | |||
| address = CreateDeviceAddress(nullptr, node_size, output_format, output_type_id); | |||
| DeviceAddressPtr address = CreateDeviceAddress(nullptr, node_size, output_format, output_type_id); | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | |||
| !mem_manager_->MallocMemFromMemPool(address, node_size)) { | |||