You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common_utils.h 13 kB

4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. /**
  2. * Copyright 2019-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
  17. #define MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
  18. #include <dirent.h>
  19. #include <memory>
  20. #include <unordered_map>
  21. #include <unordered_set>
  22. #include <map>
  23. #include <string>
  24. #include <sstream>
  25. #include <algorithm>
  26. #include <vector>
  27. #include <utility>
  28. #include <nlohmann/json.hpp>
  29. #include "include/common/utils/utils.h"
  30. #include "kernel/kernel.h"
  31. #include "kernel/oplib/opinfo.h"
  32. #include "kernel/kernel_build_info.h"
  33. namespace mindspore {
  34. namespace kernel {
  // String and numeric constants shared by the kernel utilities declared in this header.
  // Directory name (with trailing slash) for AKG kernel meta files.
  // NOTE(review): presumably appended to a cache root elsewhere — confirm at the call sites.
  35. constexpr auto kAkgKernelMeta = "kernel_meta/";
  // Processor names as strings.
  36. constexpr auto kProcessorAiCore = "aicore";
  37. constexpr auto kProcessorAiCpu = "aicpu";
  38. constexpr auto kProcessorCuda = "cuda";
  39. constexpr auto kProcessorCpu = "cpu";
  40. constexpr auto kProcessorUnknown = "unknown";
  // File-name suffixes.
  41. constexpr auto kJsonSuffix = ".json";
  42. constexpr auto kInfoSuffix = ".info";
  // NOTE(review): looks like a compile timeout for auto-diff kernels; the unit is not visible here — confirm.
  43. constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600;
  // Device names; the literals repeat the values of kProcessorAiCore, kProcessorAiCpu and kProcessorCuda.
  // As a namespace-scope const object defined in a header, every translation unit gets its own copy.
  44. const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"};
  45. // an enum to indicate a vector or matrix alignment direction.
  46. // real_data: [1,2,3] left_align: [1,2,3,0] right_align:[0,1,2,3]
  47. namespace MatrixDiag {
  48. enum Alignment { RIGHT = 0, LEFT = 1 };
  49. static const mindspore::HashMap<std::string, std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment>> AlignmentMap{
  50. {"RIGHT_LEFT", {MatrixDiag::RIGHT, MatrixDiag::LEFT}},
  51. {"LEFT_RIGHT", {MatrixDiag::LEFT, MatrixDiag::RIGHT}},
  52. {"RIGHT_RIGHT", {MatrixDiag::RIGHT, MatrixDiag::RIGHT}},
  53. {"LEFT_LEFT", {MatrixDiag::LEFT, MatrixDiag::LEFT}}};
  54. } // namespace MatrixDiag
  // Plain record holding a pointer-sized function stub value and a block dimension.
  // NOTE(review): the exact meaning of both fields is defined by the code that fills them in — confirm there.
  //
  // Both members carry a zero default so that a default-constructed object (`KernelMetaInfo info;`) never
  // exposes indeterminate values. The struct is still an aggregate (C++14 and later), so `KernelMetaInfo{a, b}`
  // keeps working.
  struct KernelMetaInfo {
    uintptr_t func_stub_{0};
    uint32_t block_dim_{0};
  };
  // Shared-ownership handle to a KernelMetaInfo.
  using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
  // Holds a kernel meta path, an "initialized" flag and a string-to-string map of kernel meta entries.
  // A shared instance is available through GetInstance(); the constructor stays public, as before.
  // NOTE(review): Initialize/Search/Insert are defined outside this header — their exact behaviour
  // (what the map's keys and values contain, when the flag flips) must be confirmed there.
  class KernelMeta {
   public:
    KernelMeta() = default;
    ~KernelMeta() = default;

    // Returns the address of a function-local static object, so every call yields the same pointer.
    static KernelMeta *GetInstance() {
      static KernelMeta instance;
      return &instance;
    }

    void Initialize();
    std::string Search(const std::string &kernel_name) const;
    bool Insert(const std::string &kernel_name, const std::string &kernel_json);

    // Returns a copy of the stored kernel meta path.
    std::string kernel_meta_path() const { return kernel_meta_path_; }
    // Returns the current value of the initialized flag (false until something sets it).
    bool initialized() const { return initialized_; }

   private:
    bool initialized_{false};
    std::string kernel_meta_path_;
    std::unordered_map<std::string, std::string> kernel_meta_map_;
  };
  // Cursor over the multi-dimensional index space described by `matrix_shapes`, with the last dimension
  // varying fastest (row-major order).
  //
  // Usage: call SetIndex(start, end) to place the cursor on flat offset `start`, then call IndexIterator()
  // repeatedly. The first call after SetIndex yields the index of `start`; every later call advances by one
  // element and yields the new index.
  class MatrixInfo {
   public:
    // max_index:     largest value SetIndex accepts for its `end` argument.
    // matrix_shapes: extent of each dimension, outermost dimension first.
    explicit MatrixInfo(size_t max_index, const std::vector<size_t> &matrix_shapes)
        : max_index_(max_index), shapes_(matrix_shapes), current_indexes_(matrix_shapes.size(), 0) {}
    ~MatrixInfo() = default;

    // Positions the cursor on flat offset `start`.
    // Returns false, leaving the cursor untouched, when the range [start, end) is empty or outside
    // [min_index_, max_index_], or when decomposing `start` would divide by a zero-sized dimension.
    // On success the cursor is rebuilt from scratch, so repeated calls do not inherit digits from an
    // earlier position, and the next IndexIterator() call returns the index of `start` itself.
    bool SetIndex(size_t start, size_t end) {
      if (start < min_index_ || end > max_index_ || start >= end) {
        return false;
      }
      // Decompose `start` into per-dimension digits, innermost dimension first. Work on a scratch vector
      // so that a failure half-way through cannot leave a partially updated cursor behind.
      std::vector<size_t> indexes(shapes_.size(), 0);
      for (size_t pos = shapes_.size(); start != 0 && pos > 0; --pos) {
        const size_t dim = shapes_[pos - 1];
        if (dim == 0) {
          return false;
        }
        indexes[pos - 1] = start % dim;
        start /= dim;
      }
      current_indexes_.swap(indexes);
      is_first_iterator_ = true;
      return true;
    }

    // Returns a copy of the current multi-dimensional index.
    // The first call after construction or SetIndex returns the cursor as-is; each later call first advances
    // the cursor by one element, carrying overflow from inner dimensions into outer ones. The outermost
    // dimension is never wrapped, so iterating past the end yields an out-of-range outer index.
    // With an empty shape there is nothing to advance and the (empty) index is returned unchanged.
    std::vector<size_t> IndexIterator() {
      if (is_first_iterator_) {
        is_first_iterator_ = false;
        return current_indexes_;
      }
      if (current_indexes_.empty()) {
        // size() - 1 below would wrap around for an empty shape.
        return current_indexes_;
      }
      size_t pos = current_indexes_.size() - 1;
      ++current_indexes_[pos];
      for (; pos > 0 && current_indexes_[pos] >= shapes_[pos]; --pos) {
        current_indexes_[pos] = 0;
        ++current_indexes_[pos - 1];
      }
      return current_indexes_;
    }

   private:
    bool is_first_iterator_{true};
    size_t min_index_{0};
    size_t max_index_{1};
    std::vector<size_t> shapes_;
    std::vector<size_t> current_indexes_;
  };
  // Shared-ownership handle to a MatrixInfo.
  using MatrixInfoPtr = std::shared_ptr<MatrixInfo>;
  // Declarations of kernel helper functions; their definitions are not part of this header.
  // NOTE(review): the per-function notes below are inferred from names and signatures only — confirm each
  // against the implementation before relying on it.
  // Takes an alignment name and returns a pair of MatrixDiag::Alignment values.
  121. std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> GetAlignments(const std::string &alignment);
  // Presumably computes an offset for the diagonal `diag_index` from the maximum diagonal length, the inner
  // matrix dimensions and the alignment pair — TODO confirm.
  122. int CalDiagOffset(int diag_index, int max_diag_len, int inner_rows, int inner_cols,
  123. const std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> &alignment);
  // Returns a path string; presumably the compiler cache directory — TODO confirm.
  124. std::string GetCompilerCachePath();
  // Cache helpers keyed by kernel name. SearchCache and InsertCache additionally take a processor name and
  // return a KernelPackPtr; whether a null pointer signals a miss is not visible here.
  125. bool CheckCache(const std::string &kernel_name);
  126. KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
  127. KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
  // Conversions that start from a dtype given as a string: to a TypeId, to another string form, and to a
  // size_t (presumably the element size in bytes — TODO confirm).
  128. TypeId DtypeToTypeId(const std::string &dtypes);
  129. std::string Dtype2ShortType(const std::string &dtypes);
  130. size_t GetDtypeNbyte(const std::string &dtypes);
  // Writes its result through `size_i` and reports success as bool; presumably the size for `shape` with
  // elements of type `type_ptr` — TODO confirm.
  131. bool GetShapeSize(const std::vector<size_t> &shape, const TypePtr &type_ptr, int64_t *size_i);
  // Fills `kernel_info_list` for `kernel_node` based on `op_info_ptr` and `processor`; returns bool.
  132. bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
  133. std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
  // Presumably stores `info` under `json_name` below `base_path` — TODO confirm file naming and error handling.
  134. void SaveJsonInfo(const std::string &json_name, const std::string &info, const std::string &base_path);
  // Overload taking a graph node and returning a processor name as a string.
  135. std::string GetProcessor(const AnfNodePtr &anf_node);
  136. Processor GetProcessor(const string &processor);
  // Further kernel helper declarations; the definitions are not part of this header.
  // NOTE(review): the notes below are inferred from names and signatures only — confirm each against the
  // implementation before relying on it.
  // Compares two shapes given as vectors of size_t.
  137. bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
  // Returns (node, index) pairs computed from the node, input and output lists.
  138. std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
  139. const std::vector<AnfNodePtr> &input_list,
  140. const std::vector<AnfNodePtr> &output_list);
  // Collect nodes of `func_graph` into the output vectors; the second overload also fills input and output
  // lists. Which nodes count as "valid" is decided by the implementation.
  141. void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list);
  142. void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list,
  143. std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list);
  // Fill `output_list` / `node_list` from the outputs of `func_graph`.
  144. void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *output_list);
  145. void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
  // Predicate on a graph node; the criterion is not visible here.
  146. bool IsWeightBoundary(const AnfNodePtr &node);
  // Returns a list of int64_t read from `cnode`; presumably the reduction axes attribute — TODO confirm.
  147. std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode);
  // Processor lookups: by node (as string) and from the current context (as enum value and as string).
  148. std::string GetProcessorStr(const AnfNodePtr &anf_node);
  149. Processor GetProcessorFromContext();
  150. std::string GetStrProcessorFromContext();
  // Presumably resize helpers: a scale factor from input/output sizes, and a scaled coordinate — TODO confirm
  // the formulas, including how `align_corners` changes the result.
  151. float Scaling(size_t in_size, size_t out_size, bool align_corners);
  152. float ScaleGrid(const int x, const float scale);
  // Conversions between a fusion type and its name.
  153. FusionType GetFusionTypeByName(const std::string &name);
  154. std::string GetFusionNameByType(const kernel::FusionType &type);
  // Returns a vector of bool derived from `mask`; bit order and length are not visible here — TODO confirm.
  155. std::vector<bool> Dec2Bin(const int64_t &mask);
  // Strided-slice helpers that update begin/end/stride in place for `kernel_node`; FillEmptyDims may also
  // modify `input_shape`, while ParseStrideSliceMasks only reads it.
  156. void FillEmptyDims(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
  157. std::vector<int64_t> *stride, std::vector<size_t> *input_shape);
  158. void ParseStrideSliceMasks(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
  159. std::vector<int64_t> *stride, const std::vector<size_t> &input_shape);
  // Record with two size_t indices (`lower`, `upper`) and a float weight (`lerp`).
  // NOTE(review): presumably one entry of an interpolation table — the two source positions and the blend
  // factor between them — as filled in by ComputeInterpolationWeights; confirm against its definition.
  //
  // All members default to zero so that a default-constructed object never exposes indeterminate values.
  // The struct is still an aggregate (C++14 and later), so `CachedInterpolation{lo, hi, w}` keeps working.
  struct CachedInterpolation {
    size_t lower{0};
    size_t upper{0};
    float lerp{0.0f};
  };
  // Writes interpolation data through `interpolation` for the given output size, input size and scale.
  // NOTE(review): how many entries `interpolation` must point to is not visible here — confirm before calling.
  void ComputeInterpolationWeights(const size_t out_size, const size_t in_size, const float scale,
                                   CachedInterpolation *interpolation);
  // Joins the elements of `inputs` into one string, separated by ", ", using each element's operator<<.
  // Returns an empty string for an empty vector.
  // Written as a plain loop so that it needs only <sstream>, not <iterator> or <algorithm>.
  template <typename T>
  inline std::string Vector2Str(const std::vector<T> &inputs) {
    std::ostringstream oss;
    const char *separator = "";
    for (const auto &item : inputs) {
      // The separator is empty before the first element and ", " before every later one.
      oss << separator << item;
      separator = ", ";
    }
    return oss.str();
  }
  // Formats the keys of `value` as "(key1, key2, ...)" in the map's iteration order; the mapped values are
  // not printed. An empty map yields "()".
  // The map is taken by const reference so that a call does not copy the whole container.
  template <typename T>
  inline std::string Map2Str(const std::map<std::string, T> &value) {
    std::stringstream ss;
    ss << "(";
    bool is_first = true;
    for (const auto &entry : value) {
      if (!is_first) {
        ss << ", ";
      }
      ss << entry.first;
      is_first = false;
    }
    ss << ")";
    return ss.str();
  }
  // Formats the keys of `value` as "(key1, key2, ...)"; the mapped values are not printed and the key order
  // is whatever order the unordered_map iterates in. An empty map yields "()".
  // The map is taken by const reference so that a call does not copy the whole container.
  template <typename T>
  inline std::string Unorderedmap2Str(const std::unordered_map<std::string, T> &value) {
    std::stringstream ss;
    ss << "(";
    bool is_first = true;
    for (const auto &entry : value) {
      if (!is_first) {
        ss << ", ";
      }
      ss << entry.first;
      is_first = false;
    }
    ss << ")";
    return ss.str();
  }
  // Bilinear interpolation between four corner values.
  // First blends each row horizontally with weight `x_lerp`, then blends the two row results vertically
  // with weight `y_lerp`. A weight of 0 selects the left/top value, a weight of 1 the right/bottom value.
  template <typename T>
  inline T ComputeLerp(T top_left, T top_right, T bottom_left, T bottom_right, T x_lerp, T y_lerp) {
    const T upper_row = top_left + (top_right - top_left) * x_lerp;
    const T lower_row = bottom_left + (bottom_right - bottom_left) * x_lerp;
    return upper_row + (lower_row - upper_row) * y_lerp;
  }
  // Shape, slice and size helper declarations; the definitions are not part of this header.
  // NOTE(review): the notes below are inferred from names and signatures only — confirm each against the
  // implementation before relying on it.
  // Writes an int64_t version of the size_t `shape` into `long_shape`.
  211. void CastShapeSizeToLong(const std::vector<size_t> &shape, std::vector<int64_t> *long_shape);
  // Validates start/stop/step against `input_shape`; returns nothing, so a failure is presumably reported by
  // raising — TODO confirm.
  212. void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
  213. const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape);
  // Computes an offset from start/stop and per-dimension offsets; the unit of the result is not visible here.
  214. size_t CalOffset(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
  215. const std::vector<int64_t> &dim_offset);
  // Derives per-dimension offsets from `input_shape` (presumably strides — TODO confirm).
  216. std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape);
  // Computes a copy size from per-dimension offsets and start/stop.
  217. size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start,
  218. const std::vector<int64_t> &stop);
  // Returns a size for the given TypeId; by its name, the size of one element in bytes.
  219. size_t UnitSizeInBytes(const mindspore::TypeId &t);
  // Describes a kernel by two ordered lists of (TypeId, format string) pairs — one for inputs, one for
  // outputs — plus an "all same" flag and an output/input reference map.
  // The Add* and Set* members are defined outside this header.
  220. class KernelAttr {
  221. public:
  // One attribute entry: a type id paired with a format string.
  222. using DataType = std::pair<TypeId, std::string>;
  223. KernelAttr() = default;
  224. ~KernelAttr() = default;
  // Builder-style members returning KernelAttr& (presumably *this, to allow chaining — TODO confirm).
  // `format` defaults to kOpFormat_DEFAULT when omitted.
  225. KernelAttr &AddInputAttr(const TypeId &ms_type, const std::string &format = kOpFormat_DEFAULT);
  226. KernelAttr &AddOutputAttr(const TypeId &ms_type, const std::string &format = kOpFormat_DEFAULT);
  227. KernelAttr &AddAllSameAttr(const bool &all_same);
  228. KernelAttr &AddOutInRef(size_t output_index, size_t input_index);
  // Indexed accessors use operator[] and therefore do no bounds checking: `index` must be smaller than
  // GetInputSize() / GetOutputSize() respectively.
  229. const DataType &GetInputAttr(const size_t index) const { return input_type_[index]; }
  230. const DataType &GetOutputAttr(const size_t index) const { return output_type_[index]; }
  // Returns a reference to the stored flag; the reference is only valid while this object is alive.
  231. const bool &GetAllSame() const { return all_same_; }
  // Number of input / output entries currently stored.
  232. size_t GetInputSize() const { return input_type_.size(); }
  233. size_t GetOutputSize() const { return output_type_.size(); }
  // Read-only access to the output/input reference map.
  234. const OutputInputRefMap &GetOutInRefMap() const { return out_in_ref_map_; }
  // NOTE(review): presumably replaces input_type_ with `addr_list` — confirm against the definition.
  235. void SetInputAttrList(const std::vector<DataType> &addr_list);
  236. private:
  237. std::vector<DataType> input_type_;
  238. std::vector<DataType> output_type_;
  // Defaults to false until something sets it (presumably AddAllSameAttr — TODO confirm).
  239. bool all_same_{false};
  240. // The map between kernel's output and input ref relationship.
  241. OutputInputRefMap out_in_ref_map_;
  242. };
  // Free functions working with KernelAttr; the definitions are not part of this header.
  // Stream output for a KernelAttr. The attribute is taken by value, so each call copies it.
  243. std::ostream &operator<<(std::ostream &os, KernelAttr kernel_attr);
  // Looks for `kernel_attr` in `attr_list` and returns a (bool, size_t) pair.
  // NOTE(review): presumably (found, index of the match) — confirm the matching rules and the index value
  // on a miss against the definition.
  244. std::pair<bool, size_t> MatchKernelAttr(const KernelAttr &kernel_attr, const std::vector<KernelAttr> &attr_list);
  // Build a KernelAttr from a kernel build info object or from a graph node.
  245. KernelAttr GetKernelAttrFromBuildInfo(const KernelBuildInfoPtr &build_info);
  246. KernelAttr GetKernelAttrFromNode(const AnfNodePtr &kernel_node);
  // Compares the actual number of kernel inputs with the expected number and, when they differ, emits
  // "<kernel_name> requires <expected> inputs, but got <actual>." through MS_LOG(EXCEPTION).
  // NOTE(review): MS_LOG(EXCEPTION) presumably throws — confirm in the logging header.
  // The do { ... } while (0) wrapper lets the macro be used like a single statement followed by ';'.
  // Each argument can be evaluated more than once, so pass expressions without side effects.
  247. #define CHECK_KERNEL_INPUTS_NUM(actual_inputs_num, expect_inputs_num, kernel_name) \
  248. do { \
  249. if ((actual_inputs_num) != (expect_inputs_num)) { \
  250. MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_inputs_num) << " inputs, but got " \
  251. << (actual_inputs_num) << "."; \
  252. } \
  253. } while (0)
  // Compares the actual number of kernel outputs with the expected number and, when they differ, emits
  // "<kernel_name> should have <expected> outputs, but got <actual>." through MS_LOG(EXCEPTION).
  // NOTE(review): MS_LOG(EXCEPTION) presumably throws — confirm in the logging header.
  // The do { ... } while (0) wrapper lets the macro be used like a single statement followed by ';'.
  // Each argument can be evaluated more than once, so pass expressions without side effects.
  254. #define CHECK_KERNEL_OUTPUTS_NUM(actual_outputs_num, expect_outputs_num, kernel_name) \
  255. do { \
  256. if ((actual_outputs_num) != (expect_outputs_num)) { \
  257. MS_LOG(EXCEPTION) << (kernel_name) << " should have " << (expect_outputs_num) << " outputs, but got " \
  258. << (actual_outputs_num) << "."; \
  259. } \
  260. } while (0)
  // Compares the actual workspace size with the expected one and, when they differ, emits
  // "<kernel_name> requires <expected> workspace, but got <actual>." through MS_LOG(EXCEPTION).
  // NOTE(review): MS_LOG(EXCEPTION) presumably throws — confirm in the logging header.
  // The do { ... } while (0) wrapper lets the macro be used like a single statement followed by ';'.
  // Each argument can be evaluated more than once, so pass expressions without side effects.
  261. #define CHECK_KERNEL_WORKSPACE_SIZE(actual_size, expect_size, kernel_name) \
  262. do { \
  263. if ((actual_size) != (expect_size)) { \
  264. MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_size) << " workspace, but got " << (actual_size) \
  265. << "."; \
  266. } \
  267. } while (0)
  268. } // namespace kernel
  269. } // namespace mindspore
  270. #endif // MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_