You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

common_utils.h 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_
  17. #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_
  18. #include <dirent.h>
  19. #include <memory>
  20. #include <unordered_map>
  21. #include <unordered_set>
  22. #include <map>
  23. #include <string>
  24. #include <vector>
  25. #include <utility>
  26. #include <nlohmann/json.hpp>
  27. #include "backend/kernel_compiler/kernel.h"
  28. #include "backend/kernel_compiler/oplib/opinfo.h"
  29. #include "backend/kernel_compiler/kernel_build_info.h"
  30. namespace mindspore {
  31. namespace kernel {
  // String and numeric constants shared by the kernel compiler utilities.
  // NOTE(review): kCceKernelMeta ends with '/', while kGpuKernelMeta does not;
  // code that concatenates file names onto these paths must account for that.
  constexpr const char *kCceKernelMeta = "./kernel_meta/";
  constexpr const char *kGpuKernelMeta = "./cuda_meta";
  // Processor identifiers; the same three strings are listed in support_devices below.
  constexpr const char *kProcessorAiCore = "aicore";
  constexpr const char *kProcessorAiCpu = "aicpu";
  constexpr const char *kProcessorCuda = "cuda";
  // File-name suffixes.
  constexpr const char *kJsonSuffix = ".json";
  constexpr const char *kInfoSuffix = ".info";
  // NOTE(review): the unit is not stated here (presumably seconds) — confirm at the use site.
  constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600U;
  constexpr const char *kArgDataformat = "data_format";
  // Device names recognised by the kernel compiler.
  const std::vector<std::string> support_devices{"aicore", "aicpu", "cuda"};
  // Pairs a kernel function stub handle with a block dimension.
  // Both fields are zero-initialised so a default-constructed instance never
  // exposes indeterminate values (matching the other structs in this header).
  struct KernelMetaInfo {
    // Function stub stored as a pointer-sized integer; 0 until assigned.
    uintptr_t func_stub_{0};
    // Block dimension for the kernel; 0 until assigned.
    uint32_t block_dim_{0};
  };
  // Shared-ownership handle to a KernelMetaInfo.
  using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
  // Registry that associates kernel names with kernel JSON strings and tracks
  // the kernel meta directory path. Normally reached through GetInstance().
  // Initialize, RemoveKernelCache, Search and Insert are only declared here;
  // their behaviour is defined in the implementation file.
  class KernelMeta {
   public:
    KernelMeta() = default;
    // NOTE(review): presumably prepares the meta path / cache for process `pid` — confirm in the definition.
    void Initialize(int pid);
    void RemoveKernelCache();
    // Looks up `kernel_name`; the meaning of the returned string on a miss is
    // decided by the definition (presumably empty — confirm).
    std::string Search(const std::string &kernel_name) const;
    // Records `kernel_json` under `kernel_name`; returns a success flag.
    bool Insert(const std::string &kernel_name, const std::string &kernel_json);
    // Returns a copy of the stored kernel meta path. Declared const because it
    // only reads state, so it is callable through const pointers/references.
    std::string GetKernelMetaPath() const { return kernel_meta_path_; }
    // Returns the address of a function-local static instance; every call
    // yields the same object.
    static KernelMeta *GetInstance() {
      static KernelMeta kernel_meta;
      return &kernel_meta;
    }
    ~KernelMeta() = default;

   private:
    bool initialized_ = false;
    std::string kernel_meta_path_;
    std::unordered_map<std::string, std::string> kernel_meta_map_;
  };
  // Bundle of raw pointers describing a sparse gradient. All fields default to
  // null / zero. NOTE(review): ownership of the pointed-to buffers is not
  // expressed by this type — confirm with the code that fills it in.
  struct SparseGradient {
    // Float buffer (presumably the gradient values — confirm).
    float *value_ = nullptr;
    // Integer index buffer.
    int *indices_ = nullptr;
    // Size associated with indices_ (presumably its element count — confirm).
    size_t indices_size_ = 0;
  };
  70. struct ReduceSparseGradientParam {
  71. SparseGradient *input_grad_{nullptr};
  72. SparseGradient *workspace_grad_{nullptr};
  73. SparseGradient *output_grad_{nullptr};
  74. size_t max_index_{0};
  75. size_t value_stride_{0};
  76. bool use_sort_reduce_{false};
  77. };
  78. struct MultiThreadComputeParams {
  79. float *var_;
  80. float *accum_;
  81. float *linear_;
  82. float *m_;
  83. float *m_t_;
  84. float *v_;
  85. float lr_;
  86. float l1_;
  87. float l2_;
  88. float lr_power_;
  89. float beta1_;
  90. float beta2_;
  91. float epsilon_;
  92. SparseGradient sparse_grad_;
  93. size_t var_first_dim_size_;
  94. size_t var_outer_dim_size_;
  95. bool use_nesterov_;
  96. };
  97. using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams *param, size_t start, size_t end)>;
  98. bool CheckCache(const std::string &kernel_name);
  99. KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
  100. KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
  101. TypeId DtypeToTypeId(const std::string &dtypes);
  102. std::string Dtype2ShortType(const std::string &dtypes);
  103. std::string TypeId2String(TypeId type_id);
  104. size_t GetDtypeNbyte(const std::string &dtypes);
  105. bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
  106. std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
  107. void SaveJsonInfo(const std::string &json_name, const std::string &info);
  108. std::string GetProcessor(const AnfNodePtr &anf_node);
  109. bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
  110. int Sign(float x);
  111. std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
  112. std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
  113. const std::vector<AnfNodePtr> &input_list);
  114. std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
  115. const std::vector<AnfNodePtr> &input_list,
  116. const std::vector<AnfNodePtr> &output_list);
  117. void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list,
  118. std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list);
  119. void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list);
  120. bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
  121. void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
  122. bool IsWeightBoundary(const AnfNodePtr &node);
  123. void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
  124. size_t total_compute_size);
  125. void BucketReduceSparseGradient(const ReduceSparseGradientParam &param);
  126. std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
  127. } // namespace kernel
  128. } // namespace mindspore
  129. #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_