You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

kernel.h 7.8 kB

4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /**
  2. * Copyright 2019-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_
  17. #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_
  18. #include <vector>
  19. #include <string>
  20. #include <memory>
  21. #include <map>
  22. #include <set>
  23. #include "nlohmann/json.hpp"
  24. #include "ir/anf.h"
  25. #include "ir/dtype.h"
  26. #include "include/common/utils/utils.h"
  27. #include "ir/tensor.h"
  28. #include "abstract/dshape.h"
  29. #include "utils/log_adapter.h"
  30. #include "runtime/device/executor/dynamic_kernel.h"
  31. #ifdef _MSC_VER
  32. #undef OPAQUE
  33. #endif
  34. namespace mindspore {
// Backend category a kernel implementation belongs to (which toolchain/device
// produced or executes it).  Values are sequential from 0 and
// UNKNOWN_KERNEL_TYPE is the explicit default; do not reorder, the numeric
// values may be serialized or compared across modules.
enum KernelType : int {
  UNKNOWN_KERNEL_TYPE = 0,
  AKG_KERNEL,
  AICPU_KERNEL,
  RT_KERNEL,
  HCCL_KERNEL,
  TBE_KERNEL,
  HOST_KERNEL,
  CPU_KERNEL,
  GPU_KERNEL,
};
  46. namespace kernel {
// Supported fusion type: pattern tag used by the op-fusion pass to classify
// which fusion rule an operator participates in.
// NOTE: `OPAQUE` collides with a macro from Windows' wingdi.h, which is why
// this header does `#undef OPAQUE` under _MSC_VER near the top of the file.
// The sentinel UNKNOWN_FUSION_TYPE is deliberately -1 (declared last so the
// other enumerators keep their sequential values starting at 0).
enum FusionType {
  CONV = 0,
  ELEMWISE,
  COMMREDUCE,
  SEGMENT,
  OPAQUE,  // not fusible / no recognized pattern
  BN_UPDATE_GRAD,
  BN_GRAD_REDUCE,
  LAYER_NORM_GRAD,
  L2LOSS_MUL_ADDN,
  PURE_BROADCAST,
  INPLACE,
  MATMUL,
  MATMUL_V2,
  GEMM,
  CONV2D_BACKPROP_INPUT,
  CONV2D_BACKPROP_FILTER,
  CONV3D_BACKPROP_INPUT,
  CONV3D_BACKPROP_FILTER,
  CUBE_LAYER_NORM,
  BN_REDUCE,
  BN_UPDATE,
  SOFTMAX_CROSS_ENTROPY_WITH_LOGITS,
  L2_NORMALIZE,
  SOFTMAX,
  L2_LOSS,
  ASCEND_QUANT,
  ASCEND_DEQUANT,
  ASCEND_ANTI_QUANT,
  STRIDED_READ,
  STRIDED_WRITE,
  ASCEND_DEQUANT_S16,
  ASCEND_REQUANT,
  ASCEND_REQUANT_S16,
  MAX_POOL,
  DEPTHWISECONV,
  CONV3D,
  POOL2D,
  POOL3D,
  READ_SELECT,
  WRITE_SELECT,
  COSINE_EMBEDDING_LOSS,
  DILATION_PATTERN,
  BROAD_CAST,
  BATCH_MATMUL,
  CONFUSION_TRANSPOSE,
  DROPOUT_DOMASKV3D,
  UNKNOWN_FUSION_TYPE = -1,
};
// Data-format pattern category of an operator.
// NOTE(review): precise semantics of each pattern are defined by the format
// selection pass, not visible in this header — confirm against the consumer.
enum OpPattern {
  kCommonPattern = 0,
  kFormatAgnosticPattern = 1,  // presumably: op accepts any data format
  kBroadcastPattern = 2,
  kReducePattern = 3,
};
// Backend processor a kernel is compiled for.  UNKNOWN is the -1 sentinel;
// the remaining values are sequential from 0.
enum Processor {
  UNKNOWN = -1,
  AICORE = 0,
  AICPU,
  CUDA,
  CPU,
};
// Length-prefixed inline byte blob: `len` presumably counts the bytes stored
// in `contents`, which is allocated immediately after the struct header.
// NOTE(review): flexible array members (`char contents[]`) are a C99 feature,
// not standard C++ — this relies on a compiler extension (accepted by
// gcc/clang/msvc).  Instances must be heap-allocated with extra space; do not
// create them on the stack or copy them by value.
struct FlexArray {
  size_t len;
  char contents[];
};
  115. struct KernelJsonInfo {
  116. std::string bin_file_name;
  117. std::string bin_file_suffix;
  118. uint32_t block_dim;
  119. std::string kernel_name;
  120. std::string magic;
  121. std::vector<size_t> parameters;
  122. std::string sha256;
  123. std::vector<size_t> workspaces;
  124. bool has_kernel_list = false;
  125. uint32_t op_para_size;
  126. KernelJsonInfo() : block_dim(0), op_para_size(0) {}
  127. };
// Holds one compiled kernel: the raw json description (json_) and the kernel
// binary image (kernel_), both stored as length-prefixed FlexArray blobs that
// this class owns and frees in its destructor.
//
// NOTE(review): the copy constructor is defaulted (shallow copy) while the
// destructor delete[]s both owning raw pointers — copying a KernelPack and
// destroying both copies is a double delete.  Confirm instances are only ever
// shared via KernelPackPtr, or replace the defaulted copy operations with
// deleted/deep-copying ones (Rule of Five).  The implicitly-defaulted copy
// assignment has the same hazard.
class KernelPack {
 public:
  KernelPack() : json_(nullptr), kernel_(nullptr) {}
  KernelPack(const KernelPack &) = default;
  // Returns the metadata parsed from the kernel's json file.
  KernelJsonInfo kernel_json_info() const;
  // Loads kernel metadata from the json file `json_f`; presumably returns
  // false on failure — implementation not visible in this header.
  bool LoadKernelMeta(const std::string &json_f);
  bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
  // Non-owning views of the loaded blobs; may be null before a Load/Read call.
  const FlexArray *GetJson() const { return json_; }
  const FlexArray *GetKernel() const { return kernel_; }
  ~KernelPack() {
    // NOTE(review): delete[] on FlexArray* assumes the blobs were allocated
    // with a matching array new — verify against the allocation site.
    if (json_ != nullptr) {
      delete[] json_;
      json_ = nullptr;
    }
    if (kernel_ != nullptr) {
      delete[] kernel_;
      kernel_ = nullptr;
    }
  }

 private:
  bool ReadFromJsonFileHelper(std::ifstream &kernel_bin);
  void ParseKernelJson(const nlohmann::json &js);
  KernelJsonInfo kernel_json_info_;
  FlexArray *json_;    // owning; released in the destructor
  FlexArray *kernel_;  // owning; released in the destructor
};
// Shared ownership handle for KernelPack (see the copy hazard noted on the
// class: sharing through this alias is the safe way to pass packs around).
using KernelPackPtr = std::shared_ptr<KernelPack>;
/**
 * @brief base class for autotensor kernel and cce kernel.
 * NOTE(review): this brief precedes `struct Address`, which is a plain
 * address/size pair rather than a base class — the comment looks stale or
 * misplaced; confirm against file history.
 */
  158. struct Address {
  159. Address() : addr(nullptr), size(0) {}
  160. Address(void *address_addr, size_t address_size) : addr(address_addr), size(address_size) {}
  161. void *addr;
  162. size_t size;
  163. };
// Shared handles to Address regions; lists of them describe a kernel's
// complete set of buffers.
using AddressPtr = std::shared_ptr<Address>;
using AddressPtrList = std::vector<AddressPtr>;
// Opaque stream handle forwarded verbatim to KernelMod::Launch.
using StreamType = void *;
// The memory info of kernel launch: all buffers a single launch touches.
struct KernelLaunchInfo {
  AddressPtrList inputs_;      // input buffers
  AddressPtrList outputs_;     // output buffers
  AddressPtrList workspaces_;  // scratch buffers
};
  173. class KernelMod {
  174. public:
  175. KernelMod() {}
  176. explicit KernelMod(const AnfNodePtr &anf_node_ptr) : anf_node_(anf_node_ptr) {}
  177. virtual ~KernelMod() = default;
  178. bool LaunchKernel(const KernelLaunchInfo &kernel_launch_address, void *stream_ptr) {
  179. return Launch(kernel_launch_address.inputs_, kernel_launch_address.workspaces_, kernel_launch_address.outputs_,
  180. stream_ptr);
  181. }
  182. virtual void SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
  183. virtual void SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
  184. virtual void SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
  185. virtual const std::vector<size_t> &GetInputSizeList() const { return input_size_list_; }
  186. virtual const std::vector<size_t> &GetOutputSizeList() const { return output_size_list_; }
  187. virtual const std::vector<size_t> &GetWorkspaceSizeList() const { return workspace_size_list_; }
  188. virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
  189. const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
  190. virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &, void *) { return nullptr; }
  191. virtual std::vector<size_t> GenParameters() { return {}; }
  192. virtual void ReleaseResource() {}
  193. virtual void InferOp() {}
  194. virtual void InitOp() {}
  195. virtual void UpdateOp() {}
  196. void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; }
  197. void set_fullname(const std::string &fullname) { fullname_ = fullname; }
  198. void set_is_monad(bool is_monad) { is_monad_ = is_monad; }
  199. void set_inputs_addr(const std::vector<AddressPtr> &addr) { inputs_addr_ = addr; }
  200. void set_workspaces_addr(const std::vector<AddressPtr> &addr) { workspaces_addr_ = addr; }
  201. void set_outputs_addr(const std::vector<AddressPtr> &addr) { outputs_addr_ = addr; }
  202. const std::vector<AddressPtr> &GetInputsAddr() const { return inputs_addr_; }
  203. const std::vector<AddressPtr> &GetWorkSpacesAddr() const { return workspaces_addr_; }
  204. const std::vector<AddressPtr> &GetOutputsAddr() const { return outputs_addr_; }
  205. void set_stream(StreamType stream) { stream_ = stream; }
  206. StreamType stream() const { return stream_; }
  207. void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node);
  208. // set true if need to update output's shape after launch in dynamic_shape, like Unique
  209. virtual bool IsNeedUpdateOp() { return is_need_updateop_; }
  210. protected:
  211. void InferShape();
  212. void GetDepndLists(const CNodePtr &cnode);
  213. void UpdateOutputSizeList();
  214. bool NeedSkipExecute(const CNodePtr &cnode);
  215. std::string kernel_name_;
  216. std::string unique_name_;
  217. std::string fullname_;
  218. bool is_monad_{false};
  219. StreamType stream_{nullptr};
  220. AnfNodeWeakPtr anf_node_;
  221. std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_;
  222. std::vector<CNodeWeakPtr> atomic_clean_nodes_;
  223. std::vector<size_t> input_size_list_;
  224. std::vector<size_t> output_size_list_;
  225. std::vector<size_t> workspace_size_list_;
  226. std::set<uint32_t> depend_list_;
  227. bool is_need_updateop_ = false;
  228. private:
  229. void InferShapeForNopNode(const AnfNodePtr &input_node);
  230. bool InferShapeForDefiniteOutputNode(const CNodePtr &cnode);
  231. std::vector<AddressPtr> inputs_addr_;
  232. std::vector<AddressPtr> workspaces_addr_;
  233. std::vector<AddressPtr> outputs_addr_;
  234. };
  235. using KernelModPtr = std::shared_ptr<KernelMod>;
  236. } // namespace kernel
  237. } // namespace mindspore
  238. #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_