You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

kernel.h 5.8 kB

4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_
  17. #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "nlohmann/json.hpp"
#include "ir/anf.h"
#include "ir/dtype.h"
#include "ir/tensor.h"
#include "abstract/dshape.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"
#include "runtime/device/executor/dynamic_kernel.h"
  29. #ifdef _MSC_VER
  30. #undef OPAQUE
  31. #endif
  32. namespace mindspore {
  33. enum KernelType : int {
  34. UNKNOWN_KERNEL_TYPE = 0,
  35. AKG_KERNEL,
  36. AICPU_KERNEL,
  37. RT_KERNEL,
  38. HCCL_KERNEL,
  39. TBE_KERNEL,
  40. HOST_KERNEL,
  41. CPU_KERNEL,
  42. };
  43. namespace kernel {
  44. // Supported fusion type
  45. enum FusionType {
  46. CONV = 0,
  47. ELEMWISE,
  48. COMMREDUCE,
  49. SEGMENT,
  50. OPAQUE,
  51. BN_UPDATE_GRAD,
  52. BN_GRAD_REDUCE,
  53. LAYER_NORM_GRAD,
  54. L2LOSS_MUL_ADDN,
  55. PURE_BROADCAST,
  56. INPLACE,
  57. MATMUL,
  58. MATMUL_V2,
  59. GEMM,
  60. CONV2D_BACKPROP_INPUT,
  61. CONV2D_BACKPROP_FILTER,
  62. CONV3D_BACKPROP_INPUT,
  63. CONV3D_BACKPROP_FILTER,
  64. CUBE_LAYER_NORM,
  65. BN_REDUCE,
  66. BN_UPDATE,
  67. SOFTMAX_CROSS_ENTROPY_WITH_LOGITS,
  68. L2_NORMALIZE,
  69. SOFTMAX,
  70. L2_LOSS,
  71. ASCEND_QUANT,
  72. ASCEND_DEQUANT,
  73. ASCEND_ANTI_QUANT,
  74. STRIDED_READ,
  75. STRIDED_WRITE,
  76. ASCEND_DEQUANT_S16,
  77. ASCEND_REQUANT,
  78. ASCEND_REQUANT_S16,
  79. MAX_POOL,
  80. DEPTHWISECONV,
  81. CONV3D,
  82. POOL2D,
  83. POOL3D,
  84. READ_SELECT,
  85. WRITE_SELECT,
  86. COSINE_EMBEDDING_LOSS,
  87. DILATION_PATTERN,
  88. BROAD_CAST,
  89. BATCH_MATMUL,
  90. CONFUSION_TRANSPOSE,
  91. UNKNOWN_FUSION_TYPE = -1,
  92. };
// Classification of an op's computation pattern.
enum OpPattern {
  kCommonPattern = 0,          // default: no special pattern
  kFormatAgnosticPattern = 1,  // presumably format-insensitive ops — confirm against users
  kBroadcastPattern = 2,       // broadcast-style ops
  kReducePattern = 3,          // reduction-style ops
};
  99. // Backend processor
  100. enum Processor {
  101. UNKNOWN = -1,
  102. AICORE = 0,
  103. AICPU,
  104. CUDA,
  105. };
// Variable-length blob: a byte count followed by the bytes themselves.
// `contents` is a flexible array member (a C99 extension accepted by the
// targeted compilers in C++); the allocator must reserve the payload
// immediately after the struct. NOTE(review): presumably `len` is the
// number of valid bytes in `contents` — confirm against the loader in
// kernel.cc.
struct FlexArray {
  size_t len;       // payload size in bytes
  char contents[];  // trailing payload, allocated past the struct
};
  110. struct KernelJsonInfo {
  111. std::string bin_file_name;
  112. std::string bin_file_suffix;
  113. uint32_t block_dim;
  114. std::string kernel_name;
  115. std::string magic;
  116. std::vector<size_t> parameters;
  117. std::string sha256;
  118. std::vector<size_t> workspaces;
  119. uint32_t op_para_size;
  120. KernelJsonInfo() : block_dim(0), op_para_size(0) {}
  121. };
  122. class KernelPack {
  123. public:
  124. KernelPack() : json_(nullptr), kernel_(nullptr) {}
  125. KernelPack(const KernelPack &) = default;
  126. KernelJsonInfo kernel_json_info() const;
  127. bool LoadKernelMeta(const std::string &json_f);
  128. bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
  129. const FlexArray *GetJson() const { return json_; }
  130. const FlexArray *GetKernel() const { return kernel_; }
  131. ~KernelPack() {
  132. if (json_ != nullptr) {
  133. delete[] json_;
  134. json_ = nullptr;
  135. }
  136. if (kernel_ != nullptr) {
  137. delete[] kernel_;
  138. kernel_ = nullptr;
  139. }
  140. }
  141. private:
  142. bool ReadFromJsonFileHelper(std::ifstream &kernel_bin);
  143. void ParseKernelJson(const nlohmann::json &js);
  144. KernelJsonInfo kernel_json_info_;
  145. FlexArray *json_;
  146. FlexArray *kernel_;
  147. };
  148. using KernelPackPtr = std::shared_ptr<KernelPack>;
  149. /**
  150. * @brief base class for autotensor kernel and cce kernel.
  151. */
  152. struct Address {
  153. Address() : addr(nullptr), size(0) {}
  154. Address(void *address_addr, size_t address_size) : addr(address_addr), size(address_size) {}
  155. void *addr;
  156. size_t size;
  157. };
  158. using AddressPtr = std::shared_ptr<Address>;
// The memory info of kernel launch: the device address lists for one kernel
// invocation, grouped by operand role.
struct KernelLaunchInfo {
  std::vector<AddressPtr> inputs_;      // input buffers
  std::vector<AddressPtr> outputs_;     // output buffers
  std::vector<AddressPtr> workspaces_;  // scratch buffers
};
  165. class KernelMod {
  166. public:
  167. virtual const std::vector<size_t> &GetInputSizeList() const = 0;
  168. virtual const std::vector<size_t> &GetOutputSizeList() const = 0;
  169. virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
  170. virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
  171. const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
  172. virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return nullptr; }
  173. virtual std::vector<size_t> GenParameters() { return {}; }
  174. virtual void ReleaseResource() {}
  175. virtual ~KernelMod() = default;
  176. void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; }
  177. void set_fullname(const std::string &fullname) { fullname_ = fullname; }
  178. void set_is_monad(bool is_monad) { is_monad_ = is_monad; }
  179. void set_inputs_addr(const std::vector<AddressPtr> &addr) { inputs_addr_ = addr; }
  180. void set_workspaces_addr(const std::vector<AddressPtr> &addr) { workspaces_addr_ = addr; }
  181. void set_outputs_addr(const std::vector<AddressPtr> &addr) { outputs_addr_ = addr; }
  182. const std::vector<AddressPtr> &GetInputsAddr() { return inputs_addr_; }
  183. const std::vector<AddressPtr> &GetWorkSpacesAddr() { return workspaces_addr_; }
  184. const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; }
  185. protected:
  186. std::string unique_name_;
  187. std::string fullname_;
  188. bool is_monad_{false};
  189. private:
  190. std::vector<AddressPtr> inputs_addr_;
  191. std::vector<AddressPtr> workspaces_addr_;
  192. std::vector<AddressPtr> outputs_addr_;
  193. };
  194. using KernelModPtr = std::shared_ptr<KernelMod>;
  195. } // namespace kernel
  196. } // namespace mindspore
  197. #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_