You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

model.h 16 kB

4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_INCLUDE_API_MODEL_H
  17. #define MINDSPORE_INCLUDE_API_MODEL_H
  18. #include <string>
  19. #include <vector>
  20. #include <map>
  21. #include <memory>
  22. #include <utility>
  23. #include "include/api/status.h"
  24. #include "include/api/types.h"
  25. #include "include/api/graph.h"
  26. #include "include/api/context.h"
  27. #include "include/api/callback/callback.h"
  28. #include "include/api/cell.h"
  29. #include "include/api/cfg.h"
  30. #include "include/api/dual_abi_helper.h"
  31. namespace mindspore {
  // Forward declarations: this header only uses these types through pointers,
  // smart pointers or containers of pointers, so the full definitions are not needed here.
  class ModelImpl;
  class Metrics;
  namespace dataset {
  class Dataset;
  }  // namespace dataset
  37. /// \brief The Model class is used to define a MindSpore model, facilitating computational graph management.
  38. class MS_API Model {
  39. public:
  40. Model();
  41. ~Model();
  42. Model(const Model &) = delete;
  43. void operator=(const Model &) = delete;
  44. /// \brief Builds a model
  45. ///
  46. /// \param[in] graph GraphCell is a derivative of Cell. Cell is not available currently. GraphCell can be constructed
  47. /// from Graph, for example, model.Build(GraphCell(graph), context).
  48. /// \param[in] model_context A context used to store options during execution.
  49. /// \param[in] train_cfg A config used by training.
  50. ///
  51. /// \return Status.
  52. Status Build(GraphCell graph, const std::shared_ptr<Context> &model_context = nullptr,
  53. const std::shared_ptr<TrainCfg> &train_cfg = nullptr);
  54. /// \brief Builds a Transfer Learning model where the backbone weights are fixed and the head weights are trainable
  55. ///
  56. /// \param[in] backbone The static, non-learnable part of the graph
  57. /// \param[in] head The trainable part of the graph
  58. /// \param[in] context A context used to store options during execution
  59. /// \param[in] cfg A config used by training
  60. ///
  61. /// \return Status
  62. Status BuildTransferLearning(GraphCell backbone, GraphCell head, const std::shared_ptr<Context> &context,
  63. const std::shared_ptr<TrainCfg> &train_cfg = nullptr);
  64. /// \brief Resizes the shapes of inputs.
  65. ///
  66. /// \param[in] inputs A vector that includes all input tensors in order.
  67. /// \param[in] dims Defines the new shapes of inputs, should be consistent with inputs.
  68. ///
  69. /// \return Status.
  70. Status Resize(const std::vector<MSTensor> &inputs, const std::vector<std::vector<int64_t>> &dims);
  71. /// \brief Change the size and or content of weight tensors
  72. ///
  73. /// \param[in] new_weights a vector of tensors with new shapes and data to use in the model
  74. /// If data pointer is null, the data of the original tensors will be copied to the new ones
  75. ///
  76. /// \return Status.
  77. Status UpdateWeights(const std::vector<MSTensor> &new_weights);
  78. /// \brief Inference model.
  79. ///
  80. /// \param[in] inputs A vector where model inputs are arranged in sequence.
  81. /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence.
  82. /// \param[in] before CallBack before predict.
  83. /// \param[in] after CallBack after predict.
  84. ///
  85. /// \return Status.
  86. Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
  87. const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
  88. /// \brief Train model by step.
  89. ///
  90. /// \param[in] before CallBack before predict.
  91. /// \param[in] after CallBack after predict.
  92. ///
  93. /// \return Status.
  94. Status RunStep(const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
  95. /// \brief Inference model with preprocess in model.
  96. ///
  97. /// \param[in] inputs A vector where model inputs are arranged in sequence.
  98. /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence.
  99. /// \param[in] whether to use data preprocess in model.
  100. /// \param[in] before CallBack before predict.
  101. /// \param[in] after CallBack after predict.
  102. ///
  103. /// \return Status.
  104. Status PredictWithPreprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs,
  105. const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
  106. /// \brief Apply data preprocess if it exits in model.
  107. ///
  108. /// \param[in] inputs A vector where model inputs are arranged in sequence.
  109. /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence.
  110. ///
  111. /// \return Status.
  112. Status Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs);
  113. /// \brief Check if data preprocess exists in model.
  114. /// \return true if data preprocess exists.
  115. bool HasPreprocess();
  116. /// \brief Load config file.
  117. ///
  118. /// \param[in] config_path config file path.
  119. ///
  120. /// \return Status.
  121. inline Status LoadConfig(const std::string &config_path);
  122. /// \brief Update config.
  123. ///
  124. /// \param[in] section define the config section.
  125. /// \param[in] config define the config will be updated.
  126. ///
  127. /// \return Status.
  128. inline Status UpdateConfig(const std::string &section, const std::pair<std::string, std::string> &config);
  129. /// \brief Obtains all input tensors of the model.
  130. ///
  131. /// \return The vector that includes all input tensors.
  132. std::vector<MSTensor> GetInputs();
  133. /// \brief Obtains the input tensor of the model by name.
  134. ///
  135. /// \return The input tensor with the given name, if the name is not found, an invalid tensor is returned.
  136. inline MSTensor GetInputByTensorName(const std::string &tensor_name);
  137. /// \brief Obtains all gradient tensors of the model.
  138. ///
  139. /// \return The vector that includes all gradient tensors.
  140. std::vector<MSTensor> GetGradients() const;
  141. /// \brief update gradient tensors of the model.
  142. ///
  143. /// \param[in] inputs A vector new gradients.
  144. /// \return Status of operation
  145. Status ApplyGradients(const std::vector<MSTensor> &gradients);
  146. /// \brief Obtains all weights tensors of the model.
  147. ///
  148. /// \return The vector that includes all gradient tensors.
  149. std::vector<MSTensor> GetFeatureMaps() const;
  150. /// \brief update weights tensors of the model.
  151. ///
  152. /// \param[in] inputs A vector new weights.
  153. /// \return Status of operation
  154. Status UpdateFeatureMaps(const std::vector<MSTensor> &new_weights);
  155. /// \brief Obtains optimizer params tensors of the model.
  156. ///
  157. /// \return The vector that includes all params tensors.
  158. std::vector<MSTensor> GetOptimizerParams() const;
  159. /// \brief update the optimizer parameters
  160. ///
  161. /// \param[in] inputs A vector new optimizer params.
  162. /// \return Status of operation
  163. Status SetOptimizerParams(const std::vector<MSTensor> &params);
  164. /// \brief Setup training with virtual batches
  165. ///
  166. /// \param[in] virtual_batch_multiplier - virtual batch multiplier, use any number < 1 to disable
  167. /// \param[in] lr - learning rate to use for virtual batch, -1 for internal configuration
  168. /// \param[in] momentum - batch norm momentum to use for virtual batch, -1 for internal configuration
  169. /// \return Status of operation
  170. Status SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f);
  171. /// \brief Sets the Learning Rate of the training
  172. ///
  173. /// \param[in] learning_rate to set
  174. /// \return Status of operation
  175. Status SetLearningRate(float learning_rate);
  176. /// \brief Gets the Learning Rate of the optimizer
  177. ///
  178. /// \return learning rate. 0.0 if no optimizer was found
  179. float GetLearningRate();
  180. Status InitMetrics(std::vector<Metrics *> metrics);
  181. std::vector<Metrics *> GetMetrics();
  182. /// \brief Obtains all output tensors of the model.
  183. ///
  184. /// \return The vector that includes all output tensors.
  185. std::vector<MSTensor> GetOutputs();
  186. /// \brief Obtains names of all output tensors of the model.
  187. ///
  188. /// \return A vector that includes names of all output tensors.
  189. inline std::vector<std::string> GetOutputTensorNames();
  190. /// \brief Obtains the output tensor of the model by name.
  191. ///
  192. /// \return The output tensor with the given name, if the name is not found, an invalid tensor is returned.
  193. inline MSTensor GetOutputByTensorName(const std::string &tensor_name);
  194. /// \brief Get output MSTensors of model by node name.
  195. ///
  196. /// \param[in] node_name Define node name.
  197. ///
  198. /// \note Deprecated, replace with GetOutputByTensorName
  199. ///
  200. /// \return The vector of output MSTensor.
  201. inline std::vector<MSTensor> GetOutputsByNodeName(const std::string &node_name);
  202. /// \brief Bind GLTexture2D object to cl Memory.
  203. ///
  204. /// \param[in] inputGlTexture The input GLTexture id for Model.
  205. /// \param[in] outputGLTexture The output GLTexture id for Model.
  206. ///
  207. /// \return Status of operation.
  208. Status BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
  209. std::map<std::string, unsigned int> *outputGLTexture);
  210. /// \brief Inference model.
  211. ///
  212. /// \param[in] device_type Device type,options are kGPU, kAscend etc.
  213. /// \param[in] model_type The type of model file, options are ModelType::kMindIR, ModelType::kOM.
  214. ///
  215. /// \return Is supported or not.
  216. static bool CheckModelSupport(enum DeviceType device_type, ModelType model_type);
  217. Status SetTrainMode(bool train);
  218. bool GetTrainMode() const;
  219. Status Train(int epochs, std::shared_ptr<dataset::Dataset> ds, std::vector<TrainCallBack *> cbs);
  220. Status Evaluate(std::shared_ptr<dataset::Dataset> ds, std::vector<TrainCallBack *> cbs);
  221. /// \brief Build a model from model buffer so that it can run on a device. Only valid for Lite.
  222. ///
  223. /// \param[in] model_data Define the buffer read from a model file.
  224. /// \param[in] data_size Define bytes number of model buffer.
  225. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
  226. /// ModelType::kMindIR is valid for Lite.
  227. /// \param[in] model_context Define the context used to store options during execution.
  228. ///
  229. /// \return Status.
  230. Status Build(const void *model_data, size_t data_size, ModelType model_type,
  231. const std::shared_ptr<Context> &model_context = nullptr);
  232. /// \brief Load and build a model from model buffer so that it can run on a device. Only valid for Lite.
  233. ///
  234. /// \param[in] model_path Define the model path.
  235. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
  236. /// ModelType::kMindIR is valid for Lite.
  237. /// \param[in] model_context Define the context used to store options during execution.
  238. ///
  239. /// \return Status.
  240. Status Build(const std::string &model_path, ModelType model_type,
  241. const std::shared_ptr<Context> &model_context = nullptr);
  242. /// \brief Build a model from model buffer so that it can run on a device. Only valid for Lite.
  243. ///
  244. /// \param[in] model_data Define the buffer read from a model file.
  245. /// \param[in] data_size Define bytes number of model buffer.
  246. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
  247. /// ModelType::kMindIR is valid for Lite.
  248. /// \param[in] model_context Define the context used to store options during execution.
  249. /// \param[in] dec_key Define the key used to decrypt the ciphertext model. The key length is 16.
  250. /// \param[in] dec_mode Define the decryption mode. Options: AES-GCM.
  251. /// \param[in] cropto_lib_path Define the openssl library path.
  252. ///
  253. /// \return Status.
  254. Status Build(const void *model_data, size_t data_size, ModelType model_type,
  255. const std::shared_ptr<Context> &model_context, const Key &dec_key, const std::string &dec_mode,
  256. const std::string &cropto_lib_path);
  257. /// \brief Load and build a model from model buffer so that it can run on a device. Only valid for Lite.
  258. ///
  259. /// \param[in] model_path Define the model path.
  260. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
  261. /// ModelType::kMindIR is valid for Lite.
  262. /// \param[in] model_context Define the context used to store options during execution.
  263. /// \param[in] dec_key Define the key used to decrypt the ciphertext model. The key length is 16.
  264. /// \param[in] dec_mode Define the decryption mode. Options: AES-GCM.
  265. /// \param[in] cropto_lib_path Define the openssl library path.
  266. ///
  267. /// \return Status.
  268. Status Build(const std::string &model_path, ModelType model_type, const std::shared_ptr<Context> &model_context,
  269. const Key &dec_key, const std::string &dec_mode, const std::string &cropto_lib_path);
  270. private:
  271. friend class Serialization;
  272. // api without std::string
  273. MSTensor GetInputByTensorName(const std::vector<char> &tensor_name);
  274. std::vector<std::vector<char>> GetOutputTensorNamesChar();
  275. MSTensor GetOutputByTensorName(const std::vector<char> &tensor_name);
  276. std::vector<MSTensor> GetOutputsByNodeName(const std::vector<char> &node_name);
  277. Status LoadConfig(const std::vector<char> &config_path);
  278. Status UpdateConfig(const std::vector<char> &section, const std::pair<std::vector<char>, std::vector<char>> &config);
  279. Status Build(const std::vector<char> &model_path, ModelType model_type,
  280. const std::shared_ptr<Context> &model_context);
  281. Status Build(const std::vector<char> &model_path, ModelType model_type, const std::shared_ptr<Context> &model_context,
  282. const Key &dec_key, const std::string &dec_mode, const std::vector<char> &cropto_lib_path);
  283. std::shared_ptr<ModelImpl> impl_;
  284. };
  285. MSTensor Model::GetInputByTensorName(const std::string &tensor_name) {
  286. return GetInputByTensorName(StringToChar(tensor_name));
  287. }
  288. std::vector<std::string> Model::GetOutputTensorNames() { return VectorCharToString(GetOutputTensorNamesChar()); }
  289. MSTensor Model::GetOutputByTensorName(const std::string &tensor_name) {
  290. return GetOutputByTensorName(StringToChar(tensor_name));
  291. }
  292. std::vector<MSTensor> Model::GetOutputsByNodeName(const std::string &node_name) {
  293. return GetOutputsByNodeName(StringToChar(node_name));
  294. }
  295. Status Model::LoadConfig(const std::string &config_path) { return LoadConfig(StringToChar(config_path)); }
  296. Status Model::UpdateConfig(const std::string &section, const std::pair<std::string, std::string> &config) {
  297. std::pair<std::vector<char>, std::vector<char>> config_pair = {StringToChar(config.first),
  298. StringToChar(config.second)};
  299. return UpdateConfig(StringToChar(section), config_pair);
  300. }
  301. inline Status Model::Build(const std::string &model_path, ModelType model_type,
  302. const std::shared_ptr<Context> &model_context, const Key &dec_key,
  303. const std::string &dec_mode, const std::string &cropto_lib_path) {
  304. return Build(StringToChar(model_path), model_type, model_context, dec_key, dec_mode, StringToChar(cropto_lib_path));
  305. }
  306. inline Status Model::Build(const std::string &model_path, ModelType model_type,
  307. const std::shared_ptr<Context> &model_context) {
  308. return Build(StringToChar(model_path), model_type, model_context);
  309. }
  310. } // namespace mindspore
  311. #endif // MINDSPORE_INCLUDE_API_MODEL_H