You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

costmodel_context.h 8.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
  17. #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "utils/log_adapter.h"
  22. #include "utils/ms_context.h"
  23. namespace mindspore {
  24. namespace parallel {
  // Default values for the parameters held by CostModelContext. The macro names mirror the
  // parameter labels used in the comments of that class.

  // The string "-"; presumably the separator placed between two operator names - confirm at use sites.
  #define OPERATOR_TO_OPERATOR_CONNECTOR "-"
  // 16 * 1024^3 = 17179869184 (16 GiB if the unit is bytes; the unit is not stated here - TODO confirm).
  #define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0)
  #define DEFAULT_COST_MODEL_ALPHA 1.0
  #define DEFAULT_COST_MODEL_BETA_ASCEND 400.0  // for 'device_target = Ascend'
  #define DEFAULT_COST_MODEL_BETA_GPU 50.0      // for 'device_target = GPU'
  #define DEFAULT_COST_MODEL_GAMMA 0.001
  #define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true
  #define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0
  #define DEFAULT_COST_MODEL_COMMUNI_CONST 3072.0
  #define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0
  #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false
  #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16
  #define DEFAULT_FULLY_USE_DEVICES true
  #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false
  #define DEFAULT_IS_MULTI_SUBGRAPHS false

  // Values for the run phase stored in CostModelContext (0: 'training', 1: 'inference').
  #define TRAINING_PHASE 0
  #define INFERENCE_PHASE 1

  // NOTE: this macro must expand to a plain expression, so it carries no trailing ';'.
  // A semicolon inside the replacement text would make the macro unusable in conditions,
  // brace/parenthesis initializers and argument lists, while `x = MACRO;` works either way.
  #define DEFAULT_TRIANGLE_STAR_STRATEGY_OVERWRITE true

  // Defaults for the DP-algorithm options (approximation switch, its epsilon, single-loop mode).
  #define DEFAULT_DP_ALGO_ENABLE_APPROX false
  #define DEFAULT_DP_ALGO_APPROX_EPSILON 0.1
  #define DEFAULT_DP_ALGO_SINGLE_LOOP true
  // Holder for the cost-model and strategy-search settings declared below.
  //
  // The constructor is private and copying is disabled; callers obtain an instance through
  // GetInstance(). Only the trivial getters are defined in this header. GetInstance(), every
  // setter and the Reset*/Print* functions are declared here and defined elsewhere.
  class CostModelContext {
   public:
    // Copying is not allowed.
    CostModelContext(const CostModelContext &) = delete;
    CostModelContext &operator=(const CostModelContext &) = delete;
    ~CostModelContext() = default;

    // Access to the shared instance (definition not visible in this header).
    static std::shared_ptr<CostModelContext> GetInstance();

    // Whole-context operations (declarations only).
    void ResetCostModel();
    void ResetAlgoParameters();
    void PrintCostModel();
    void set_costmodel_context_for_device(const std::string &);

    // ---- Cost-model parameters: each getter returns the stored member as-is ----

    // DEVICE_MEMORY_CAPACITY
    double device_memory_capacity() const { return device_memory_capacity_; }
    void set_device_memory_capacity(double);

    // COST_MODEL_ALPHA
    double costmodel_alpha() const { return costmodel_alpha_; }
    void set_costmodel_alpha(double);

    // COST_MODEL_BETA
    double costmodel_beta() const { return costmodel_beta_; }
    void set_costmodel_beta(double);

    // COST_MODEL_GAMMA
    double costmodel_gamma() const { return costmodel_gamma_; }
    void set_costmodel_gamma(double);

    // COST_MODEL_SIMPLIFY_CALCULATION
    bool costmodel_simplify_cal() const { return costmodel_simplify_cal_; }
    void set_costmodel_simplify_cal(bool);

    // COST_MODEL_COMMUNI_THRESHOLD
    double costmodel_communi_threshold() const { return costmodel_communi_threshold_; }
    void set_costmodel_communi_threshold(double);

    // COST_MODEL_COMMUNI_CONST
    double costmodel_communi_const() const { return costmodel_communi_const_; }
    void set_costmodel_communi_const(double);

    // COST_MODEL_COMMUNI_BIAS
    double costmodel_communi_bias() const { return costmodel_communi_bias_; }
    void set_costmodel_communi_bias(double);

    // MULTI_SUBGRAPHS
    bool is_multi_subgraphs() const { return is_multi_subgraphs_; }
    void set_multi_subgraphs(bool);

    // ---- All-reduce fusion parameters ----

    int64_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; }
    void set_costmodel_allreduce_fusion_algorithm(int64_t);

    int64_t costmodel_allreduce_fusion_times() const { return costmodel_allreduce_fusion_times_; }
    void set_costmodel_allreduce_fusion_times(int64_t);

    double costmodel_allreduce_fusion_tail_percent() const { return costmodel_allreduce_fusion_tail_percent_; }
    void set_costmodel_allreduce_fusion_tail_percent(double);

    double costmodel_allreduce_fusion_tail_time() const { return costmodel_allreduce_fusion_tail_time_; }
    void set_costmodel_allreduce_fusion_tail_time(double);

    double costmodel_allreduce_fusion_allreduce_inherent_time() const {
      return costmodel_allreduce_fusion_allreduce_inherent_time_;
    }
    void set_costmodel_allreduce_fusion_allreduce_inherent_time(double);

    double costmodel_allreduce_fusion_allreduce_bandwidth() const {
      return costmodel_allreduce_fusion_allreduce_bandwidth_;
    }
    void set_costmodel_allreduce_fusion_allreduce_bandwidth(double);

    double costmodel_allreduce_fusion_computation_time_parameter() const {
      return costmodel_allreduce_fusion_computation_time_parameter_;
    }
    void set_costmodel_allreduce_fusion_computation_time_parameter(double);

    // ---- Strategy-search / algorithm parameters ----

    // TENSOR_SLICE_ALIGNMENT_ENABLE
    bool tensor_slice_alignment_enable() const { return tensor_slice_alignment_enable_; }
    void set_tensor_slice_alignment_enable(bool);

    // TENSOR_SLICE_ALIGNMENT_SIZE
    size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; }
    void set_tensor_slice_alignment_size(size_t);

    // FULLY_USE_DEVICES
    bool fully_use_device() const { return fully_use_device_; }
    void set_fully_use_device(bool);

    // ELEMENTWISE_OP_STRA_FOLLOW
    bool elementwise_stra_follow() const { return elementwise_stra_follow_; }
    void set_elementwise_stra_follow(bool);

    // Triangle/star structure handling in the recovery phase of the DP algorithm.
    bool triangle_star_strategy_overwrite() const { return triangle_star_strategy_overwrite_; }
    void set_triangle_star_strategy_overwrite(bool);

    // Run phase: 0 is 'training', 1 is 'inference'.
    int64_t run_phase() const { return run_phase_; }
    void set_run_phase(int64_t);

    // 'epsilon' used by the DP algorithm's APPROXIMATION.
    double dp_algo_approxi_epsilon() const { return dp_algo_approxi_epsilon_; }
    void set_dp_algo_approxi_epsilon(double);

    // APPROXIMATION switch of the DP algorithm.
    bool dp_algo_enable_approxi() const { return dp_algo_enable_approxi_; }
    void set_dp_algo_enable_approxi(bool);

    // Single suite of OperatorInfo per loop.
    bool dp_algo_single_loop() const { return dp_algo_single_loop_; }
    void set_dp_algo_single_loop(bool);

   private:
    // Private so that instances cannot be created from outside the class.
    CostModelContext();

    // Static instance pointer (declared here; its definition is outside this header).
    static std::shared_ptr<CostModelContext> cm_context_inst_;

    // NOTE: keep the non-static data members in this order; it determines object layout
    // and the order in which members are initialized.

    // DEVICE_MEMORY_CAPACITY
    double device_memory_capacity_;

    // COST_MODEL_ALPHA
    double costmodel_alpha_;

    // COST_MODEL_BETA
    double costmodel_beta_;

    // COST_MODEL_GAMMA
    double costmodel_gamma_;

    // COST_MODEL_SIMPLIFY_CALCULATION
    bool costmodel_simplify_cal_;

    // COST_MODEL_COMMUNI_THRESHOLD
    double costmodel_communi_threshold_;

    // COST_MODEL_COMMUNI_CONST
    double costmodel_communi_const_;

    // COST_MODEL_COMMUNI_BIAS
    double costmodel_communi_bias_;

    // MULTI_SUBGRAPHS
    bool is_multi_subgraphs_;

    // In the recovery phase of DP algorithm, when encountering triangle structure and star structure,
    // whether overwrite the right-node strategy
    bool triangle_star_strategy_overwrite_;

    // Whether to enable APPROXIMATION in the DP algorithm.
    bool dp_algo_enable_approxi_;

    // When APPROXIMATION is enabled in the DP algorithm, the 'epsilon' value used in the APPROXIMATION.
    double dp_algo_approxi_epsilon_;

    // Whether to generate a single suite of OperatorInfo for a loop.
    bool dp_algo_single_loop_;

    int64_t run_phase_;  // 0: 'training', 1: 'inference'

    // All-reduce fusion parameters.
    int64_t costmodel_allreduce_fusion_algorithm_;
    int64_t costmodel_allreduce_fusion_times_;
    double costmodel_allreduce_fusion_tail_percent_;
    double costmodel_allreduce_fusion_tail_time_;
    double costmodel_allreduce_fusion_allreduce_inherent_time_;
    double costmodel_allreduce_fusion_allreduce_bandwidth_;
    double costmodel_allreduce_fusion_computation_time_parameter_;

    // TENSOR_SLICE_ALIGNMENT_ENABLE
    bool tensor_slice_alignment_enable_;

    // TENSOR_SLICE_ALIGNMENT_SIZE
    size_t tensor_slice_alignment_size_;

    // FULLY_USE_DEVICES
    bool fully_use_device_;

    // ELEMENTWISE_OP_STRA_FOLLOW
    bool elementwise_stra_follow_;
  };
  171. } // namespace parallel
  172. } // namespace mindspore
  173. #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_