You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hccl_multi_machine_multi_rank.json 5.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. {
  2. "board_id": "0x0000",
  3. "chip_info": "910",
  4. "deploy_mode": "lab",
  5. "group_count": "1",
  6. "group_list": [{
  7. "device_num": "16",
  8. "server_num": "2",
  9. "group_name": "",
  10. "instance_count": "16",
  11. "instance_list": [{
  12. "devices": [{
  13. "device_id": "0",
  14. "device_ip": "[A_device_ip_0]"
  15. }],
  16. "rank_id": "0",
  17. "server_id": "[server_id_A]"
  18. },
  19. {
  20. "devices": [{
  21. "device_id": "1",
  22. "device_ip": "[A_device_ip_1]"
  23. }],
  24. "rank_id": "1",
  25. "server_id": "[server_id_A]"
  26. },
  27. {
  28. "devices": [{
  29. "device_id": "2",
  30. "device_ip": "[A_device_ip_2]"
  31. }],
  32. "rank_id": "2",
  33. "server_id": "[server_id_A]"
  34. },
  35. {
  36. "devices": [{
  37. "device_id": "3",
  38. "device_ip": "[A_device_ip_3]"
  39. }],
  40. "rank_id": "3",
  41. "server_id": "[server_id_A]"
  42. },
  43. {
  44. "devices": [{
  45. "device_id": "4",
  46. "device_ip": "[A_device_ip_4]"
  47. }],
  48. "rank_id": "4",
  49. "server_id": "[server_id_A]"
  50. },
  51. {
  52. "devices": [{
  53. "device_id": "5",
  54. "device_ip": "[A_device_ip_5]"
  55. }],
  56. "rank_id": "5",
  57. "server_id": "[server_id_A]"
  58. },
  59. {
  60. "devices": [{
  61. "device_id": "6",
  62. "device_ip": "[A_device_ip_6]"
  63. }],
  64. "rank_id": "6",
  65. "server_id": "[server_id_A]"
  66. },
  67. {
  68. "devices": [{
  69. "device_id": "7",
  70. "device_ip": "[A_device_ip_7]"
  71. }],
  72. "rank_id": "7",
  73. "server_id": "[server_id_A]"
  74. },
  75. {
  76. "devices": [{
  77. "device_id": "0",
  78. "device_ip": "[B_device_ip_0]"
  79. }],
  80. "rank_id": "8",
  81. "server_id": "[server_id_B]"
  82. },
  83. {
  84. "devices": [{
  85. "device_id": "1",
  86. "device_ip": "[B_device_ip_1]"
  87. }],
  88. "rank_id": "9",
  89. "server_id": "[server_id_B]"
  90. },
  91. {
  92. "devices": [{
  93. "device_id": "2",
  94. "device_ip": "[B_device_ip_2]"
  95. }],
  96. "rank_id": "10",
  97. "server_id": "[server_id_B]"
  98. },
  99. {
  100. "devices": [{
  101. "device_id": "3",
  102. "device_ip": "[B_device_ip_3]"
  103. }],
  104. "rank_id": "11",
  105. "server_id": "[server_id_B]"
  106. },
  107. {
  108. "devices": [{
  109. "device_id": "4",
  110. "device_ip": "[B_device_ip_4]"
  111. }],
  112. "rank_id": "12",
  113. "server_id": "[server_id_B]"
  114. },
  115. {
  116. "devices": [{
  117. "device_id": "5",
  118. "device_ip": "[B_device_ip_5]"
  119. }],
  120. "rank_id": "13",
  121. "server_id": "[server_id_B]"
  122. },
  123. {
  124. "devices": [{
  125. "device_id": "6",
  126. "device_ip": "[B_device_ip_6]"
  127. }],
  128. "rank_id": "14",
  129. "server_id": "[server_id_B]"
  130. },
  131. {
  132. "devices": [{
  133. "device_id": "7",
  134. "device_ip": "[B_device_ip_7]"
  135. }],
  136. "rank_id": "15",
  137. "server_id": "[server_id_B]"
  138. }
  139. ]
  140. }],
  141. "para_plane_nic_location": "device",
  142. "para_plane_nic_name": [
  143. "eth0",
  144. "eth1",
  145. "eth2",
  146. "eth3",
  147. "eth4",
  148. "eth5",
  149. "eth6",
  150. "eth7"
  151. ],
  152. "para_plane_nic_num": "8",
  153. "status": "completed",
  154. "hccl_config_json_spec": {
  155. "board_id": "board id, currently supports 0x0000 or 0x3000",
  156. "chip_info": "chip info, current is 910",
  157. "deploy_mode": "current use lab",
  158. "group_count": "number of groups used",
  159. "group_list": "detailed group information",
  160. "device_num": "number of devices used, the value is the nth power of 2",
  161. "server_num": "number of servers (machines) used, 1 for a single machine",
  162. "group_name": "default is hccl_world_group or specified",
  163. "instance_count": "number of instances used, generally equal to device_num",
  164. "instance_list": "detailed instance information",
  165. "device_id": "designated davinci device id to use, values start from 0, but no more than the total number of devices on a single machine. If server_num is greater than 1, the ids can restart from 0 on each server",
  166. "device_ip": "ip corresponding to device_id",
  167. "rank_id": "the rank id of the first device must be 0, and subsequent rank ids increase in order",
  168. "server_id": "can be specified as the machine's ip address",
  169. "para_plane_nic_location": "current use device",
  170. "para_plane_nic_name": "network card corresponding to device ip",
  171. "para_plane_nic_num": "number of network cards used",
  172. "status": "current use completed"
  173. }
  174. }