You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_auto_parallel_resnet.py 31 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import re
  15. import numpy as np
  16. import mindspore.common.dtype as mstype
  17. import mindspore.nn as nn
  18. import mindspore.ops.functional as F
  19. from mindspore import Tensor
  20. from mindspore import context
  21. from mindspore.common.api import _executor
  22. from mindspore.common.initializer import TruncatedNormal
  23. from mindspore.communication.management import init
  24. from mindspore.nn.loss.loss import _Loss
  25. from mindspore.nn.optim.momentum import Momentum
  26. from mindspore.ops import operations as P
  27. from mindspore.parallel import _cost_model_context as cost_model_context
  28. from mindspore.parallel import set_algo_parameters
  29. from mindspore.parallel._utils import _reset_op_id as resset_op_id
  30. from mindspore.train.model import Model
  31. from mindspore.context import ParallelMode
  32. context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
  33. context.set_context(device_id=0)
  34. init()
  35. def weight_variable():
  36. return TruncatedNormal(0.02)
  37. def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
  38. """Get a conv2d layer with 3x3 kernel size."""
  39. init_value = weight_variable()
  40. return nn.Conv2d(in_channels, out_channels,
  41. kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
  42. def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
  43. """Get a conv2d layer with 1x1 kernel size."""
  44. init_value = weight_variable()
  45. return nn.Conv2d(in_channels, out_channels,
  46. kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
  47. def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
  48. """Get a conv2d layer with 7x7 kernel size."""
  49. init_value = weight_variable()
  50. return nn.Conv2d(in_channels, out_channels,
  51. kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
  52. def _fused_bn(channels, momentum=0.9):
  53. """Get a fused batchnorm"""
  54. return nn.BatchNorm2d(channels, momentum=momentum)
  55. class ResidualBlock(nn.Cell):
  56. expansion = 4
  57. def __init__(self,
  58. in_channels,
  59. out_channels,
  60. stride=1,
  61. momentum=0.9):
  62. super(ResidualBlock, self).__init__()
  63. out_chls = out_channels // self.expansion
  64. self.conv1 = _conv1x1(in_channels, out_chls, stride=1)
  65. self.bn1 = _fused_bn(out_chls, momentum=momentum)
  66. self.conv2 = _conv3x3(out_chls, out_chls, stride=stride)
  67. self.bn2 = _fused_bn(out_chls, momentum=momentum)
  68. self.conv3 = _conv1x1(out_chls, out_channels, stride=1)
  69. self.bn3 = _fused_bn(out_channels, momentum=momentum)
  70. self.relu = P.ReLU()
  71. self.downsample = (in_channels != out_channels)
  72. self.stride = stride
  73. if self.downsample:
  74. self.conv_down_sample = _conv1x1(in_channels, out_channels,
  75. stride=stride)
  76. self.bn_down_sample = _fused_bn(out_channels, momentum=momentum)
  77. elif self.stride != 1:
  78. self.maxpool_down = nn.MaxPool2d(kernel_size=1, stride=2, pad_mode='same')
  79. self.add = P.TensorAdd()
  80. def construct(self, x):
  81. identity = x
  82. out = self.conv1(x)
  83. out = self.bn1(out)
  84. out = self.relu(out)
  85. out = self.conv2(out)
  86. out = self.bn2(out)
  87. out = self.relu(out)
  88. out = self.conv3(out)
  89. out = self.bn3(out)
  90. if self.downsample:
  91. identity = self.conv_down_sample(identity)
  92. identity = self.bn_down_sample(identity)
  93. elif self.stride != 1:
  94. identity = self.maxpool_down(identity)
  95. out = self.add(out, identity)
  96. out = self.relu(out)
  97. return out
  98. class ResNet(nn.Cell):
  99. def __init__(self,
  100. block,
  101. layer_nums,
  102. in_channels,
  103. out_channels,
  104. strides=None,
  105. num_classes=100):
  106. super(ResNet, self).__init__()
  107. if strides is None:
  108. strides = [1, 2, 2, 2]
  109. if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
  110. raise ValueError("the length of "
  111. "layer_num, inchannel, outchannel list must be 4!")
  112. self.conv1 = _conv7x7(3, 64, stride=2)
  113. self.bn1 = _fused_bn(64)
  114. self.relu = P.ReLU()
  115. self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')
  116. self.layer1 = self._make_layer(block,
  117. layer_nums[0],
  118. in_channel=in_channels[0],
  119. out_channel=out_channels[0],
  120. stride=strides[0])
  121. self.layer2 = self._make_layer(block,
  122. layer_nums[1],
  123. in_channel=in_channels[1],
  124. out_channel=out_channels[1],
  125. stride=strides[1])
  126. self.layer3 = self._make_layer(block,
  127. layer_nums[2],
  128. in_channel=in_channels[2],
  129. out_channel=out_channels[2],
  130. stride=strides[2])
  131. self.layer4 = self._make_layer(block,
  132. layer_nums[3],
  133. in_channel=in_channels[3],
  134. out_channel=out_channels[3],
  135. stride=strides[3])
  136. self.mean = P.ReduceMean(keep_dims=True)
  137. self.end_point = nn.Dense(2048, num_classes, has_bias=True,
  138. weight_init=weight_variable(),
  139. bias_init=weight_variable()).add_flags_recursive(fp16=True)
  140. self.squeeze = P.Squeeze()
  141. self.cast = P.Cast()
  142. def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
  143. layers = []
  144. resblk = block(in_channel, out_channel, stride=1)
  145. layers.append(resblk)
  146. for _ in range(1, layer_num - 1):
  147. resblk = block(out_channel, out_channel, stride=1)
  148. layers.append(resblk)
  149. resblk = block(out_channel, out_channel, stride=stride)
  150. layers.append(resblk)
  151. return nn.SequentialCell(layers)
  152. def construct(self, x):
  153. x = self.conv1(x)
  154. x = self.bn1(x)
  155. x = self.relu(x)
  156. c1 = self.maxpool(x)
  157. c2 = self.layer1(c1)
  158. c3 = self.layer2(c2)
  159. c4 = self.layer3(c3)
  160. c5 = self.layer4(c4)
  161. out = self.mean(c5, (2, 3))
  162. out = self.squeeze(out)
  163. out = self.end_point(out)
  164. return out
  165. def resnet50(class_num=10):
  166. return ResNet(ResidualBlock,
  167. [3, 4, 6, 3],
  168. [64, 256, 512, 1024],
  169. [256, 512, 1024, 2048],
  170. [2, 2, 2, 1],
  171. class_num)
  172. class SoftmaxCrossEntropyExpand(_Loss):
  173. def __init__(self, sparse=False):
  174. super(SoftmaxCrossEntropyExpand, self).__init__()
  175. self.exp = P.Exp()
  176. self.sum = P.ReduceSum(keep_dims=True)
  177. self.onehot = P.OneHot()
  178. self.on_value = Tensor(1.0, mstype.float32)
  179. self.off_value = Tensor(0.0, mstype.float32)
  180. self.div = P.Div()
  181. self.log = P.Log()
  182. self.sum_cross_entropy = P.ReduceSum(keep_dims=False)
  183. self.mul = P.Mul()
  184. self.mul2 = P.Mul()
  185. self.cast = P.Cast()
  186. self.mean = P.ReduceMean(keep_dims=False).add_prim_attr("cross_batch", True)
  187. self.sparse = sparse
  188. self.max = P.ReduceMax(keep_dims=True)
  189. self.sub = P.Sub()
  190. self.cast1 = P.Cast()
  191. def construct(self, logit, label):
  192. logit = self.cast1(logit, mstype.float32)
  193. logit_max = self.max(logit)
  194. exp = self.exp(self.sub(logit, logit_max))
  195. exp_sum = self.sum(exp, -1)
  196. softmax_result = self.div(exp, exp_sum)
  197. if self.sparse:
  198. label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
  199. softmax_result_log = self.log(softmax_result)
  200. loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1)
  201. loss = self.mul2(F.scalar_to_array(-1.0), loss)
  202. loss = self.mean(loss, -1)
  203. return loss
  204. class DatasetLenet():
  205. def __init__(self, predict, label, length=3):
  206. self.predict = predict
  207. self.label = label
  208. self.index = 0
  209. self.length = length
  210. def __iter__(self):
  211. return self
  212. def __next__(self):
  213. if self.index >= self.length:
  214. raise StopIteration
  215. self.index += 1
  216. return self.predict, self.label
  217. def reset(self):
  218. self.index = 0
  219. def get_dataset_size(self):
  220. return 32
  221. def get_repeat_count(self):
  222. return 1
  223. def create_tuple_iterator(self, num_epochs=-1):
  224. return self
  225. def test_train_32k_8p(batch_size=32, num_classes=32768):
  226. dev_num = 8
  227. context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
  228. set_algo_parameters(elementwise_op_strategy_follow=True)
  229. resset_op_id()
  230. np.random.seed(6)
  231. input_np = np.ones([batch_size, 3, 224, 224]).astype(np.float32)
  232. label_np = np.zeros([batch_size]).astype(np.int32)
  233. for i in range(0, batch_size):
  234. label_np[i] = i % num_classes
  235. dataset = DatasetLenet(Tensor(input_np), Tensor(label_np), 1)
  236. net = resnet50(num_classes)
  237. loss = SoftmaxCrossEntropyExpand(sparse=True)
  238. opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
  239. model = Model(net, loss_fn=loss, optimizer=opt)
  240. model.train(5, dataset, dataset_sink_mode=False)
  241. strategies = _executor._get_shard_strategy(model._train_network)
  242. for (k, v) in strategies.items():
  243. if re.search('Conv2D-op', k) is not None:
  244. assert v[0][0] == dev_num
  245. elif re.search('MatMul-op', k) is not None:
  246. assert v == [[dev_num, 1], [1, 1]]
  247. elif re.search('ReduceSum-op', k) is not None:
  248. assert v == [[dev_num, 1]]
  249. allreduce_fusion_dict = _executor._get_allreduce_fusion(model._train_network)
  250. print(allreduce_fusion_dict)
  251. return allreduce_fusion_dict
  252. def train_32k_8p_fusion1(batch_size=32, num_classes=32768): # 1048576 #131072 #32768 #8192
  253. cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0)
  254. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
  255. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
  256. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
  257. allreduce_fusion_dict = test_train_32k_8p(batch_size, num_classes)
  258. expect_dict = {'end_point.bias': 2,
  259. 'end_point.weight': 2,
  260. 'layer4.2.bn3.beta': 2,
  261. 'layer4.2.bn3.gamma': 2,
  262. 'layer4.2.conv3.weight': 2,
  263. 'layer4.2.bn2.beta': 2,
  264. 'layer4.2.bn2.gamma': 2,
  265. 'layer4.2.conv2.weight': 2,
  266. 'layer4.2.bn1.beta': 2,
  267. 'layer4.2.bn1.gamma': 2,
  268. 'layer4.2.conv1.weight': 2,
  269. 'layer4.1.bn3.beta': 2,
  270. 'layer4.1.bn3.gamma': 2,
  271. 'layer4.1.conv3.weight': 2,
  272. 'layer4.1.bn2.beta': 2,
  273. 'layer4.1.bn2.gamma': 2,
  274. 'layer4.1.conv2.weight': 2,
  275. 'layer4.1.bn1.beta': 2,
  276. 'layer4.1.bn1.gamma': 2,
  277. 'layer4.1.conv1.weight': 2,
  278. 'layer4.0.bn_down_sample.beta': 2,
  279. 'layer4.0.bn_down_sample.gamma': 2,
  280. 'layer4.0.conv_down_sample.weight': 2,
  281. 'layer4.0.bn3.beta': 2,
  282. 'layer4.0.bn3.gamma': 2,
  283. 'layer4.0.conv3.weight': 2,
  284. 'layer4.0.bn2.beta': 2,
  285. 'layer4.0.bn2.gamma': 2,
  286. 'layer4.0.conv2.weight': 2,
  287. 'layer4.0.bn1.beta': 2,
  288. 'layer4.0.bn1.gamma': 2,
  289. 'layer4.0.conv1.weight': 2,
  290. 'layer3.5.bn3.beta': 2,
  291. 'layer3.5.bn3.gamma': 2,
  292. 'layer3.5.conv3.weight': 2,
  293. 'layer3.5.bn2.beta': 2,
  294. 'layer3.5.bn2.gamma': 2,
  295. 'layer3.5.conv2.weight': 2,
  296. 'layer3.5.bn1.beta': 2,
  297. 'layer3.5.bn1.gamma': 2,
  298. 'layer3.5.conv1.weight': 2,
  299. 'layer3.4.bn3.beta': 2,
  300. 'layer3.4.bn3.gamma': 2,
  301. 'layer3.4.conv3.weight': 2,
  302. 'layer3.4.bn2.beta': 2,
  303. 'layer3.4.bn2.gamma': 2,
  304. 'layer3.4.conv2.weight': 2,
  305. 'layer3.4.bn1.beta': 2,
  306. 'layer3.4.bn1.gamma': 2,
  307. 'layer3.4.conv1.weight': 2,
  308. 'layer3.3.bn3.beta': 2,
  309. 'layer3.3.bn3.gamma': 2,
  310. 'layer3.3.conv3.weight': 2,
  311. 'layer3.3.bn2.beta': 2,
  312. 'layer3.3.bn2.gamma': 2,
  313. 'layer3.3.conv2.weight': 2,
  314. 'layer3.3.bn1.beta': 2,
  315. 'layer3.3.bn1.gamma': 2,
  316. 'layer3.3.conv1.weight': 2,
  317. 'layer3.2.bn3.beta': 2,
  318. 'layer3.2.bn3.gamma': 2,
  319. 'layer3.2.conv3.weight': 2,
  320. 'layer3.2.bn2.beta': 2,
  321. 'layer3.2.bn2.gamma': 2,
  322. 'layer3.2.conv2.weight': 2,
  323. 'layer3.2.bn1.beta': 2,
  324. 'layer3.2.bn1.gamma': 2,
  325. 'layer3.2.conv1.weight': 2,
  326. 'layer3.1.bn3.beta': 2,
  327. 'layer3.1.bn3.gamma': 2,
  328. 'layer3.1.conv3.weight': 2,
  329. 'layer3.1.bn2.beta': 2,
  330. 'layer3.1.bn2.gamma': 2,
  331. 'layer3.1.conv2.weight': 2,
  332. 'layer3.1.bn1.beta': 2,
  333. 'layer3.1.bn1.gamma': 2,
  334. 'layer3.1.conv1.weight': 2,
  335. 'layer3.0.bn_down_sample.beta': 2,
  336. 'layer3.0.bn_down_sample.gamma': 2,
  337. 'layer3.0.conv_down_sample.weight': 2,
  338. 'layer3.0.bn3.beta': 2,
  339. 'layer3.0.bn3.gamma': 2,
  340. 'layer3.0.conv3.weight': 2,
  341. 'layer3.0.bn2.beta': 2,
  342. 'layer3.0.bn2.gamma': 2,
  343. 'layer3.0.conv2.weight': 2,
  344. 'layer3.0.bn1.beta': 2,
  345. 'layer3.0.bn1.gamma': 2,
  346. 'layer3.0.conv1.weight': 2,
  347. 'layer2.3.bn3.beta': 2,
  348. 'layer2.3.bn3.gamma': 2,
  349. 'layer2.3.conv3.weight': 2,
  350. 'layer2.3.bn2.beta': 2,
  351. 'layer2.3.bn2.gamma': 2,
  352. 'layer2.3.conv2.weight': 2,
  353. 'layer2.3.bn1.beta': 2,
  354. 'layer2.3.bn1.gamma': 2,
  355. 'layer2.3.conv1.weight': 2,
  356. 'layer2.2.bn3.beta': 2,
  357. 'layer2.2.bn3.gamma': 2,
  358. 'layer2.2.conv3.weight': 2,
  359. 'layer2.2.bn2.beta': 2,
  360. 'layer2.2.bn2.gamma': 2,
  361. 'layer2.2.conv2.weight': 2,
  362. 'layer2.2.bn1.beta': 2,
  363. 'layer2.2.bn1.gamma': 2,
  364. 'layer2.2.conv1.weight': 2,
  365. 'layer2.1.bn3.beta': 2,
  366. 'layer2.1.bn3.gamma': 2,
  367. 'layer2.1.conv3.weight': 2,
  368. 'layer2.1.bn2.beta': 2,
  369. 'layer2.1.bn2.gamma': 2,
  370. 'layer2.1.conv2.weight': 2,
  371. 'layer2.1.bn1.beta': 2,
  372. 'layer2.1.bn1.gamma': 2,
  373. 'layer2.1.conv1.weight': 2,
  374. 'layer2.0.bn_down_sample.beta': 2,
  375. 'layer2.0.bn_down_sample.gamma': 2,
  376. 'layer2.0.conv_down_sample.weight': 2,
  377. 'layer2.0.bn3.beta': 2,
  378. 'layer2.0.bn3.gamma': 2,
  379. 'layer2.0.conv3.weight': 2,
  380. 'layer2.0.bn2.beta': 2,
  381. 'layer2.0.bn2.gamma': 2,
  382. 'layer2.0.conv2.weight': 2,
  383. 'layer2.0.bn1.beta': 2,
  384. 'layer2.0.bn1.gamma': 2,
  385. 'layer2.0.conv1.weight': 2,
  386. 'layer1.2.bn3.beta': 2,
  387. 'layer1.2.bn3.gamma': 2,
  388. 'layer1.2.conv3.weight': 2,
  389. 'layer1.2.bn2.beta': 2,
  390. 'layer1.2.bn2.gamma': 2,
  391. 'layer1.2.conv2.weight': 2,
  392. 'layer1.2.bn1.beta': 2,
  393. 'layer1.2.bn1.gamma': 2,
  394. 'layer1.2.conv1.weight': 2,
  395. 'layer1.1.bn3.beta': 2,
  396. 'layer1.1.bn3.gamma': 2,
  397. 'layer1.1.conv3.weight': 2,
  398. 'layer1.1.bn2.beta': 2,
  399. 'layer1.1.bn2.gamma': 2,
  400. 'layer1.1.conv2.weight': 2,
  401. 'layer1.1.bn1.beta': 2,
  402. 'layer1.1.bn1.gamma': 2,
  403. 'layer1.1.conv1.weight': 2,
  404. 'layer1.0.bn_down_sample.beta': 2,
  405. 'layer1.0.bn_down_sample.gamma': 2,
  406. 'layer1.0.conv_down_sample.weight': 2,
  407. 'layer1.0.bn3.beta': 2,
  408. 'layer1.0.bn3.gamma': 2,
  409. 'layer1.0.conv3.weight': 2,
  410. 'layer1.0.bn2.beta': 2,
  411. 'layer1.0.bn2.gamma': 2,
  412. 'layer1.0.conv2.weight': 2,
  413. 'layer1.0.bn1.beta': 2,
  414. 'layer1.0.bn1.gamma': 2,
  415. 'layer1.0.conv1.weight': 2,
  416. 'bn1.beta': 1,
  417. 'bn1.gamma': 1,
  418. 'conv1.weight': 1}
  419. assert allreduce_fusion_dict == expect_dict
  420. cost_model_context.reset_cost_model_context()
  421. def train_32k_8p_fusion2(batch_size=32, num_classes=32768): # 1048576 #131072 #32768 #8192
  422. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2)
  423. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.1)
  424. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05)
  425. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.000001)
  426. cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.0000015)
  427. allreduce_fusion_dict = test_train_32k_8p(batch_size, num_classes)
  428. expect_dict = {'end_point.bias': 2,
  429. 'end_point.weight': 2,
  430. 'layer4.2.bn3.beta': 2,
  431. 'layer4.2.bn3.gamma': 2,
  432. 'layer4.2.conv3.weight': 2,
  433. 'layer4.2.bn2.beta': 2,
  434. 'layer4.2.bn2.gamma': 2,
  435. 'layer4.2.conv2.weight': 2,
  436. 'layer4.2.bn1.beta': 2,
  437. 'layer4.2.bn1.gamma': 2,
  438. 'layer4.2.conv1.weight': 2,
  439. 'layer4.1.bn3.beta': 2,
  440. 'layer4.1.bn3.gamma': 2,
  441. 'layer4.1.conv3.weight': 2,
  442. 'layer4.1.bn2.beta': 2,
  443. 'layer4.1.bn2.gamma': 2,
  444. 'layer4.1.conv2.weight': 2,
  445. 'layer4.1.bn1.beta': 2,
  446. 'layer4.1.bn1.gamma': 2,
  447. 'layer4.1.conv1.weight': 2,
  448. 'layer4.0.bn_down_sample.beta': 2,
  449. 'layer4.0.bn_down_sample.gamma': 2,
  450. 'layer4.0.conv_down_sample.weight': 2,
  451. 'layer4.0.bn3.beta': 2,
  452. 'layer4.0.bn3.gamma': 2,
  453. 'layer4.0.conv3.weight': 2,
  454. 'layer4.0.bn2.beta': 2,
  455. 'layer4.0.bn2.gamma': 2,
  456. 'layer4.0.conv2.weight': 2,
  457. 'layer4.0.bn1.beta': 2,
  458. 'layer4.0.bn1.gamma': 2,
  459. 'layer4.0.conv1.weight': 2,
  460. 'layer3.5.bn3.beta': 2,
  461. 'layer3.5.bn3.gamma': 2,
  462. 'layer3.5.conv3.weight': 2,
  463. 'layer3.5.bn2.beta': 2,
  464. 'layer3.5.bn2.gamma': 2,
  465. 'layer3.5.conv2.weight': 2,
  466. 'layer3.5.bn1.beta': 2,
  467. 'layer3.5.bn1.gamma': 2,
  468. 'layer3.5.conv1.weight': 2,
  469. 'layer3.4.bn3.beta': 2,
  470. 'layer3.4.bn3.gamma': 2,
  471. 'layer3.4.conv3.weight': 2,
  472. 'layer3.4.bn2.beta': 2,
  473. 'layer3.4.bn2.gamma': 2,
  474. 'layer3.4.conv2.weight': 2,
  475. 'layer3.4.bn1.beta': 2,
  476. 'layer3.4.bn1.gamma': 2,
  477. 'layer3.4.conv1.weight': 2,
  478. 'layer3.3.bn3.beta': 2,
  479. 'layer3.3.bn3.gamma': 2,
  480. 'layer3.3.conv3.weight': 2,
  481. 'layer3.3.bn2.beta': 2,
  482. 'layer3.3.bn2.gamma': 2,
  483. 'layer3.3.conv2.weight': 2,
  484. 'layer3.3.bn1.beta': 2,
  485. 'layer3.3.bn1.gamma': 2,
  486. 'layer3.3.conv1.weight': 2,
  487. 'layer3.2.bn3.beta': 2,
  488. 'layer3.2.bn3.gamma': 2,
  489. 'layer3.2.conv3.weight': 2,
  490. 'layer3.2.bn2.beta': 2,
  491. 'layer3.2.bn2.gamma': 2,
  492. 'layer3.2.conv2.weight': 2,
  493. 'layer3.2.bn1.beta': 2,
  494. 'layer3.2.bn1.gamma': 2,
  495. 'layer3.2.conv1.weight': 2,
  496. 'layer3.1.bn3.beta': 2,
  497. 'layer3.1.bn3.gamma': 2,
  498. 'layer3.1.conv3.weight': 2,
  499. 'layer3.1.bn2.beta': 2,
  500. 'layer3.1.bn2.gamma': 2,
  501. 'layer3.1.conv2.weight': 2,
  502. 'layer3.1.bn1.beta': 2,
  503. 'layer3.1.bn1.gamma': 2,
  504. 'layer3.1.conv1.weight': 2,
  505. 'layer3.0.bn_down_sample.beta': 2,
  506. 'layer3.0.bn_down_sample.gamma': 2,
  507. 'layer3.0.conv_down_sample.weight': 2,
  508. 'layer3.0.bn3.beta': 2,
  509. 'layer3.0.bn3.gamma': 2,
  510. 'layer3.0.conv3.weight': 2,
  511. 'layer3.0.bn2.beta': 2,
  512. 'layer3.0.bn2.gamma': 2,
  513. 'layer3.0.conv2.weight': 2,
  514. 'layer3.0.bn1.beta': 2,
  515. 'layer3.0.bn1.gamma': 2,
  516. 'layer3.0.conv1.weight': 2,
  517. 'layer2.3.bn3.beta': 2,
  518. 'layer2.3.bn3.gamma': 2,
  519. 'layer2.3.conv3.weight': 2,
  520. 'layer2.3.bn2.beta': 2,
  521. 'layer2.3.bn2.gamma': 2,
  522. 'layer2.3.conv2.weight': 2,
  523. 'layer2.3.bn1.beta': 2,
  524. 'layer2.3.bn1.gamma': 2,
  525. 'layer2.3.conv1.weight': 2,
  526. 'layer2.2.bn3.beta': 2,
  527. 'layer2.2.bn3.gamma': 2,
  528. 'layer2.2.conv3.weight': 2,
  529. 'layer2.2.bn2.beta': 2,
  530. 'layer2.2.bn2.gamma': 2,
  531. 'layer2.2.conv2.weight': 2,
  532. 'layer2.2.bn1.beta': 2,
  533. 'layer2.2.bn1.gamma': 2,
  534. 'layer2.2.conv1.weight': 2,
  535. 'layer2.1.bn3.beta': 2,
  536. 'layer2.1.bn3.gamma': 2,
  537. 'layer2.1.conv3.weight': 2,
  538. 'layer2.1.bn2.beta': 2,
  539. 'layer2.1.bn2.gamma': 2,
  540. 'layer2.1.conv2.weight': 2,
  541. 'layer2.1.bn1.beta': 2,
  542. 'layer2.1.bn1.gamma': 2,
  543. 'layer2.1.conv1.weight': 2,
  544. 'layer2.0.bn_down_sample.beta': 2,
  545. 'layer2.0.bn_down_sample.gamma': 2,
  546. 'layer2.0.conv_down_sample.weight': 2,
  547. 'layer2.0.bn3.beta': 2,
  548. 'layer2.0.bn3.gamma': 2,
  549. 'layer2.0.conv3.weight': 2,
  550. 'layer2.0.bn2.beta': 2,
  551. 'layer2.0.bn2.gamma': 2,
  552. 'layer2.0.conv2.weight': 2,
  553. 'layer2.0.bn1.beta': 2,
  554. 'layer2.0.bn1.gamma': 2,
  555. 'layer2.0.conv1.weight': 2,
  556. 'layer1.2.bn3.beta': 2,
  557. 'layer1.2.bn3.gamma': 2,
  558. 'layer1.2.conv3.weight': 2,
  559. 'layer1.2.bn2.beta': 2,
  560. 'layer1.2.bn2.gamma': 2,
  561. 'layer1.2.conv2.weight': 2,
  562. 'layer1.2.bn1.beta': 2,
  563. 'layer1.2.bn1.gamma': 2,
  564. 'layer1.2.conv1.weight': 2,
  565. 'layer1.1.bn3.beta': 2,
  566. 'layer1.1.bn3.gamma': 2,
  567. 'layer1.1.conv3.weight': 2,
  568. 'layer1.1.bn2.beta': 2,
  569. 'layer1.1.bn2.gamma': 2,
  570. 'layer1.1.conv2.weight': 2,
  571. 'layer1.1.bn1.beta': 2,
  572. 'layer1.1.bn1.gamma': 2,
  573. 'layer1.1.conv1.weight': 2,
  574. 'layer1.0.bn_down_sample.beta': 2,
  575. 'layer1.0.bn_down_sample.gamma': 2,
  576. 'layer1.0.conv_down_sample.weight': 2,
  577. 'layer1.0.bn3.beta': 2,
  578. 'layer1.0.bn3.gamma': 2,
  579. 'layer1.0.conv3.weight': 2,
  580. 'layer1.0.bn2.beta': 2,
  581. 'layer1.0.bn2.gamma': 2,
  582. 'layer1.0.conv2.weight': 1,
  583. 'layer1.0.bn1.beta': 1,
  584. 'layer1.0.bn1.gamma': 1,
  585. 'layer1.0.conv1.weight': 1,
  586. 'bn1.beta': 1,
  587. 'bn1.gamma': 1,
  588. 'conv1.weight': 1}
  589. assert allreduce_fusion_dict == expect_dict
  590. cost_model_context.reset_cost_model_context()
  591. def test_train_64k_8p(batch_size=32, num_classes=65536): # 1048576 #131072 #32768 #8192
  592. dev_num = 8
  593. context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
  594. cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0)
  595. set_algo_parameters(elementwise_op_strategy_follow=True)
  596. resset_op_id()
  597. np.random.seed(6)
  598. input_np = np.ones([batch_size, 3, 224, 224]).astype(np.float32)
  599. label_np = np.zeros([batch_size]).astype(np.int32)
  600. for i in range(0, batch_size):
  601. label_np[i] = i % num_classes
  602. dataset = DatasetLenet(Tensor(input_np), Tensor(label_np), 1)
  603. net = resnet50(num_classes)
  604. loss = SoftmaxCrossEntropyExpand(sparse=True)
  605. opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
  606. model = Model(net, loss_fn=loss, optimizer=opt)
  607. model.train(5, dataset, dataset_sink_mode=False)
  608. strategies = _executor._get_shard_strategy(model._train_network)
  609. for (k, v) in strategies.items():
  610. if re.search('Conv2D-op', k) is not None:
  611. assert v[0][0] == dev_num
  612. elif re.search('MatMul-op', k) is not None:
  613. assert v == [[1, 1], [dev_num, 1]]
  614. elif re.search('ReduceSum-op', k) is not None:
  615. assert v == [[1, dev_num]]
  616. def test_train_8k_8p_gpu(batch_size=32, num_classes=8192):
  617. dev_num = 8
  618. context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  619. context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
  620. set_algo_parameters(elementwise_op_strategy_follow=True)
  621. resset_op_id()
  622. np.random.seed(6)
  623. input_np = np.ones([batch_size, 3, 224, 224]).astype(np.float32)
  624. label_np = np.zeros([batch_size]).astype(np.int32)
  625. for i in range(0, batch_size):
  626. label_np[i] = i % num_classes
  627. dataset = DatasetLenet(Tensor(input_np), Tensor(label_np), 1)
  628. net = resnet50(num_classes)
  629. loss = SoftmaxCrossEntropyExpand(sparse=True)
  630. opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
  631. model = Model(net, loss_fn=loss, optimizer=opt)
  632. model.train(5, dataset, dataset_sink_mode=False)
  633. strategies = _executor._get_shard_strategy(model._train_network)
  634. for (k, v) in strategies.items():
  635. if re.search('Conv2D-op', k) is not None:
  636. assert v[0][0] == dev_num
  637. elif re.search('MatMul-op', k) is not None:
  638. assert v == [[1, 1], [dev_num, 1]]
  639. elif re.search('ReduceSum-op', k) is not None:
  640. assert v == [[1, dev_num]]
  641. def test_train_4k_8p_gpu(batch_size=32, num_classes=4096):
  642. dev_num = 8
  643. context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  644. context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
  645. set_algo_parameters(elementwise_op_strategy_follow=True)
  646. resset_op_id()
  647. np.random.seed(6)
  648. input_np = np.ones([batch_size, 3, 224, 224]).astype(np.float32)
  649. label_np = np.zeros([batch_size]).astype(np.int32)
  650. for i in range(0, batch_size):
  651. label_np[i] = i % num_classes
  652. dataset = DatasetLenet(Tensor(input_np), Tensor(label_np), 1)
  653. net = resnet50(num_classes)
  654. loss = SoftmaxCrossEntropyExpand(sparse=True)
  655. opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
  656. model = Model(net, loss_fn=loss, optimizer=opt)
  657. model.train(5, dataset, dataset_sink_mode=False)
  658. strategies = _executor._get_shard_strategy(model._train_network)
  659. for (k, v) in strategies.items():
  660. if re.search('Conv2D-op', k) is not None:
  661. assert v[0][0] == dev_num
  662. elif re.search('MatMul-op', k) is not None:
  663. assert v == [[dev_num, 1], [1, 1]]
  664. elif re.search('ReduceSum-op', k) is not None:
  665. assert v == [[dev_num, 1]]