| @@ -46,6 +46,7 @@ exit 1 | |||||
| fi | fi | ||||
| ulimit -u unlimited | ulimit -u unlimited | ||||
| export HCCL_CONNECT_TIMEOUT=600 | |||||
| export DEVICE_NUM=8 | export DEVICE_NUM=8 | ||||
| export RANK_SIZE=8 | export RANK_SIZE=8 | ||||
| export RANK_TABLE_FILE=$PATH1 | export RANK_TABLE_FILE=$PATH1 | ||||
| @@ -142,12 +142,12 @@ sh scripts/run_standalone_train_ascend.sh DEVICE_ID DATA_DIR | |||||
| Training results will be stored in the example path. Checkpoints are stored at `ckpt_path` by default, and the training log is redirected to `./log.txt`, as shown below. | Training results will be stored in the example path. Checkpoints are stored at `ckpt_path` by default, and the training log is redirected to `./log.txt`, as shown below. | ||||
| ```python | ```python | ||||
| epoch: 1 step: 1251, loss is 5.861846 | |||||
| Epoch time: 701416.649, per step time: 560.685 | |||||
| epoch: 2 step: 1251, loss is 4.295785 | |||||
| Epoch time: 472524.154, per step time: 377.717 | |||||
| epoch: 3 step: 1251, loss is 3.691987 | |||||
| Epoch time: 472505.767, per step time: 377.702 | |||||
| epoch: 1 step: 1251, loss is 5.4833196 | |||||
| Epoch time: 520274.060, per step time: 415.887 | |||||
| epoch: 2 step: 1251, loss is 4.093194 | |||||
| Epoch time: 288520.628, per step time: 230.632 | |||||
| epoch: 3 step: 1251, loss is 3.6242008 | |||||
| Epoch time: 288507.506, per step time: 230.622 | |||||
| ``` | ``` | ||||
| ## [Eval process](#contents) | ## [Eval process](#contents) | ||||
| @@ -201,7 +201,7 @@ metric: {'Loss': 0.9849, 'Top1-Acc':0.7985, 'Top5-Acc':0.9460} | |||||
| | Outputs | probability | | | Outputs | probability | | ||||
| | Loss | 0.98486 | | | Loss | 0.98486 | | ||||
| | Accuracy (8p) | ACC1[79.85%] ACC5[94.60%] | | | Accuracy (8p) | ACC1[79.85%] ACC5[94.60%] | | ||||
| | Total time (8p) | 33h | | |||||
| | Total time (8p) | 20h | | |||||
| | Params (M) | 153M | | | Params (M) | 153M | | ||||
| | Checkpoint for Fine tuning | 2135M | | | Checkpoint for Fine tuning | 2135M | | ||||
| | Scripts | [inceptionv4 script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/inceptionv4) | | | Scripts | [inceptionv4 script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/inceptionv4) | | ||||
| @@ -225,11 +225,11 @@ metric: {'Loss': 0.9849, 'Top1-Acc':0.7985, 'Top5-Acc':0.9460} | |||||
| | **Ascend** | train performance | | | **Ascend** | train performance | | ||||
| | :--------: | :---------------: | | | :--------: | :---------------: | | ||||
| | 1p | 345 img/s | | |||||
| | 1p | 556 img/s | | |||||
| | **Ascend** | train performance | | | **Ascend** | train performance | | ||||
| | :--------: | :---------------: | | | :--------: | :---------------: | | ||||
| | 8p | 2708img/s | | |||||
| | 8p | 4430 img/s | | |||||
| # [Description of Random Situation](#contents) | # [Description of Random Situation](#contents) | ||||
| @@ -14,6 +14,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| export HCCL_CONNECT_TIMEOUT=600 | |||||
| export RANK_TABLE_FILE=$1 | export RANK_TABLE_FILE=$1 | ||||
| DATA_DIR=$2 | DATA_DIR=$2 | ||||
| export RANK_SIZE=8 | export RANK_SIZE=8 | ||||
| @@ -41,7 +41,4 @@ config_ascend = edict({ | |||||
| 'lr_max': 0.4, | 'lr_max': 0.4, | ||||
| 'warmup_epochs': 1, | 'warmup_epochs': 1, | ||||
| 'start_epoch': 1, | 'start_epoch': 1, | ||||
| 'onnx_filename': 'inceptionv4.onnx', | |||||
| 'air_filename': 'inceptionv4.air' | |||||
| }) | }) | ||||
| @@ -15,40 +15,15 @@ | |||||
| """InceptionV4""" | """InceptionV4""" | ||||
| import mindspore.nn as nn | import mindspore.nn as nn | ||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| from mindspore.common.initializer import Initializer | |||||
| class Avginitializer(Initializer): | |||||
| """ | |||||
| Initialize the weight to 1/m*n, (m, n) is the shape of kernel. | |||||
| """ | |||||
| def _initialize(self, arr): | |||||
| arr[:] = 0 | |||||
| for i in range(arr.shape[0]): | |||||
| for j in range(arr.shape[2]): | |||||
| for k in range(arr.shape[3]): | |||||
| arr[i][i][j][k] = 1/(arr.shape[2]*arr.shape[3]) | |||||
| class Avgpool(nn.Cell): | class Avgpool(nn.Cell): | ||||
| """ | |||||
| Average pooling for temporal data. | |||||
| Using a custom initializer to turn conv2d into avgpool2d. The weights won't be trained. | |||||
| """ | |||||
| def __init__(self, channel, kernel_size, stride=1, pad_mode='same'): | |||||
| """Avgpool""" | |||||
| def __init__(self, kernel_size, stride=1, pad_mode='same'): | |||||
| super(Avgpool, self).__init__() | super(Avgpool, self).__init__() | ||||
| self.init = Avginitializer() | |||||
| self.conv = nn.Conv2d(channel, channel, kernel_size, | |||||
| stride=stride, pad_mode=pad_mode, weight_init=self.init) | |||||
| self.conv.set_train(False) | |||||
| self.avg_pool = nn.AvgPool2d(kernel_size=kernel_size, stride=stride, pad_mode=pad_mode) | |||||
| def construct(self, x): | def construct(self, x): | ||||
| x = self.conv(x) | |||||
| x = self.avg_pool(x) | |||||
| return x | return x | ||||
| @@ -141,7 +116,7 @@ class InceptionA(nn.Cell): | |||||
| Conv2d(96, 96, 3, stride=1, pad_mode='pad', padding=1, has_bias=False)]) | Conv2d(96, 96, 3, stride=1, pad_mode='pad', padding=1, has_bias=False)]) | ||||
| self.branch_3 = nn.SequentialCell([ | self.branch_3 = nn.SequentialCell([ | ||||
| Avgpool(384, kernel_size=3, stride=1, pad_mode='same'), | |||||
| Avgpool(kernel_size=3, stride=1, pad_mode='same'), | |||||
| Conv2d(384, 96, 1, stride=1, padding=0, has_bias=False)]) | Conv2d(384, 96, 1, stride=1, padding=0, has_bias=False)]) | ||||
| self.concat = P.Concat(1) | self.concat = P.Concat(1) | ||||
| @@ -178,7 +153,7 @@ class InceptionB(nn.Cell): | |||||
| Conv2d(224, 256, (1, 7), pad_mode='same', stride=1, has_bias=False) | Conv2d(224, 256, (1, 7), pad_mode='same', stride=1, has_bias=False) | ||||
| ]) | ]) | ||||
| self.branch_3 = nn.SequentialCell([ | self.branch_3 = nn.SequentialCell([ | ||||
| Avgpool(in_channels, kernel_size=3, stride=1, pad_mode='same'), | |||||
| Avgpool(kernel_size=3, stride=1, pad_mode='same'), | |||||
| Conv2d(in_channels, 128, 1, stride=1, padding=0, has_bias=False) | Conv2d(in_channels, 128, 1, stride=1, padding=0, has_bias=False) | ||||
| ]) | ]) | ||||
| self.concat = P.Concat(1) | self.concat = P.Concat(1) | ||||
| @@ -265,7 +240,7 @@ class InceptionC(nn.Cell): | |||||
| 512, 256, (3, 1), pad_mode='same', stride=1, has_bias=False) | 512, 256, (3, 1), pad_mode='same', stride=1, has_bias=False) | ||||
| self.branch_3 = nn.SequentialCell([ | self.branch_3 = nn.SequentialCell([ | ||||
| Avgpool(in_channels, kernel_size=3, stride=1, pad_mode='same'), | |||||
| Avgpool(kernel_size=3, stride=1, pad_mode='same'), | |||||
| Conv2d(in_channels, 256, 1, stride=1, padding=0, has_bias=False) | Conv2d(in_channels, 256, 1, stride=1, padding=0, has_bias=False) | ||||
| ]) | ]) | ||||
| self.concat0 = P.Concat(1) | self.concat0 = P.Concat(1) | ||||
| @@ -26,6 +26,7 @@ then | |||||
| exit 1 | exit 1 | ||||
| fi | fi | ||||
| export HCCL_CONNECT_TIMEOUT=600 | |||||
| export RANK_SIZE=8 | export RANK_SIZE=8 | ||||
| for((i=0;i<RANK_SIZE;i++)) | for((i=0;i<RANK_SIZE;i++)) | ||||