Browse Source

Improve InceptionV4 network performance.

tags/v1.1.0
linqingke 5 years ago
parent
commit
2cd3e01503
6 changed files with 19 additions and 44 deletions
  1. +1
    -0
      model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train_ascend.sh
  2. +9
    -9
      model_zoo/official/cv/inceptionv4/README.md
  3. +1
    -0
      model_zoo/official/cv/inceptionv4/scripts/run_distribute_train_ascend.sh
  4. +0
    -3
      model_zoo/official/cv/inceptionv4/src/config.py
  5. +7
    -32
      model_zoo/official/cv/inceptionv4/src/inceptionv4.py
  6. +1
    -0
      model_zoo/official/cv/unet/scripts/run_distribute_train.sh

+ 1
- 0
model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train_ascend.sh View File

@@ -46,6 +46,7 @@ exit 1
fi

ulimit -u unlimited
export HCCL_CONNECT_TIMEOUT=600
export DEVICE_NUM=8
export RANK_SIZE=8
export RANK_TABLE_FILE=$PATH1


+ 9
- 9
model_zoo/official/cv/inceptionv4/README.md View File

@@ -142,12 +142,12 @@ sh scripts/run_standalone_train_ascend.sh DEVICE_ID DATA_DIR
Training results will be stored in the example path. Checkpoints will be stored at `ckpt_path` by default, and the training log will be redirected to `./log.txt` as follows.

```python
epoch: 1 step: 1251, loss is 5.861846
Epoch time: 701416.649, per step time: 560.685
epoch: 2 step: 1251, loss is 4.295785
Epoch time: 472524.154, per step time: 377.717
epoch: 3 step: 1251, loss is 3.691987
Epoch time: 472505.767, per step time: 377.702
epoch: 1 step: 1251, loss is 5.4833196
Epoch time: 520274.060, per step time: 415.887
epoch: 2 step: 1251, loss is 4.093194
Epoch time: 288520.628, per step time: 230.632
epoch: 3 step: 1251, loss is 3.6242008
Epoch time: 288507.506, per step time: 230.622
```

## [Eval process](#contents)
@@ -201,7 +201,7 @@ metric: {'Loss': 0.9849, 'Top1-Acc':0.7985, 'Top5-Acc':0.9460}
| Outputs | probability |
| Loss | 0.98486 |
| Accuracy (8p) | ACC1[79.85%] ACC5[94.60%] |
| Total time (8p) | 33h |
| Total time (8p) | 20h |
| Params (M) | 153M |
| Checkpoint for Fine tuning | 2135M |
| Scripts | [inceptionv4 script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/inceptionv4) |
@@ -225,11 +225,11 @@ metric: {'Loss': 0.9849, 'Top1-Acc':0.7985, 'Top5-Acc':0.9460}

| **Ascend** | train performance |
| :--------: | :---------------: |
| 1p | 345 img/s |
| 1p | 556 img/s |

| **Ascend** | train performance |
| :--------: | :---------------: |
| 8p | 2708img/s |
| 8p | 4430 img/s |

# [Description of Random Situation](#contents)



+ 1
- 0
model_zoo/official/cv/inceptionv4/scripts/run_distribute_train_ascend.sh View File

@@ -14,6 +14,7 @@
# limitations under the License.
# ============================================================================

export HCCL_CONNECT_TIMEOUT=600
export RANK_TABLE_FILE=$1
DATA_DIR=$2
export RANK_SIZE=8


+ 0
- 3
model_zoo/official/cv/inceptionv4/src/config.py View File

@@ -41,7 +41,4 @@ config_ascend = edict({
'lr_max': 0.4,
'warmup_epochs': 1,
'start_epoch': 1,

'onnx_filename': 'inceptionv4.onnx',
'air_filename': 'inceptionv4.air'
})

+ 7
- 32
model_zoo/official/cv/inceptionv4/src/inceptionv4.py View File

@@ -15,40 +15,15 @@
"""InceptionV4"""
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.initializer import Initializer



class Avginitializer(Initializer):
"""
Initialize the weight to 1/(m*n), where (m, n) is the shape of the kernel.
"""


def _initialize(self, arr):
arr[:] = 0
for i in range(arr.shape[0]):
for j in range(arr.shape[2]):
for k in range(arr.shape[3]):
arr[i][i][j][k] = 1/(arr.shape[2]*arr.shape[3])


class Avgpool(nn.Cell):
"""
Average pooling for temporal data.

Using a custom initializer to turn conv2d into avgpool2d. The weights won't be trained.

"""
def __init__(self, channel, kernel_size, stride=1, pad_mode='same'):
"""Avgpool"""
def __init__(self, kernel_size, stride=1, pad_mode='same'):
super(Avgpool, self).__init__()
self.init = Avginitializer()
self.conv = nn.Conv2d(channel, channel, kernel_size,
stride=stride, pad_mode=pad_mode, weight_init=self.init)
self.conv.set_train(False)
self.avg_pool = nn.AvgPool2d(kernel_size=kernel_size, stride=stride, pad_mode=pad_mode)

def construct(self, x):
x = self.conv(x)
x = self.avg_pool(x)
return x


@@ -141,7 +116,7 @@ class InceptionA(nn.Cell):
Conv2d(96, 96, 3, stride=1, pad_mode='pad', padding=1, has_bias=False)])

self.branch_3 = nn.SequentialCell([
Avgpool(384, kernel_size=3, stride=1, pad_mode='same'),
Avgpool(kernel_size=3, stride=1, pad_mode='same'),
Conv2d(384, 96, 1, stride=1, padding=0, has_bias=False)])

self.concat = P.Concat(1)
@@ -178,7 +153,7 @@ class InceptionB(nn.Cell):
Conv2d(224, 256, (1, 7), pad_mode='same', stride=1, has_bias=False)
])
self.branch_3 = nn.SequentialCell([
Avgpool(in_channels, kernel_size=3, stride=1, pad_mode='same'),
Avgpool(kernel_size=3, stride=1, pad_mode='same'),
Conv2d(in_channels, 128, 1, stride=1, padding=0, has_bias=False)
])
self.concat = P.Concat(1)
@@ -265,7 +240,7 @@ class InceptionC(nn.Cell):
512, 256, (3, 1), pad_mode='same', stride=1, has_bias=False)

self.branch_3 = nn.SequentialCell([
Avgpool(in_channels, kernel_size=3, stride=1, pad_mode='same'),
Avgpool(kernel_size=3, stride=1, pad_mode='same'),
Conv2d(in_channels, 256, 1, stride=1, padding=0, has_bias=False)
])
self.concat0 = P.Concat(1)


+ 1
- 0
model_zoo/official/cv/unet/scripts/run_distribute_train.sh View File

@@ -26,6 +26,7 @@ then
exit 1
fi

export HCCL_CONNECT_TIMEOUT=600
export RANK_SIZE=8

for((i=0;i<RANK_SIZE;i++))


Loading…
Cancel
Save