Browse Source

fix resnet50_quant run_train bug

tags/v1.1.0
yuchaojie 5 years ago
parent
commit
23b70fb9b4
3 changed files with 10 additions and 121 deletions
  1. +3
    -3
      model_zoo/official/cv/resnet50_quant/README.md
  2. +6
    -117
      model_zoo/official/cv/resnet50_quant/scripts/run_train.sh
  3. +1
    -1
      model_zoo/official/cv/resnet50_quant/train.py

+ 3
- 3
model_zoo/official/cv/resnet50_quant/README.md View File

@@ -13,8 +13,8 @@
- [Evaluation Process](#evaluation-process)
- [Model Description](#model-description)
- [Performance](#performance)
- [Training Performance](#training-performance)
- [Evaluation Performance](#evaluation-performance)
- [Inference Performance](#inference-performance)
- [Description of Random Situation](#description-of-random-situation)
- [ModelZoo Homepage](#modelzoo-homepage)

@@ -74,7 +74,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil

```python
├── resnet50_quant
├── Readme.md # descriptions about Resnet50-Quant
├── README.md # descriptions about Resnet50-Quant
├── scripts
│ ├──run_train.sh # shell script for training on Ascend
│ ├──run_infer.sh # shell script for evaluation on Ascend
@@ -193,7 +193,7 @@ result: {'acc': 0.76576314102564111}
| Checkpoint for Fine tuning | 197M (.ckpt file) |
| Scripts | [resnet50-quant script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet50_quant) |

#### Inference Performance
### Inference Performance

| Parameters | Ascend |
| ------------------- | --------------------------- |


+ 6
- 117
model_zoo/official/cv/resnet50_quant/scripts/run_train.sh View File

@@ -22,76 +22,6 @@ get_real_path(){
fi
}

# check_and_get_Ascend_device(){

# #device_list=(${1//,/ })
# IFS=',' read -ra device_list <<<"$1"
# last_device_id=0
# first_device_id=8
# device_used=(0 0 0 0 0 0 0 0)

# for var in "${device_list[@]}"
# do
# if [ $((var)) -lt 0 ] || [ $((var)) -ge 8 ]
# then
# echo "error: device id=${var} is incorrect, device id must be in range [0,8), please check your device id list!"
# exit 1
# fi

# if [ ${device_used[$((var))]} -eq 0 ]
# then
# device_used[ $((var)) ]=1
# else
# echo "error: device id is duplicate, please check your device id list!"
# exit 1
# fi

# if [ ${last_device_id} \< $((var)) ]
# then
# last_device_id=$((var))
# fi
# if [ ${first_device_id} \> $((var)) ]
# then
# first_device_id=$((var))
# fi
# done

# device_num=`expr ${last_device_id} - ${first_device_id} + 1`
# if [ ${device_num} != ${#device_list[*]} ]
# then
# echo "error: the Ascend chips used must be continuous, please check your device id list!"
# exit 1
# fi

# if [ ${first_device_id} -lt 4 ] && [ ${last_device_id} -ge 4 ]
# then
# if [ ${first_device_id} != 0 ] || [ ${last_device_id} != 7 ]
# then
# echo "error: device id list must be in the same group of [0,4) or [4,8) when using Ascend chips."
# exit 1
# fi
# fi

# echo "${first_device_id},`expr ${last_device_id} + 1`"
# }

# get_hccl_name(){

# server_ip=$(ifconfig -a | grep inet | grep -v 127.0.0.1 | grep -v inet6 | awk '{print $2}' | tr -d "addr:")
# device_num=`expr $2 - $1`
# device_id_list=""

# for(( i=$1 ; i < $2 ; i++ ))
# do
# device_id_list=${device_id_list}$i
# done
# hccl_name="hccl_${device_num}p_${device_id_list}_${server_ip}.json"

# echo ${hccl_name}
# }


run_ascend(){

if [ $# != 3 ] && [ $# != 4 ]
@@ -100,47 +30,6 @@ run_ascend(){
exit 1
fi

# first_last_device=$(check_and_get_Ascend_device $2)
# #devices=(${first_last_device//,/ })
# #IFS=',' read -ra devices <<<"${first_last_device}"
# first_device=${first_last_device:0:1}
# last_device=${first_last_device:2:1}
# device_num=`expr $((last_device)) - $((first_device))`

# #single ascend or multiple ascend
# if [ ${device_num} -gt 1 ]
# then
# ori_path=$(dirname "$(readlink -f "$0")")
# #generate hccl config file
# cd ../../../../utils/hccl_tools/ || exit
# device_num_arg="[${first_device},${last_device})"

# python hccl_tools.py --device_num=${device_num_arg}

# hccl_name=$(get_hccl_name ${first_device} ${last_device})

# if [ ! -e ${hccl_name} ]
# then
# echo "error: failed to generate the hccl config file!"
# exit 1
# fi

# mv ${hccl_name} ${ori_path}
# cd ${ori_path} || exit

# PATH1=$(get_real_path ${hccl_name})

# if [ ! -f $PATH1 ]
# then
# echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
# exit 1
# fi

# export RANK_TABLE_FILE=$PATH1
# fi


PATH1=$(get_real_path $2)
PATH2=$(get_real_path $3)

@@ -167,12 +56,12 @@ run_ascend(){
exit 1
fi

rank_file_name=${2##*/}
IFS='_' read -ra array <<<"${rank_file_name}"
device_id_list=${array[2]}
first_device=${device_id_list:0:1}
device_num=${#device_id_list}
cat $2 | grep device_id >temp.log
array=$(cat temp.log | awk -F "[:]" '{print$2}')
IFS=" " read -ra device_list <<<$array
first_device=${device_list[0]:1:1}
device_num=$(cat temp.log | wc -l)
rm temp.log

ulimit -u unlimited
export DEVICE_NUM=${device_num}


+ 1
- 1
model_zoo/official/cv/resnet50_quant/train.py View File

@@ -116,7 +116,7 @@ if __name__ == '__main__':
quantizer = QuantizationAwareTraining(bn_fold=True,
per_channel=[True, False],
symmetric=[True, False])
network = quantizer.quantize(network)
net = quantizer.quantize(net)

# get learning rate
lr = get_lr(lr_init=config.lr_init,


Loading…
Cancel
Save